VectorInstitute · lotif · Jan 16, 2026 · Jan 21, 2026 · Jan 22, 2026 · Jan 22, 2026
diff --git a/.github/workflows/code_checks.yml b/.github/workflows/code_checks.yml
@@ -54,5 +54,10 @@ jobs:
         uses: pypa/gh-action-pip-audit@1220774d901786e6f652ae159f7b6bc8fea6d266
         with:
           virtual-environment: .venv/
+          # Skipping one nbconvert vulnerability that has no fix version
+          # Skipping one orjson vulnerability that has no fix version
+          # Skipping one protobuf vulnerability that has no fix version
           ignore-vulns: |
             GHSA-xm59-rqc7-hhvf
+            GHSA-hx9q-6w63-j58v
+            GHSA-7gcm-g887-7qv7
diff --git a/.gitignore b/.gitignore
@@ -24,3 +24,6 @@ wheels/
 **.ipynb_checkpoints
 
 .env
+.gradio
+aieng-eval-agents/aieng/agent_evals/report_generation/data/*.db
+aieng-eval-agents/aieng/agent_evals/report_generation/reports/*
diff --git a/aieng-eval-agents/aieng/agent_evals/async_client_manager.py b/aieng-eval-agents/aieng/agent_evals/async_client_manager.py
@@ -0,0 +1,193 @@
+"""Async client lifecycle manager for Gradio applications.
+
+Provides idempotent initialization and proper cleanup of async clients
+like Weaviate and OpenAI to prevent event loop conflicts during Gradio's
+hot-reload process.
+"""
+
+import os
+import sqlite3
+import urllib.parse
+from pathlib import Path
+from typing import Any
+
+import pandas as pd
+from aieng.agent_evals.configs import Configs
+from openai import AsyncOpenAI
+from weaviate.client import WeaviateAsyncClient
+
+
+# Relative fallback paths, used only when the REPORT_GENERATION_DB_PATH and
+# REPORTS_OUTPUT_PATH environment variables are not set.
+DEFAULT_SQLITE_DB_PATH = Path("aieng-eval-agents/aieng/agent_evals/report_generation/data/OnlineRetail.db")
+DEFAULT_REPORTS_OUTPUT_PATH = Path("aieng-eval-agents/aieng/agent_evals/report_generation/reports/")
+
+
+class SQLiteConnection:
+    """Thin wrapper around a ``sqlite3`` connection to the report database."""
+
+    def __init__(self) -> None:
+        """Open the connection.
+
+        The database location is read from the REPORT_GENERATION_DB_PATH
+        env var, falling back to DEFAULT_SQLITE_DB_PATH when it is not set.
+        """
+        database_location = os.getenv("REPORT_GENERATION_DB_PATH", DEFAULT_SQLITE_DB_PATH)
+        self._connection = sqlite3.connect(database_location)
+
+    def execute(self, query: str) -> list[Any]:
+        """Run a query and collect every resulting row.
+
+        Args:
+            query: The SQLite query to execute.
+
+        Returns
+        -------
+            All rows produced by the query, as returned by
+            `execute(query).fetchall()`.
+        """
+        cursor = self._connection.execute(query)
+        return cursor.fetchall()
+
+    def close(self) -> None:
+        """Release the underlying SQLite connection."""
+        self._connection.close()
+
+
+class ReportFileWriter:
+    """Write report data to XLSX files inside the reports output directory."""
+
+    def write_report_to_file(
+        self,
+        report_data: list[Any],
+        report_columns: list[str],
+        filename: str = "report.xlsx",
+        gradio_link: bool = True,
+    ) -> str:
+        """Write a report to an XLSX file.
+
+        Args:
+            report_data: The data of the report
+            report_columns: The columns of the report
+            filename: The name of the file to create. Default is "report.xlsx".
+            gradio_link: Whether to return a file link that works with Gradio UI.
+                Default is True.
+
+        Returns
+        -------
+            The path to the report file. If `gradio_link` is True, will return
+                a URL link that allows Gradio UI to download the file.
+        """
+        # Create the reports directory if it doesn't exist. `parents=True` is
+        # needed because the default location is nested several levels deep
+        # and any missing parent would otherwise raise FileNotFoundError.
+        reports_output_path = self.get_reports_output_path()
+        reports_output_path.mkdir(parents=True, exist_ok=True)
+        filepath = reports_output_path / filename
+
+        report_df = pd.DataFrame(report_data, columns=report_columns)
+        report_df.to_excel(filepath, index=False)
+
+        file_uri = str(filepath)
+        if gradio_link:
+            # safe="" percent-encodes every reserved character, including "/"
+            file_uri = f"gradio_api/file={urllib.parse.quote(file_uri, safe='')}"
+
+        return file_uri
+
+    @staticmethod
+    def get_reports_output_path() -> Path:
+        """Get the reports output path.
+
+        If no path is provided in the REPORTS_OUTPUT_PATH env var, will use the
+        default path in DEFAULT_REPORTS_OUTPUT_PATH.
+
+        Returns
+        -------
+            The reports output path.
+        """
+        return Path(os.getenv("REPORTS_OUTPUT_PATH", DEFAULT_REPORTS_OUTPUT_PATH))
+
+
+class AsyncClientManager:
+    """Manages async client lifecycle with lazy initialization and cleanup.
+
+    This class ensures clients are created only once and properly closed,
+    preventing ResourceWarning errors from unclosed event loops.
+
+    Parameters
+    ----------
+    configs: Configs | None, optional, default=None
+        Configuration object for client setup. If None, a new ``Configs()`` is
+        created on first access of the ``configs`` property.
+
+    Examples
+    --------
+    >>> manager = AsyncClientManager()
+    >>> # Access clients (created on first access)
+    >>> openai = manager.openai_client
+    >>> db = manager.sqlite_connection
+    >>> writer = manager.report_file_writer
+    >>> # In finally block or cleanup
+    >>> await manager.close()
+    """
+
+    _singleton_instance: "AsyncClientManager | None" = None
+
+    @classmethod
+    def get_instance(cls) -> "AsyncClientManager":
+        """Get the singleton instance of the client manager.
+
+        The instance is created with default arguments on the first call and
+        reused afterwards.
+
+        Returns
+        -------
+            The singleton instance of the client manager.
+        """
+        if cls._singleton_instance is None:
+            # Instantiate through `cls` rather than a hard-coded class name
+            cls._singleton_instance = cls()
+        return cls._singleton_instance
+
+    def __init__(self, configs: Configs | None = None) -> None:
+        """Initialize manager with optional configs."""
+        self._configs: Configs | None = configs
+        self._weaviate_client: WeaviateAsyncClient | None = None
+        self._openai_client: AsyncOpenAI | None = None
+        self._sqlite_connection: SQLiteConnection | None = None
+        self._report_file_writer: ReportFileWriter | None = None
+        self._initialized: bool = False
+
+    @property
+    def configs(self) -> Configs:
+        """Get or create configs instance."""
+        if self._configs is None:
+            self._configs = Configs()  # pyright: ignore[reportCallIssue]
+        return self._configs
+
+    @property
+    def openai_client(self) -> AsyncOpenAI:
+        """Get or create OpenAI client."""
+        if self._openai_client is None:
+            self._openai_client = AsyncOpenAI()
+            self._initialized = True
+        return self._openai_client
+
+    @property
+    def sqlite_connection(self) -> SQLiteConnection:
+        """Get or create SQLite session."""
+        if self._sqlite_connection is None:
+            self._sqlite_connection = SQLiteConnection()
+            self._initialized = True
+        return self._sqlite_connection
+
+    @property
+    def report_file_writer(self) -> ReportFileWriter:
+        """Get or create ReportFileWriter."""
+        if self._report_file_writer is None:
+            self._report_file_writer = ReportFileWriter()
+            self._initialized = True
+        return self._report_file_writer
+
+    async def close(self) -> None:
+        """Close all initialized clients and reset the manager.
+
+        Every cached client is dropped, so the next property access creates a
+        fresh one and ``is_initialized()`` reports False until then. The SQLite
+        connection is closed even if closing the OpenAI client raises; the
+        exception still propagates to the caller.
+        """
+        try:
+            if self._openai_client is not None:
+                await self._openai_client.close()
+        finally:
+            self._openai_client = None
+            try:
+                if self._sqlite_connection is not None:
+                    self._sqlite_connection.close()
+            finally:
+                self._sqlite_connection = None
+                # Drop the writer as well so the cached state agrees with
+                # the `_initialized` flag reset below.
+                self._report_file_writer = None
+                self._initialized = False
+
+    def is_initialized(self) -> bool:
+        """Check if any clients have been initialized."""
+        return self._initialized
diff --git a/aieng-eval-agents/aieng/agent_evals/configs.py b/aieng-eval-agents/aieng/agent_evals/configs.py
@@ -0,0 +1,109 @@
+"""Configuration settings for the agent evals."""
+
+from pydantic import AliasChoices, Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class Configs(BaseSettings):
+    """Configuration settings loaded from environment variables.
+
+    This class automatically loads configuration values from environment variables
+    and a .env file, and provides type-safe access to all settings. It validates
+    environment variables on instantiation.
+
+    Attributes
+    ----------
+    openai_base_url : str
+        Base URL for OpenAI-compatible API (defaults to Gemini endpoint).
+    openai_api_key : str
+        API key for OpenAI-compatible API (accepts OPENAI_API_KEY, GEMINI_API_KEY,
+        or GOOGLE_API_KEY).
+    default_planner_model : str, default='gemini-2.5-pro'
+        Model name for planning tasks. This is typically a more capable and expensive
+        model.
+    default_worker_model : str, default='gemini-2.5-flash'
+        Model name for worker tasks. This is typically a less expensive model.
+    embedding_base_url : str
+        Base URL for embedding API service.
+    embedding_api_key : str
+        API key for embedding service.
+    embedding_model_name : str, default='@cf/baai/bge-m3'
+        Name of the embedding model.
+    weaviate_collection_name : str, default='enwiki_20250520'
+        Name of the Weaviate collection to use.
+    weaviate_api_key : str or None
+        Optional API key for Weaviate cloud instance.
+    weaviate_http_host : str
+        Weaviate HTTP host (must end with .weaviate.cloud, or be exactly
+        "localhost").
+    weaviate_grpc_host : str
+        Weaviate gRPC host (must start with grpc- and end with .weaviate.cloud,
+        or be exactly "localhost").
+    weaviate_http_port : int, default=443
+        Port for Weaviate HTTP connections.
+    weaviate_grpc_port : int, default=443
+        Port for Weaviate gRPC connections.
+    weaviate_http_secure : bool, default=True
+        Use secure HTTP connection.
+    weaviate_grpc_secure : bool, default=True
+        Use secure gRPC connection.
+    langfuse_public_key : str
+        Langfuse public key (must start with pk-lf-).
+    langfuse_secret_key : str
+        Langfuse secret key (must start with sk-lf-).
+    langfuse_host : str, default='https://us.cloud.langfuse.com'
+        Langfuse host URL.
+    e2b_api_key : str or None
+        Optional E2B.dev API key for code interpreter (must start with e2b_).
+    default_code_interpreter_template : str or None
+        Optional default template name or ID for E2B.dev code interpreter.
+    web_search_base_url : str or None
+        Optional base URL for web search service.
+    web_search_api_key : str or None
+        Optional API key for web search service.
+
+    Examples
+    --------
+    >>> from aieng.agent_evals.configs import Configs
+    >>> config = Configs()
+    >>> print(config.default_planner_model)
+    'gemini-2.5-pro'
+
+    Notes
+    -----
+    Create a .env file in your project root with the required environment
+    variables. The class will automatically load and validate them.
+    """
+
+    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", env_ignore_empty=True)
+
+    openai_base_url: str = "https://generativelanguage.googleapis.com/v1beta/openai/"
+    openai_api_key: str = Field(validation_alias=AliasChoices("OPENAI_API_KEY", "GEMINI_API_KEY", "GOOGLE_API_KEY"))
+
+    default_planner_model: str = "gemini-2.5-pro"
+    default_worker_model: str = "gemini-2.5-flash"
+
+    embedding_base_url: str
+    embedding_api_key: str
+    embedding_model_name: str = "@cf/baai/bge-m3"
+
+    weaviate_collection_name: str = "enwiki_20250520"
+    weaviate_api_key: str | None = None
+    # The alternation is grouped so that both anchors apply to each branch;
+    # without the group, any value merely containing "localhost" would pass.
+    # ends with .weaviate.cloud, or it's "localhost"
+    weaviate_http_host: str = Field(pattern=r"^(?:.*\.weaviate\.cloud|localhost)$")
+    # starts with grpc- ends with .weaviate.cloud, or it's "localhost"
+    weaviate_grpc_host: str = Field(pattern=r"^(?:grpc-.*\.weaviate\.cloud|localhost)$")
+    weaviate_http_port: int = 443
+    weaviate_grpc_port: int = 443
+    weaviate_http_secure: bool = True
+    weaviate_grpc_secure: bool = True
+
+    langfuse_public_key: str = Field(pattern=r"^pk-lf-.*$")
+    langfuse_secret_key: str = Field(pattern=r"^sk-lf-.*$")
+    langfuse_host: str = "https://us.cloud.langfuse.com"
+
+    # Optional E2B.dev API key for Python Code Interpreter tool
+    e2b_api_key: str | None = Field(default=None, pattern=r"^e2b_.*$")
+    default_code_interpreter_template: str | None = "9p6favrrqijhasgkq1tv"
+
+    # Optional configs for web search tool
+    web_search_base_url: str | None = None
+    web_search_api_key: str | None = None
diff --git a/aieng-eval-agents/aieng/agent_evals/report_generation/README.md b/aieng-eval-agents/aieng/agent_evals/report_generation/README.md
@@ -0,0 +1,52 @@
+# Report Generation Agent
+
+This code implements an example of a Report Generation Agent for a single-table relational
+data source.
+
+The data source implemented here is [SQLite](https://sqlite.org/), which is supported
+natively by Python and saves the data on disk.
+
+The Report Generation Agent provides a UI that reads user queries in natural language
+and proceeds to make SQL queries to the database in order to produce the data for
+the report. At the end, the Agent will provide a downloadable link to the report as
+an `.xlsx` file.
+
+## Dataset
+
+The dataset used in this example is the
+[Online Retail](https://archive.ics.uci.edu/dataset/352/online+retail) dataset. It contains
+information about invoices for products that were purchased by customers, including the
+product quantity, the invoice date and the country that the customer resides in. For a more
+detailed data structure, please check the [OnlineRetail.ddl](data/OnlineRetail.ddl) file.
+
+## Importing the Data
+
+To import the data, please download the dataset file from the link below and save it to your
+file system.
+
+https://archive.ics.uci.edu/static/public/352/online+retail.zip
+
+You can import the dataset to the database by running the script below:
+
+```bash
+uv run --env-file .env python -m aieng.agent_evals.report_generation.data.import_online_retail_data --dataset-path <path_to_the_csv_file>
+```
+
+Replace `<path_to_the_csv_file>` with the path where the dataset's CSV file is saved on your machine.
+
+***NOTE:*** You can configure where the database is saved by setting the
+`REPORT_GENERATION_DB_PATH` environment variable to the desired path.
+
+## Running
+
+To run the agent, please execute:
+
+```bash
+uv run --env-file .env python -m aieng.agent_evals.report_generation.main
+```
+
+The agent will be available through a [Gradio](https://www.gradio.app/) web UI under the
+local address http://127.0.0.1:7860, which can be accessed on your preferred browser.
+
+On the UI, there will be a few examples of requests you can make to this agent. It also
+features a text input so you can make your own report requests to it.
diff --git a/aieng-eval-agents/aieng/agent_evals/report_generation/data/OnlineRetail.ddl b/aieng-eval-agents/aieng/agent_evals/report_generation/data/OnlineRetail.ddl
@@ -0,0 +1,10 @@
+CREATE TABLE IF NOT EXISTS "sales" (  -- no-op when the table already exists
+        "InvoiceNo" INTEGER,
+        "StockCode" TEXT,
+        "Description" TEXT,
+        "Quantity" INTEGER,
+        "InvoiceDate" TEXT,  -- dates are kept as text, not a dedicated date type
+        "UnitPrice" REAL,
+        "CustomerID" INTEGER,
+        "Country" TEXT
+);