From 5f924aba8d570c606e3686eb1132c15d440ff54e Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 21:05:35 +0000 Subject: [PATCH 01/28] move dataclass into models --- ldai/__init__.py | 25 ++++ ldai/client.py | 205 ++---------------------------- ldai/models.py | 197 ++++++++++++++++++++++++++++ ldai/testing/test_agents.py | 4 +- ldai/testing/test_model_config.py | 2 +- 5 files changed, 236 insertions(+), 197 deletions(-) create mode 100644 ldai/models.py diff --git a/ldai/__init__.py b/ldai/__init__.py index cb7e545..91b3a2d 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -1 +1,26 @@ __version__ = "0.10.1" # x-release-please-version + +# Export main client +from ldai.client import LDAIClient + +# Export models for convenience +from ldai.models import ( + AIConfig, + LDAIAgent, + LDAIAgentConfig, + LDAIAgentDefaults, + LDMessage, + ModelConfig, + ProviderConfig, +) + +__all__ = [ + 'LDAIClient', + 'AIConfig', + 'LDAIAgent', + 'LDAIAgentConfig', + 'LDAIAgentDefaults', + 'LDMessage', + 'ModelConfig', + 'ProviderConfig', +] diff --git a/ldai/client.py b/ldai/client.py index a8bd888..db2a6ad 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -1,205 +1,22 @@ -from dataclasses import dataclass -from typing import Any, Dict, List, Literal, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple import chevron from ldclient import Context from ldclient.client import LDClient +from ldai.models import ( + AIConfig, + LDAIAgent, + LDAIAgentConfig, + LDAIAgentDefaults, + LDAIAgents, + LDMessage, + ModelConfig, + ProviderConfig, +) from ldai.tracker import LDAIConfigTracker -@dataclass -class LDMessage: - role: Literal['system', 'user', 'assistant'] - content: str - - def to_dict(self) -> dict: - """ - Render the given message as a dictionary object. - """ - return { - 'role': self.role, - 'content': self.content, - } - - -class ModelConfig: - """ - Configuration related to the model. - """ - - def __init__(self, name: str, parameters: Optional[Dict[str, Any]] = None, custom: Optional[Dict[str, Any]] = None): - """ - :param name: The name of the model. - :param parameters: Additional model-specific parameters. - :param custom: Additional customer provided data. - """ - self._name = name - self._parameters = parameters - self._custom = custom - - @property - def name(self) -> str: - """ - The name of the model. - """ - return self._name - - def get_parameter(self, key: str) -> Any: - """ - Retrieve model-specific parameters. - - Accessing a named, typed attribute (e.g. name) will result in the call - being delegated to the appropriate property. - """ - if key == 'name': - return self.name - - if self._parameters is None: - return None - - return self._parameters.get(key) - - def get_custom(self, key: str) -> Any: - """ - Retrieve customer provided data. - """ - if self._custom is None: - return None - - return self._custom.get(key) - - def to_dict(self) -> dict: - """ - Render the given model config as a dictionary object. - """ - return { - 'name': self._name, - 'parameters': self._parameters, - 'custom': self._custom, - } - - -class ProviderConfig: - """ - Configuration related to the provider. - """ - - def __init__(self, name: str): - self._name = name - - @property - def name(self) -> str: - """ - The name of the provider. - """ - return self._name - - def to_dict(self) -> dict: - """ - Render the given provider config as a dictionary object. 
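Patch 01 keeps the public API stable by re-exporting the moved classes from both the package root and `ldai.client`. A quick sanity sketch of what the re-export layout guarantees (assuming the package as patched here):

```python
# After this patch the moved classes resolve to the same objects from
# either import path, so existing call sites keep working unchanged.
from ldai import ModelConfig as TopLevelModelConfig
from ldai.client import ModelConfig as ClientModelConfig
from ldai.models import ModelConfig

assert TopLevelModelConfig is ModelConfig
assert ClientModelConfig is ModelConfig
```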
- """ - return { - 'name': self._name, - } - - -@dataclass(frozen=True) -class AIConfig: - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None - messages: Optional[List[LDMessage]] = None - provider: Optional[ProviderConfig] = None - - def to_dict(self) -> dict: - """ - Render the given default values as an AIConfig-compatible dictionary object. - """ - return { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'messages': [message.to_dict() for message in self.messages] if self.messages else None, - 'provider': self.provider.to_dict() if self.provider else None, - } - - -@dataclass(frozen=True) -class LDAIAgent: - """ - Represents an AI agent configuration with instructions and model settings. - - An agent is similar to an AIConfig but focuses on instructions rather than messages, - making it suitable for AI assistant/agent use cases. - """ - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None - provider: Optional[ProviderConfig] = None - instructions: Optional[str] = None - tracker: Optional[LDAIConfigTracker] = None - - def to_dict(self) -> Dict[str, Any]: - """ - Render the given agent as a dictionary object. - """ - result: Dict[str, Any] = { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'provider': self.provider.to_dict() if self.provider else None, - } - if self.instructions is not None: - result['instructions'] = self.instructions - return result - - -@dataclass(frozen=True) -class LDAIAgentDefaults: - """ - Default values for AI agent configurations. - - Similar to LDAIAgent but without tracker and with optional enabled field, - used as fallback values when agent configurations are not available. - """ - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None - provider: Optional[ProviderConfig] = None - instructions: Optional[str] = None - - def to_dict(self) -> Dict[str, Any]: - """ - Render the given agent defaults as a dictionary object. - """ - result: Dict[str, Any] = { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'provider': self.provider.to_dict() if self.provider else None, - } - if self.instructions is not None: - result['instructions'] = self.instructions - return result - - -@dataclass -class LDAIAgentConfig: - """ - Configuration for individual agent in batch requests. - - Combines agent key with its specific default configuration and variables. - """ - key: str - default_value: LDAIAgentDefaults - variables: Optional[Dict[str, Any]] = None - - -# Type alias for multiple agents -LDAIAgents = Dict[str, LDAIAgent] - - class LDAIClient: """The LaunchDarkly AI SDK client object.""" diff --git a/ldai/models.py b/ldai/models.py new file mode 100644 index 0000000..4eef5a2 --- /dev/null +++ b/ldai/models.py @@ -0,0 +1,197 @@ +from dataclasses import dataclass +from typing import Any, Dict, List, Literal, Optional + +from ldai.tracker import LDAIConfigTracker + + +@dataclass +class LDMessage: + role: Literal['system', 'user', 'assistant'] + content: str + + def to_dict(self) -> dict: + """ + Render the given message as a dictionary object. + """ + return { + 'role': self.role, + 'content': self.content, + } + + +class ModelConfig: + """ + Configuration related to the model. 
+ """ + + def __init__(self, name: str, parameters: Optional[Dict[str, Any]] = None, custom: Optional[Dict[str, Any]] = None): + """ + :param name: The name of the model. + :param parameters: Additional model-specific parameters. + :param custom: Additional customer provided data. + """ + self._name = name + self._parameters = parameters + self._custom = custom + + @property + def name(self) -> str: + """ + The name of the model. + """ + return self._name + + def get_parameter(self, key: str) -> Any: + """ + Retrieve model-specific parameters. + + Accessing a named, typed attribute (e.g. name) will result in the call + being delegated to the appropriate property. + """ + if key == 'name': + return self.name + + if self._parameters is None: + return None + + return self._parameters.get(key) + + def get_custom(self, key: str) -> Any: + """ + Retrieve customer provided data. + """ + if self._custom is None: + return None + + return self._custom.get(key) + + def to_dict(self) -> dict: + """ + Render the given model config as a dictionary object. + """ + return { + 'name': self._name, + 'parameters': self._parameters, + 'custom': self._custom, + } + + +class ProviderConfig: + """ + Configuration related to the provider. + """ + + def __init__(self, name: str): + self._name = name + + @property + def name(self) -> str: + """ + The name of the provider. + """ + return self._name + + def to_dict(self) -> dict: + """ + Render the given provider config as a dictionary object. + """ + return { + 'name': self._name, + } + + +@dataclass(frozen=True) +class AIConfig: + enabled: Optional[bool] = None + model: Optional[ModelConfig] = None + messages: Optional[List[LDMessage]] = None + provider: Optional[ProviderConfig] = None + + def to_dict(self) -> dict: + """ + Render the given default values as an AIConfig-compatible dictionary object. + """ + return { + '_ldMeta': { + 'enabled': self.enabled or False, + }, + 'model': self.model.to_dict() if self.model else None, + 'messages': [message.to_dict() for message in self.messages] if self.messages else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + + +@dataclass(frozen=True) +class LDAIAgent: + """ + Represents an AI agent configuration with instructions and model settings. + + An agent is similar to an AIConfig but focuses on instructions rather than messages, + making it suitable for AI assistant/agent use cases. + """ + enabled: Optional[bool] = None + model: Optional[ModelConfig] = None + provider: Optional[ProviderConfig] = None + instructions: Optional[str] = None + tracker: Optional[LDAIConfigTracker] = None + + def to_dict(self) -> Dict[str, Any]: + """ + Render the given agent as a dictionary object. + """ + result: Dict[str, Any] = { + '_ldMeta': { + 'enabled': self.enabled or False, + }, + 'model': self.model.to_dict() if self.model else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + if self.instructions is not None: + result['instructions'] = self.instructions + return result + + +@dataclass(frozen=True) +class LDAIAgentDefaults: + """ + Default values for AI agent configurations. + + Similar to LDAIAgent but without tracker and with optional enabled field, + used as fallback values when agent configurations are not available. + """ + enabled: Optional[bool] = None + model: Optional[ModelConfig] = None + provider: Optional[ProviderConfig] = None + instructions: Optional[str] = None + + def to_dict(self) -> Dict[str, Any]: + """ + Render the given agent defaults as a dictionary object. 
+ """ + result: Dict[str, Any] = { + '_ldMeta': { + 'enabled': self.enabled or False, + }, + 'model': self.model.to_dict() if self.model else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + if self.instructions is not None: + result['instructions'] = self.instructions + return result + + +@dataclass +class LDAIAgentConfig: + """ + Configuration for individual agent in batch requests. + + Combines agent key with its specific default configuration and variables. + """ + key: str + default_value: LDAIAgentDefaults + variables: Optional[Dict[str, Any]] = None + + +# Type alias for multiple agents +LDAIAgents = Dict[str, LDAIAgent] + diff --git a/ldai/testing/test_agents.py b/ldai/testing/test_agents.py index b2e80c0..755f2e5 100644 --- a/ldai/testing/test_agents.py +++ b/ldai/testing/test_agents.py @@ -2,8 +2,8 @@ from ldclient import Config, Context, LDClient from ldclient.integrations.test_data import TestData -from ldai.client import (LDAIAgentConfig, LDAIAgentDefaults, LDAIClient, - ModelConfig, ProviderConfig) +from ldai import (LDAIAgentConfig, LDAIAgentDefaults, LDAIClient, ModelConfig, + ProviderConfig) @pytest.fixture diff --git a/ldai/testing/test_model_config.py b/ldai/testing/test_model_config.py index 1ffc033..b35389d 100644 --- a/ldai/testing/test_model_config.py +++ b/ldai/testing/test_model_config.py @@ -2,7 +2,7 @@ from ldclient import Config, Context, LDClient from ldclient.integrations.test_data import TestData -from ldai.client import AIConfig, LDAIClient, LDMessage, ModelConfig +from ldai import AIConfig, LDAIClient, LDMessage, ModelConfig @pytest.fixture From 951eda13bc01b0515ef6e1ce042f5647d5fbd43e Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 21:22:22 +0000 Subject: [PATCH 02/28] create new config types completion, agent, and judges --- ldai/__init__.py | 28 +++++-- ldai/client.py | 200 +++++++++++++++++++++++++++++++++++++---------- ldai/models.py | 189 +++++++++++++++++++++++++++++++++++++++----- 3 files changed, 348 insertions(+), 69 deletions(-) diff --git a/ldai/__init__.py b/ldai/__init__.py index 91b3a2d..11369c9 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -5,10 +5,16 @@ # Export models for convenience from ldai.models import ( - AIConfig, - LDAIAgent, - LDAIAgentConfig, - LDAIAgentDefaults, + AIAgentConfig, + AIAgentConfigDefault, + AIAgentConfigRequest, + AIAgents, + AICompletionConfig, + AICompletionConfigDefault, + AIJudgeConfig, + AIJudgeConfigDefault, + Judge, + JudgeConfiguration, LDMessage, ModelConfig, ProviderConfig, @@ -16,10 +22,16 @@ __all__ = [ 'LDAIClient', - 'AIConfig', - 'LDAIAgent', - 'LDAIAgentConfig', - 'LDAIAgentDefaults', + 'AIAgentConfig', + 'AIAgentConfigDefault', + 'AIAgentConfigRequest', + 'AIAgents', + 'AICompletionConfig', + 'AICompletionConfigDefault', + 'AIJudgeConfig', + 'AIJudgeConfigDefault', + 'Judge', + 'JudgeConfiguration', 'LDMessage', 'ModelConfig', 'ProviderConfig', diff --git a/ldai/client.py b/ldai/client.py index db2a6ad..4f3cc9e 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -5,11 +5,16 @@ from ldclient.client import LDClient from ldai.models import ( - AIConfig, - LDAIAgent, - LDAIAgentConfig, - LDAIAgentDefaults, - LDAIAgents, + AIAgentConfig, + AIAgentConfigDefault, + AIAgentConfigRequest, + AIAgents, + AICompletionConfig, + AICompletionConfigDefault, + AIJudgeConfig, + AIJudgeConfigDefault, + Judge, + JudgeConfiguration, LDMessage, ModelConfig, ProviderConfig, @@ -23,40 +28,103 @@ class LDAIClient: def __init__(self, client: LDClient): self._client = 
client + def completion_config( + self, + key: str, + context: Context, + default_value: AICompletionConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AICompletionConfig: + """ + Get the value of a completion configuration. + + :param key: The key of the completion configuration. + :param context: The context to evaluate the completion configuration in. + :param default_value: The default value of the completion configuration. + :param variables: Additional variables for the completion configuration. + :return: The completion configuration with a tracker used for gathering metrics. + """ + self._client.track('$ld:ai:config:function:single', context, key, 1) + + model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( + key, context, default_value.to_dict(), variables + ) + + config = AICompletionConfig( + enabled=bool(enabled), + model=model, + messages=messages, + provider=provider, + tracker=tracker, + judge_configuration=judge_configuration, + ) + + return config + def config( self, key: str, context: Context, - default_value: AIConfig, + default_value: AICompletionConfigDefault, variables: Optional[Dict[str, Any]] = None, - ) -> Tuple[AIConfig, LDAIConfigTracker]: + ) -> AICompletionConfig: """ Get the value of a model configuration. + .. deprecated:: Use :meth:`completion_config` instead. This method will be removed in a future version. + :param key: The key of the model configuration. :param context: The context to evaluate the model configuration in. :param default_value: The default value of the model configuration. :param variables: Additional variables for the model configuration. :return: The value of the model configuration along with a tracker used for gathering metrics. """ - self._client.track('$ld:ai:config:function:single', context, key, 1) + return self.completion_config(key, context, default_value, variables) - model, provider, messages, instructions, tracker, enabled = self.__evaluate(key, context, default_value.to_dict(), variables) + def judge_config( + self, + key: str, + context: Context, + default_value: AIJudgeConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AIJudgeConfig: + """ + Get the value of a judge configuration. + + :param key: The key of the judge configuration. + :param context: The context to evaluate the judge configuration in. + :param default_value: The default value of the judge configuration. + :param variables: Additional variables for the judge configuration. + :return: The judge configuration with a tracker used for gathering metrics. 
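A minimal usage sketch of the new `completion_config` entry point, assuming an already-initialized `LDClient` (`ld_client`) and an illustrative flag key:

```python
from ldclient import Context

from ldai import AICompletionConfigDefault, LDAIClient, ModelConfig

ai_client = LDAIClient(ld_client)  # ld_client: an initialized LDClient (assumed)
context = Context.builder('user-key').build()

config = ai_client.completion_config(
    'my-completion-config',          # assumed flag key
    context,
    AICompletionConfigDefault(enabled=False, model=ModelConfig('gpt-4')),
    variables={'topic': 'renewable energy'},
)

if config.enabled and config.tracker:
    config.tracker.track_success()
```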
+ """ + self._client.track('$ld:ai:judge:function:single', context, key, 1) + + model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( + key, context, default_value.to_dict(), variables + ) - config = AIConfig( + # Extract evaluation_metric_keys from the variation + variation = self._client.variation(key, context, default_value.to_dict()) + evaluation_metric_keys = variation.get('evaluationMetricKeys', default_value.evaluation_metric_keys or []) + + config = AIJudgeConfig( enabled=bool(enabled), + evaluation_metric_keys=evaluation_metric_keys, model=model, messages=messages, provider=provider, + tracker=tracker, ) - return config, tracker + return config - def agent( + def agent_config( self, - config: LDAIAgentConfig, + key: str, context: Context, - ) -> LDAIAgent: + default_value: AIAgentConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AIAgentConfig: """ Retrieve a single AI Config agent. @@ -65,39 +133,58 @@ def agent( Example:: - agent = client.agent(LDAIAgentConfig( - key='research_agent', - default_value=LDAIAgentDefaults( + agent = client.agent_config( + 'research_agent', + context, + AIAgentConfigDefault( enabled=True, model=ModelConfig('gpt-4'), instructions="You are a research assistant specializing in {{topic}}." ), variables={'topic': 'climate change'} - ), context) + ) if agent.enabled: research_result = agent.instructions # Interpolated instructions agent.tracker.track_success() - :param config: The agent configuration to use. + :param key: The agent configuration key. :param context: The context to evaluate the agent configuration in. - :return: Configured LDAIAgent instance. + :param default_value: Default agent values. + :param variables: Variables for interpolation. + :return: Configured AIAgentConfig instance. """ # Track single agent usage self._client.track( "$ld:ai:agent:function:single", context, - config.key, + key, 1 ) - return self.__evaluate_agent(config.key, context, config.default_value, config.variables) + return self.__evaluate_agent(key, context, default_value, variables) - def agents( + def agent( self, - agent_configs: List[LDAIAgentConfig], + config: AIAgentConfigRequest, context: Context, - ) -> LDAIAgents: + ) -> AIAgentConfig: + """ + Retrieve a single AI Config agent. + + .. deprecated:: Use :meth:`agent_config` instead. This method will be removed in a future version. + + :param config: The agent configuration to use. + :param context: The context to evaluate the agent configuration in. + :return: Configured AIAgentConfig instance. + """ + return self.agent_config(config.key, context, config.default_value, config.variables) + + def agent_configs( + self, + agent_configs: List[AIAgentConfigRequest], + context: Context, + ) -> AIAgents: """ Retrieve multiple AI agent configurations. @@ -107,18 +194,18 @@ def agents( Example:: - agents = client.agents([ - LDAIAgentConfig( + agents = client.agent_configs([ + AIAgentConfigRequest( key='research_agent', - default_value=LDAIAgentDefaults( + default_value=AIAgentConfigDefault( enabled=True, instructions='You are a research assistant.' ), variables={'topic': 'climate change'} ), - LDAIAgentConfig( + AIAgentConfigRequest( key='writing_agent', - default_value=LDAIAgentDefaults( + default_value=AIAgentConfigDefault( enabled=True, instructions='You are a writing assistant.' ), @@ -131,7 +218,7 @@ def agents( :param agent_configs: List of agent configurations to retrieve. :param context: The context to evaluate the agent configurations in. 
- :return: Dictionary mapping agent keys to their LDAIAgent configurations. + :return: Dictionary mapping agent keys to their AIAgentConfig configurations. """ # Track multiple agents usage agent_count = len(agent_configs) @@ -142,7 +229,7 @@ def agents( agent_count ) - result: LDAIAgents = {} + result: AIAgents = {} for config in agent_configs: agent = self.__evaluate_agent( @@ -155,13 +242,29 @@ def agents( return result + def agents( + self, + agent_configs: List[AIAgentConfigRequest], + context: Context, + ) -> AIAgents: + """ + Retrieve multiple AI agent configurations. + + .. deprecated:: Use :meth:`agent_configs` instead. This method will be removed in a future version. + + :param agent_configs: List of agent configurations to retrieve. + :param context: The context to evaluate the agent configurations in. + :return: Dictionary mapping agent keys to their AIAgentConfig configurations. + """ + return self.agent_configs(agent_configs, context) + def __evaluate( self, key: str, context: Context, default_dict: Dict[str, Any], variables: Optional[Dict[str, Any]] = None, - ) -> Tuple[Optional[ModelConfig], Optional[ProviderConfig], Optional[List[LDMessage]], Optional[str], LDAIConfigTracker, bool]: + ) -> Tuple[Optional[ModelConfig], Optional[ProviderConfig], Optional[List[LDMessage]], Optional[str], LDAIConfigTracker, bool, Optional[Any]]: """ Internal method to evaluate a configuration and extract components. @@ -228,15 +331,31 @@ def __evaluate( enabled = variation.get('_ldMeta', {}).get('enabled', False) - return model, provider_config, messages, instructions, tracker, enabled + # Extract judge configuration + judge_configuration = None + if 'judgeConfiguration' in variation and isinstance(variation['judgeConfiguration'], dict): + judge_config = variation['judgeConfiguration'] + if 'judges' in judge_config and isinstance(judge_config['judges'], list): + judges = [ + Judge( + key=judge['key'], + sampling_rate=judge['samplingRate'] + ) + for judge in judge_config['judges'] + if isinstance(judge, dict) and 'key' in judge and 'samplingRate' in judge + ] + if judges: + judge_configuration = JudgeConfiguration(judges=judges) + + return model, provider_config, messages, instructions, tracker, enabled, judge_configuration def __evaluate_agent( self, key: str, context: Context, - default_value: LDAIAgentDefaults, + default_value: AIAgentConfigDefault, variables: Optional[Dict[str, Any]] = None, - ) -> LDAIAgent: + ) -> AIAgentConfig: """ Internal method to evaluate an agent configuration. @@ -244,21 +363,22 @@ def __evaluate_agent( :param context: The evaluation context. :param default_value: Default agent values. :param variables: Variables for interpolation. - :return: Configured LDAIAgent instance. + :return: Configured AIAgentConfig instance. 
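The judge extraction above is easiest to read against a concrete variation payload; an illustrative (not real) flag value:

```python
# Wire format the parser above expects: camelCase keys, and judge entries
# missing 'key' or 'samplingRate' are skipped rather than raising.
variation = {
    '_ldMeta': {'enabled': True},
    'model': {'name': 'gpt-4'},
    'judgeConfiguration': {
        'judges': [
            {'key': 'relevance-judge', 'samplingRate': 0.1},
            {'key': 'toxicity-judge', 'samplingRate': 1.0},
        ],
    },
}
```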
""" - model, provider, messages, instructions, tracker, enabled = self.__evaluate( + model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( key, context, default_value.to_dict(), variables ) # For agents, prioritize instructions over messages final_instructions = instructions if instructions is not None else default_value.instructions - return LDAIAgent( - enabled=bool(enabled) if enabled is not None else default_value.enabled, + return AIAgentConfig( + enabled=bool(enabled) if enabled is not None else (default_value.enabled or False), model=model or default_value.model, provider=provider or default_value.provider, instructions=final_instructions, tracker=tracker, + judge_configuration=judge_configuration or default_value.judge_configuration, ) def __interpolate_template(self, template: str, variables: Dict[str, Any]) -> str: diff --git a/ldai/models.py b/ldai/models.py index 4eef5a2..83b5326 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -100,18 +100,64 @@ def to_dict(self) -> dict: } +# ============================================================================ +# Judge Types +# ============================================================================ + +@dataclass(frozen=True) +class Judge: + """ + Configuration for a single judge attachment. + """ + key: str + sampling_rate: float + + def to_dict(self) -> dict: + """ + Render the judge as a dictionary object. + """ + return { + 'key': self.key, + 'samplingRate': self.sampling_rate, + } + + +@dataclass(frozen=True) +class JudgeConfiguration: + """ + Configuration for judge attachment to AI Configs. + """ + judges: List[Judge] + + def to_dict(self) -> dict: + """ + Render the judge configuration as a dictionary object. + """ + return { + 'judges': [judge.to_dict() for judge in self.judges], + } + + +# ============================================================================ +# Completion Config Types +# ============================================================================ + @dataclass(frozen=True) -class AIConfig: +class AICompletionConfigDefault: + """ + Default Completion AI Config (default mode). + """ enabled: Optional[bool] = None model: Optional[ModelConfig] = None messages: Optional[List[LDMessage]] = None provider: Optional[ProviderConfig] = None + judge_configuration: Optional[JudgeConfiguration] = None def to_dict(self) -> dict: """ - Render the given default values as an AIConfig-compatible dictionary object. + Render the given default values as an AICompletionConfigDefault-compatible dictionary object. """ - return { + result = { '_ldMeta': { 'enabled': self.enabled or False, }, @@ -119,25 +165,59 @@ def to_dict(self) -> dict: 'messages': [message.to_dict() for message in self.messages] if self.messages else None, 'provider': self.provider.to_dict() if self.provider else None, } + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() + return result @dataclass(frozen=True) -class LDAIAgent: +class AICompletionConfig: + """ + Completion AI Config (default mode). """ - Represents an AI agent configuration with instructions and model settings. + enabled: bool + model: Optional[ModelConfig] = None + messages: Optional[List[LDMessage]] = None + provider: Optional[ProviderConfig] = None + tracker: Optional[LDAIConfigTracker] = None + judge_configuration: Optional[JudgeConfiguration] = None + + def to_dict(self) -> dict: + """ + Render the given completion config as a dictionary object. 
+ """ + result = { + '_ldMeta': { + 'enabled': self.enabled, + }, + 'model': self.model.to_dict() if self.model else None, + 'messages': [message.to_dict() for message in self.messages] if self.messages else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() + return result + - An agent is similar to an AIConfig but focuses on instructions rather than messages, - making it suitable for AI assistant/agent use cases. +# ============================================================================ +# Agent Config Types +# ============================================================================ + + +@dataclass(frozen=True) +class AIAgentConfigDefault: + """ + Default Agent-specific AI Config with instructions. """ enabled: Optional[bool] = None model: Optional[ModelConfig] = None provider: Optional[ProviderConfig] = None instructions: Optional[str] = None - tracker: Optional[LDAIConfigTracker] = None + judge_configuration: Optional[JudgeConfiguration] = None def to_dict(self) -> Dict[str, Any]: """ - Render the given agent as a dictionary object. + Render the given agent config default as a dictionary object. """ result: Dict[str, Any] = { '_ldMeta': { @@ -148,50 +228,117 @@ def to_dict(self) -> Dict[str, Any]: } if self.instructions is not None: result['instructions'] = self.instructions + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() return result @dataclass(frozen=True) -class LDAIAgentDefaults: +class AIAgentConfig: """ - Default values for AI agent configurations. - - Similar to LDAIAgent but without tracker and with optional enabled field, - used as fallback values when agent configurations are not available. + Agent-specific AI Config with instructions. """ - enabled: Optional[bool] = None + enabled: bool model: Optional[ModelConfig] = None provider: Optional[ProviderConfig] = None instructions: Optional[str] = None + tracker: Optional[LDAIConfigTracker] = None + judge_configuration: Optional[JudgeConfiguration] = None def to_dict(self) -> Dict[str, Any]: """ - Render the given agent defaults as a dictionary object. + Render the given agent config as a dictionary object. """ result: Dict[str, Any] = { '_ldMeta': { - 'enabled': self.enabled or False, + 'enabled': self.enabled, }, 'model': self.model.to_dict() if self.model else None, 'provider': self.provider.to_dict() if self.provider else None, } if self.instructions is not None: result['instructions'] = self.instructions + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() return result +# ============================================================================ +# Judge Config Types +# ============================================================================ + +@dataclass(frozen=True) +class AIJudgeConfigDefault: + """ + Default Judge-specific AI Config with required evaluation metric key. + """ + enabled: Optional[bool] = None + model: Optional[ModelConfig] = None + messages: Optional[List[LDMessage]] = None + provider: Optional[ProviderConfig] = None + evaluation_metric_keys: Optional[List[str]] = None + + def to_dict(self) -> dict: + """ + Render the given judge config default as a dictionary object. 
+ """ + result = { + '_ldMeta': { + 'enabled': self.enabled or False, + }, + 'model': self.model.to_dict() if self.model else None, + 'messages': [message.to_dict() for message in self.messages] if self.messages else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + if self.evaluation_metric_keys is not None: + result['evaluationMetricKeys'] = self.evaluation_metric_keys + return result + + +@dataclass(frozen=True) +class AIJudgeConfig: + """ + Judge-specific AI Config with required evaluation metric key. + """ + enabled: bool + evaluation_metric_keys: List[str] + model: Optional[ModelConfig] = None + messages: Optional[List[LDMessage]] = None + provider: Optional[ProviderConfig] = None + tracker: Optional[LDAIConfigTracker] = None + + def to_dict(self) -> dict: + """ + Render the given judge config as a dictionary object. + """ + result = { + '_ldMeta': { + 'enabled': self.enabled, + }, + 'evaluationMetricKeys': self.evaluation_metric_keys, + 'model': self.model.to_dict() if self.model else None, + 'messages': [message.to_dict() for message in self.messages] if self.messages else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + return result + + +# ============================================================================ +# Agent Request Config +# ============================================================================ + @dataclass -class LDAIAgentConfig: +class AIAgentConfigRequest: """ - Configuration for individual agent in batch requests. + Configuration for a single agent request. Combines agent key with its specific default configuration and variables. """ key: str - default_value: LDAIAgentDefaults + default_value: AIAgentConfigDefault variables: Optional[Dict[str, Any]] = None # Type alias for multiple agents -LDAIAgents = Dict[str, LDAIAgent] +AIAgents = Dict[str, AIAgentConfig] From ae7516be21a304942ddf0a65493eb54e8f8bc984 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 21:26:32 +0000 Subject: [PATCH 03/28] use inheritance for configs for consistency --- ldai/models.py | 131 +++++++++++++++++++++++-------------------------- 1 file changed, 61 insertions(+), 70 deletions(-) diff --git a/ldai/models.py b/ldai/models.py index 83b5326..e8ddf21 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -139,61 +139,91 @@ def to_dict(self) -> dict: # ============================================================================ -# Completion Config Types +# Base AI Config Types # ============================================================================ @dataclass(frozen=True) -class AICompletionConfigDefault: +class AIConfigDefault: """ - Default Completion AI Config (default mode). + Base AI Config interface for default implementations with optional enabled property. """ enabled: Optional[bool] = None model: Optional[ModelConfig] = None - messages: Optional[List[LDMessage]] = None provider: Optional[ProviderConfig] = None - judge_configuration: Optional[JudgeConfiguration] = None - def to_dict(self) -> dict: + def _base_to_dict(self) -> Dict[str, Any]: """ - Render the given default values as an AICompletionConfigDefault-compatible dictionary object. + Render the base config fields as a dictionary object. 
""" - result = { + return { '_ldMeta': { 'enabled': self.enabled or False, }, 'model': self.model.to_dict() if self.model else None, - 'messages': [message.to_dict() for message in self.messages] if self.messages else None, 'provider': self.provider.to_dict() if self.provider else None, } - if self.judge_configuration is not None: - result['judgeConfiguration'] = self.judge_configuration.to_dict() - return result @dataclass(frozen=True) -class AICompletionConfig: +class AIConfig: """ - Completion AI Config (default mode). + Base AI Config interface without mode-specific fields. """ enabled: bool model: Optional[ModelConfig] = None - messages: Optional[List[LDMessage]] = None provider: Optional[ProviderConfig] = None tracker: Optional[LDAIConfigTracker] = None - judge_configuration: Optional[JudgeConfiguration] = None - def to_dict(self) -> dict: + def _base_to_dict(self) -> Dict[str, Any]: """ - Render the given completion config as a dictionary object. + Render the base config fields as a dictionary object. """ - result = { + return { '_ldMeta': { 'enabled': self.enabled, }, 'model': self.model.to_dict() if self.model else None, - 'messages': [message.to_dict() for message in self.messages] if self.messages else None, 'provider': self.provider.to_dict() if self.provider else None, } + + +# ============================================================================ +# Completion Config Types +# ============================================================================ + +@dataclass(frozen=True) +class AICompletionConfigDefault(AIConfigDefault): + """ + Default Completion AI Config (default mode). + """ + messages: Optional[List[LDMessage]] = None + judge_configuration: Optional[JudgeConfiguration] = None + + def to_dict(self) -> dict: + """ + Render the given default values as an AICompletionConfigDefault-compatible dictionary object. + """ + result = self._base_to_dict() + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() + return result + + +@dataclass(frozen=True) +class AICompletionConfig(AIConfig): + """ + Completion AI Config (default mode). + """ + messages: Optional[List[LDMessage]] = None + judge_configuration: Optional[JudgeConfiguration] = None + + def to_dict(self) -> dict: + """ + Render the given completion config as a dictionary object. + """ + result = self._base_to_dict() + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None if self.judge_configuration is not None: result['judgeConfiguration'] = self.judge_configuration.to_dict() return result @@ -203,15 +233,11 @@ def to_dict(self) -> dict: # Agent Config Types # ============================================================================ - @dataclass(frozen=True) -class AIAgentConfigDefault: +class AIAgentConfigDefault(AIConfigDefault): """ Default Agent-specific AI Config with instructions. """ - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None - provider: Optional[ProviderConfig] = None instructions: Optional[str] = None judge_configuration: Optional[JudgeConfiguration] = None @@ -219,13 +245,7 @@ def to_dict(self) -> Dict[str, Any]: """ Render the given agent config default as a dictionary object. 
""" - result: Dict[str, Any] = { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'provider': self.provider.to_dict() if self.provider else None, - } + result = self._base_to_dict() if self.instructions is not None: result['instructions'] = self.instructions if self.judge_configuration is not None: @@ -234,28 +254,18 @@ def to_dict(self) -> Dict[str, Any]: @dataclass(frozen=True) -class AIAgentConfig: +class AIAgentConfig(AIConfig): """ Agent-specific AI Config with instructions. """ - enabled: bool - model: Optional[ModelConfig] = None - provider: Optional[ProviderConfig] = None instructions: Optional[str] = None - tracker: Optional[LDAIConfigTracker] = None judge_configuration: Optional[JudgeConfiguration] = None def to_dict(self) -> Dict[str, Any]: """ Render the given agent config as a dictionary object. """ - result: Dict[str, Any] = { - '_ldMeta': { - 'enabled': self.enabled, - }, - 'model': self.model.to_dict() if self.model else None, - 'provider': self.provider.to_dict() if self.provider else None, - } + result = self._base_to_dict() if self.instructions is not None: result['instructions'] = self.instructions if self.judge_configuration is not None: @@ -268,58 +278,39 @@ def to_dict(self) -> Dict[str, Any]: # ============================================================================ @dataclass(frozen=True) -class AIJudgeConfigDefault: +class AIJudgeConfigDefault(AIConfigDefault): """ Default Judge-specific AI Config with required evaluation metric key. """ - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None messages: Optional[List[LDMessage]] = None - provider: Optional[ProviderConfig] = None evaluation_metric_keys: Optional[List[str]] = None def to_dict(self) -> dict: """ Render the given judge config default as a dictionary object. """ - result = { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'messages': [message.to_dict() for message in self.messages] if self.messages else None, - 'provider': self.provider.to_dict() if self.provider else None, - } + result = self._base_to_dict() + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None if self.evaluation_metric_keys is not None: result['evaluationMetricKeys'] = self.evaluation_metric_keys return result @dataclass(frozen=True) -class AIJudgeConfig: +class AIJudgeConfig(AIConfig): """ Judge-specific AI Config with required evaluation metric key. """ - enabled: bool evaluation_metric_keys: List[str] - model: Optional[ModelConfig] = None messages: Optional[List[LDMessage]] = None - provider: Optional[ProviderConfig] = None - tracker: Optional[LDAIConfigTracker] = None def to_dict(self) -> dict: """ Render the given judge config as a dictionary object. 
""" - result = { - '_ldMeta': { - 'enabled': self.enabled, - }, - 'evaluationMetricKeys': self.evaluation_metric_keys, - 'model': self.model.to_dict() if self.model else None, - 'messages': [message.to_dict() for message in self.messages] if self.messages else None, - 'provider': self.provider.to_dict() if self.provider else None, - } + result = self._base_to_dict() + result['evaluationMetricKeys'] = self.evaluation_metric_keys + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None return result From 0d933d2d9b0721339a77f2d656aaa50a74fb7d2a Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 21:26:51 +0000 Subject: [PATCH 04/28] added deprecations for old types --- ldai/__init__.py | 10 ++++++++++ ldai/models.py | 25 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/ldai/__init__.py b/ldai/__init__.py index 11369c9..222c007 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -18,6 +18,11 @@ LDMessage, ModelConfig, ProviderConfig, + # Deprecated aliases for backward compatibility + AIConfig, + LDAIAgent, + LDAIAgentConfig, + LDAIAgentDefaults, ) __all__ = [ @@ -35,4 +40,9 @@ 'LDMessage', 'ModelConfig', 'ProviderConfig', + # Deprecated exports + 'AIConfig', + 'LDAIAgent', + 'LDAIAgentConfig', + 'LDAIAgentDefaults', ] diff --git a/ldai/models.py b/ldai/models.py index e8ddf21..f83964a 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -1,3 +1,4 @@ +import warnings from dataclasses import dataclass from typing import Any, Dict, List, Literal, Optional @@ -333,3 +334,27 @@ class AIAgentConfigRequest: # Type alias for multiple agents AIAgents = Dict[str, AIAgentConfig] + +# ============================================================================ +# Deprecated Type Aliases for Backward Compatibility +# ============================================================================ + +# Note: These are type aliases that point to the new types. +# Since Python uses duck typing, these will work at runtime even if type checkers complain. 
+# The old AIConfig had optional enabled, so it maps to AICompletionConfigDefault +# The old AIConfig return type had required enabled, so it maps to AICompletionConfig + +# Deprecated: Use AICompletionConfigDefault instead +# This was the old AIConfig with optional enabled (used as input/default) +# Note: We map to AICompletionConfigDefault since the old AIConfig had optional enabled +AIConfig = AICompletionConfigDefault + +# Deprecated: Use AIAgentConfigDefault instead +LDAIAgentDefaults = AIAgentConfigDefault + +# Deprecated: Use AIAgentConfigRequest instead +LDAIAgentConfig = AIAgentConfigRequest + +# Deprecated: Use AIAgentConfig instead (note: this was the old return type) +LDAIAgent = AIAgentConfig + From 82718075cdbab1410729a52ba5f8950a866e5e76 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 21:35:34 +0000 Subject: [PATCH 05/28] create the ai provider interface and factory --- ldai/models.py | 5 +- ldai/providers/__init__.py | 11 ++ ldai/providers/ai_provider.py | 96 ++++++++++++++++ ldai/providers/ai_provider_factory.py | 154 ++++++++++++++++++++++++++ ldai/providers/types.py | 37 +++++++ 5 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 ldai/providers/__init__.py create mode 100644 ldai/providers/ai_provider.py create mode 100644 ldai/providers/ai_provider_factory.py create mode 100644 ldai/providers/types.py diff --git a/ldai/models.py b/ldai/models.py index f83964a..0b961f7 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -1,6 +1,6 @@ import warnings from dataclasses import dataclass -from typing import Any, Dict, List, Literal, Optional +from typing import Any, Dict, List, Literal, Optional, Union from ldai.tracker import LDAIConfigTracker @@ -334,6 +334,9 @@ class AIAgentConfigRequest: # Type alias for multiple agents AIAgents = Dict[str, AIAgentConfig] +# Type alias for all AI Config variants +AIConfigKind = Union[AIAgentConfig, AICompletionConfig, AIJudgeConfig] + # ============================================================================ # Deprecated Type Aliases for Backward Compatibility diff --git a/ldai/providers/__init__.py b/ldai/providers/__init__.py new file mode 100644 index 0000000..8cac547 --- /dev/null +++ b/ldai/providers/__init__.py @@ -0,0 +1,11 @@ +"""AI Provider interfaces and factory for LaunchDarkly AI SDK.""" + +from ldai.providers.ai_provider import AIProvider +from ldai.providers.ai_provider_factory import AIProviderFactory, SupportedAIProvider + +__all__ = [ + 'AIProvider', + 'AIProviderFactory', + 'SupportedAIProvider', +] + diff --git a/ldai/providers/ai_provider.py b/ldai/providers/ai_provider.py new file mode 100644 index 0000000..5863a74 --- /dev/null +++ b/ldai/providers/ai_provider.py @@ -0,0 +1,96 @@ +"""Abstract base class for AI providers.""" + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional, Union + +from ldai.models import AIConfigKind, LDMessage +from ldai.providers.types import ChatResponse, StructuredResponse + + +class AIProvider(ABC): + """ + Abstract base class for AI providers that implement chat model functionality. + + This class provides the contract that all provider implementations must follow + to integrate with LaunchDarkly's tracking and configuration capabilities. + + Following the AICHAT spec recommendation to use base classes with non-abstract methods + for better extensibility and backwards compatibility. + """ + + def __init__(self, logger: Optional[Any] = None): + """ + Initialize the AI provider. 
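Because the deprecated names are plain aliases, old imports keep resolving to the new classes; a quick check:

```python
from ldai import (AIAgentConfigDefault, AICompletionConfigDefault, AIConfig,
                  LDAIAgentDefaults)

# The aliases are the same class objects, not copies.
assert AIConfig is AICompletionConfigDefault
assert LDAIAgentDefaults is AIAgentConfigDefault
```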
+ + :param logger: Optional logger for logging provider operations. + """ + self.logger = logger + + async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: + """ + Invoke the chat model with an array of messages. + + This method should convert messages to provider format, invoke the model, + and return a ChatResponse with the result and metrics. + + Default implementation takes no action and returns a placeholder response. + Provider implementations should override this method. + + :param messages: Array of LDMessage objects representing the conversation + :return: ChatResponse containing the model's response + """ + if self.logger: + self.logger.warn('invokeModel not implemented by this provider') + + from ldai.models import LDMessage + from ldai.providers.types import LDAIMetrics + + return ChatResponse( + message=LDMessage(role='assistant', content=''), + metrics=LDAIMetrics(success=False, usage=None), + ) + + async def invoke_structured_model( + self, + messages: List[LDMessage], + response_structure: Dict[str, Any], + ) -> StructuredResponse: + """ + Invoke the chat model with structured output support. + + This method should convert messages to provider format, invoke the model with + structured output configuration, and return a structured response. + + Default implementation takes no action and returns a placeholder response. + Provider implementations should override this method. + + :param messages: Array of LDMessage objects representing the conversation + :param response_structure: Dictionary of output configurations keyed by output name + :return: StructuredResponse containing the structured data + """ + if self.logger: + self.logger.warn('invokeStructuredModel not implemented by this provider') + + from ldai.providers.types import LDAIMetrics + + return StructuredResponse( + data={}, + raw_response='', + metrics=LDAIMetrics(success=False, usage=None), + ) + + @staticmethod + @abstractmethod + async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'AIProvider': + """ + Static method that constructs an instance of the provider. + + Each provider implementation must provide their own static create method + that accepts an AIConfigKind and returns a configured instance. + + :param ai_config: The LaunchDarkly AI configuration + :param logger: Optional logger for the provider + :return: Configured provider instance + """ + raise NotImplementedError('Provider implementations must override the static create method') + diff --git a/ldai/providers/ai_provider_factory.py b/ldai/providers/ai_provider_factory.py new file mode 100644 index 0000000..dab3796 --- /dev/null +++ b/ldai/providers/ai_provider_factory.py @@ -0,0 +1,154 @@ +"""Factory for creating AIProvider instances based on the provider configuration.""" + +import importlib +from typing import Any, List, Literal, Optional, Type + +from ldai.models import AIConfigKind +from ldai.providers.ai_provider import AIProvider + + +# List of supported AI providers +SUPPORTED_AI_PROVIDERS = [ + # Multi-provider packages should be last in the list + 'langchain', +] + +# Type representing the supported AI providers +SupportedAIProvider = Literal['langchain'] + + +class AIProviderFactory: + """ + Factory for creating AIProvider instances based on the provider configuration. 
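A minimal sketch of a third-party provider built on this base class; `EchoProvider` is hypothetical and only overrides what the contract requires:

```python
from typing import Any, List, Optional

from ldai.models import AIConfigKind, LDMessage
from ldai.providers import AIProvider
from ldai.providers.types import ChatResponse, LDAIMetrics


class EchoProvider(AIProvider):
    async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse:
        # Echo the last message back as the assistant reply.
        content = messages[-1].content if messages else ''
        return ChatResponse(
            message=LDMessage(role='assistant', content=content),
            metrics=LDAIMetrics(success=True),
        )

    @staticmethod
    async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'EchoProvider':
        return EchoProvider(logger)
```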
+ """ + + @staticmethod + async def create( + ai_config: AIConfigKind, + logger: Optional[Any] = None, + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Optional[AIProvider]: + """ + Create an AIProvider instance based on the AI configuration. + + This method attempts to load provider-specific implementations dynamically. + Returns None if the provider is not supported. + + :param ai_config: The AI configuration + :param logger: Optional logger for logging provider initialization + :param default_ai_provider: Optional default AI provider to use + :return: AIProvider instance or None if not supported + """ + provider_name = ai_config.provider.name.lower() if ai_config.provider else None + # Determine which providers to try based on default_ai_provider + providers_to_try = AIProviderFactory._get_providers_to_try(default_ai_provider, provider_name) + + # Try each provider in order + for provider_type in providers_to_try: + provider = await AIProviderFactory._try_create_provider(provider_type, ai_config, logger) + if provider: + return provider + + # If no provider was successfully created, log a warning + if logger: + logger.warn( + f"Provider is not supported or failed to initialize: {provider_name or 'unknown'}" + ) + return None + + @staticmethod + def _get_providers_to_try( + default_ai_provider: Optional[SupportedAIProvider], + provider_name: Optional[str], + ) -> List[SupportedAIProvider]: + """ + Determine which providers to try based on default_ai_provider and provider_name. + + :param default_ai_provider: Optional default provider to use + :param provider_name: Optional provider name from config + :return: List of providers to try in order + """ + # If default_ai_provider is set, only try that specific provider + if default_ai_provider: + return [default_ai_provider] + + # If no default_ai_provider is set, try all providers in order + provider_set = set() + + # First try the specific provider if it's supported + if provider_name and provider_name in SUPPORTED_AI_PROVIDERS: + provider_set.add(provider_name) # type: ignore + + # Then try multi-provider packages, but avoid duplicates + multi_provider_packages: List[SupportedAIProvider] = ['langchain', 'vercel'] + for provider in multi_provider_packages: + provider_set.add(provider) + + return list(provider_set) + + @staticmethod + async def _try_create_provider( + provider_type: SupportedAIProvider, + ai_config: AIConfigKind, + logger: Optional[Any] = None, + ) -> Optional[AIProvider]: + """ + Try to create a provider of the specified type. + + :param provider_type: Type of provider to create + :param ai_config: AI configuration + :param logger: Optional logger + :return: AIProvider instance or None if creation failed + """ + provider_mappings = { + 'openai': ('launchdarkly_server_sdk_ai_openai', 'OpenAIProvider'), + 'langchain': ('launchdarkly_server_sdk_ai_langchain', 'LangChainProvider'), + 'vercel': ('launchdarkly_server_sdk_ai_vercel', 'VercelProvider'), + } + + if provider_type not in provider_mappings: + return None + + package_name, provider_class_name = provider_mappings[provider_type] + return await AIProviderFactory._create_provider( + package_name, provider_class_name, ai_config, logger + ) + + @staticmethod + async def _create_provider( + package_name: str, + provider_class_name: str, + ai_config: AIConfigKind, + logger: Optional[Any] = None, + ) -> Optional[AIProvider]: + """ + Create a provider instance dynamically. 
+ + :param package_name: Name of the package containing the provider + :param provider_class_name: Name of the provider class + :param ai_config: AI configuration + :param logger: Optional logger + :return: AIProvider instance or None if creation failed + """ + try: + # Try to dynamically import the provider + # This will work if the package is installed + module = importlib.import_module(package_name) + provider_class: Type[AIProvider] = getattr(module, provider_class_name) + + provider = await provider_class.create(ai_config, logger) + if logger: + logger.debug( + f"Successfully created AIProvider for: {ai_config.provider.name if ai_config.provider else 'unknown'} " + f"with package {package_name}" + ) + return provider + except (ImportError, AttributeError, Exception) as error: + # If the provider is not available or creation fails, return None + if logger: + logger.warn( + f"Error creating AIProvider for: {ai_config.provider.name if ai_config.provider else 'unknown'} " + f"with package {package_name}: {error}" + ) + return None + diff --git a/ldai/providers/types.py b/ldai/providers/types.py new file mode 100644 index 0000000..4bfd692 --- /dev/null +++ b/ldai/providers/types.py @@ -0,0 +1,37 @@ +"""Types for AI provider responses.""" + +from dataclasses import dataclass +from typing import Any, Dict, List, Optional + +from ldai.models import LDMessage +from ldai.tracker import TokenUsage + + +@dataclass +class LDAIMetrics: + """ + Metrics information for AI operations that includes success status and token usage. + """ + success: bool + usage: Optional[TokenUsage] = None + + +@dataclass +class ChatResponse: + """ + Chat response structure. + """ + message: LDMessage + metrics: LDAIMetrics + evaluations: Optional[List[Any]] = None # List of JudgeResponse, will be populated later + + +@dataclass +class StructuredResponse: + """ + Structured response from AI models. 
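Consuming these response types is straightforward; a small helper sketch (`TokenUsage` fields per `ldai.tracker`, as constructed elsewhere in this patch):

```python
from ldai.providers.types import ChatResponse


def summarize_response(response: ChatResponse) -> str:
    usage = response.metrics.usage
    total = usage.total if usage else 0
    status = 'ok' if response.metrics.success else 'failed'
    return f'{status}: {len(response.message.content)} chars, {total} tokens'
```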
+ """ + data: Dict[str, Any] + raw_response: str + metrics: LDAIMetrics + From 6ee62b45e4e6559ba127f5ed5deae72a1d06eebe Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 22:51:41 +0000 Subject: [PATCH 06/28] create a langchain implementation of the ai provider --- ldai/providers/__init__.py | 20 +- ldai/providers/ai_provider_factory.py | 21 +- ldai/providers/langchain/__init__.py | 284 ++++++++++++++++++++++++++ 3 files changed, 316 insertions(+), 9 deletions(-) create mode 100644 ldai/providers/langchain/__init__.py diff --git a/ldai/providers/__init__.py b/ldai/providers/__init__.py index 8cac547..1beffb4 100644 --- a/ldai/providers/__init__.py +++ b/ldai/providers/__init__.py @@ -3,9 +3,19 @@ from ldai.providers.ai_provider import AIProvider from ldai.providers.ai_provider_factory import AIProviderFactory, SupportedAIProvider -__all__ = [ - 'AIProvider', - 'AIProviderFactory', - 'SupportedAIProvider', -] +# Export LangChain provider if available +try: + from ldai.providers.langchain import LangChainProvider + __all__ = [ + 'AIProvider', + 'AIProviderFactory', + 'LangChainProvider', + 'SupportedAIProvider', + ] +except ImportError: + __all__ = [ + 'AIProvider', + 'AIProviderFactory', + 'SupportedAIProvider', + ] diff --git a/ldai/providers/ai_provider_factory.py b/ldai/providers/ai_provider_factory.py index dab3796..41cc1c2 100644 --- a/ldai/providers/ai_provider_factory.py +++ b/ldai/providers/ai_provider_factory.py @@ -80,7 +80,7 @@ def _get_providers_to_try( provider_set.add(provider_name) # type: ignore # Then try multi-provider packages, but avoid duplicates - multi_provider_packages: List[SupportedAIProvider] = ['langchain', 'vercel'] + multi_provider_packages: List[SupportedAIProvider] = ['langchain'] for provider in multi_provider_packages: provider_set.add(provider) @@ -100,10 +100,23 @@ async def _try_create_provider( :param logger: Optional logger :return: AIProvider instance or None if creation failed """ + # Handle built-in providers (part of this package) + if provider_type == 'langchain': + try: + from ldai.providers.langchain import LangChainProvider + return await LangChainProvider.create(ai_config, logger) + except ImportError as error: + if logger: + logger.warn( + f"Error creating LangChainProvider: {error}. " + f"Make sure langchain and langchain-core packages are installed." 
+ ) + return None + + # For future external providers, use dynamic import provider_mappings = { - 'openai': ('launchdarkly_server_sdk_ai_openai', 'OpenAIProvider'), - 'langchain': ('launchdarkly_server_sdk_ai_langchain', 'LangChainProvider'), - 'vercel': ('launchdarkly_server_sdk_ai_vercel', 'VercelProvider'), + # 'openai': ('launchdarkly_server_sdk_ai_openai', 'OpenAIProvider'), + # 'vercel': ('launchdarkly_server_sdk_ai_vercel', 'VercelProvider'), } if provider_type not in provider_mappings: diff --git a/ldai/providers/langchain/__init__.py b/ldai/providers/langchain/__init__.py new file mode 100644 index 0000000..af84dc8 --- /dev/null +++ b/ldai/providers/langchain/__init__.py @@ -0,0 +1,284 @@ +"""LangChain implementation of AIProvider for LaunchDarkly AI SDK.""" + +from typing import Any, Dict, List, Optional + +from langchain_core.chat_models import BaseChatModel +from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage + +from ldai.models import AIConfigKind, LDMessage +from ldai.providers.ai_provider import AIProvider +from ldai.providers.types import ChatResponse, LDAIMetrics, StructuredResponse +from ldai.tracker import TokenUsage + + +class LangChainProvider(AIProvider): + """ + LangChain implementation of AIProvider. + + This provider integrates LangChain models with LaunchDarkly's tracking capabilities. + """ + + def __init__(self, llm: BaseChatModel, logger: Optional[Any] = None): + """ + Initialize the LangChain provider. + + :param llm: LangChain BaseChatModel instance + :param logger: Optional logger for logging provider operations + """ + super().__init__(logger) + self._llm = llm + + # ============================================================================= + # MAIN FACTORY METHOD + # ============================================================================= + + @staticmethod + async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'LangChainProvider': + """ + Static factory method to create a LangChain AIProvider from an AI configuration. + + :param ai_config: The LaunchDarkly AI configuration + :param logger: Optional logger for the provider + :return: Configured LangChainProvider instance + """ + llm = await LangChainProvider.create_langchain_model(ai_config) + return LangChainProvider(llm, logger) + + # ============================================================================= + # INSTANCE METHODS (AIProvider Implementation) + # ============================================================================= + + async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: + """ + Invoke the LangChain model with an array of messages. + + :param messages: Array of LDMessage objects representing the conversation + :return: ChatResponse containing the model's response + """ + try: + # Convert LDMessage[] to LangChain messages + langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) + + # Get the LangChain response + response: AIMessage = await self._llm.ainvoke(langchain_messages) + + # Generate metrics early (assumes success by default) + metrics = LangChainProvider.get_ai_metrics_from_response(response) + + # Extract text content from the response + content: str = '' + if isinstance(response.content, str): + content = response.content + else: + # Log warning for non-string content (likely multimodal) + if self.logger: + self.logger.warn( + f"Multimodal response not supported, expecting a string. 
" + f"Content type: {type(response.content)}, Content: {response.content}" + ) + # Update metrics to reflect content loss + metrics.success = False + + # Create the assistant message + from ldai.models import LDMessage + assistant_message = LDMessage(role='assistant', content=content) + + return ChatResponse( + message=assistant_message, + metrics=metrics, + ) + except Exception as error: + if self.logger: + self.logger.warn(f'LangChain model invocation failed: {error}') + + from ldai.models import LDMessage + return ChatResponse( + message=LDMessage(role='assistant', content=''), + metrics=LDAIMetrics(success=False, usage=None), + ) + + async def invoke_structured_model( + self, + messages: List[LDMessage], + response_structure: Dict[str, Any], + ) -> StructuredResponse: + """ + Invoke the LangChain model with structured output support. + + :param messages: Array of LDMessage objects representing the conversation + :param response_structure: Dictionary of output configurations keyed by output name + :return: StructuredResponse containing the structured data + """ + try: + # Convert LDMessage[] to LangChain messages + langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) + + # Get the LangChain response with structured output + # Note: with_structured_output is available on BaseChatModel in newer LangChain versions + if hasattr(self._llm, 'with_structured_output'): + structured_llm = self._llm.with_structured_output(response_structure) + response = await structured_llm.ainvoke(langchain_messages) + else: + # Fallback: invoke normally and try to parse as JSON + response_obj = await self._llm.ainvoke(langchain_messages) + if isinstance(response_obj, AIMessage): + import json + try: + response = json.loads(response_obj.content) + except json.JSONDecodeError: + response = {'content': response_obj.content} + else: + response = response_obj + + # Using structured output doesn't support metrics + metrics = LDAIMetrics( + success=True, + usage=TokenUsage(total=0, input=0, output=0), + ) + + import json + return StructuredResponse( + data=response if isinstance(response, dict) else {'result': response}, + raw_response=json.dumps(response) if not isinstance(response, str) else response, + metrics=metrics, + ) + except Exception as error: + if self.logger: + self.logger.warn(f'LangChain structured model invocation failed: {error}') + + return StructuredResponse( + data={}, + raw_response='', + metrics=LDAIMetrics( + success=False, + usage=TokenUsage(total=0, input=0, output=0), + ), + ) + + def get_chat_model(self) -> BaseChatModel: + """ + Get the underlying LangChain model instance. + + :return: The LangChain BaseChatModel instance + """ + return self._llm + + # ============================================================================= + # STATIC UTILITY METHODS + # ============================================================================= + + @staticmethod + def map_provider(ld_provider_name: str) -> str: + """ + Map LaunchDarkly provider names to LangChain provider names. + + This method enables seamless integration between LaunchDarkly's standardized + provider naming and LangChain's naming conventions. 
+ + :param ld_provider_name: LaunchDarkly provider name + :return: LangChain provider name + """ + lowercased_name = ld_provider_name.lower() + + mapping: Dict[str, str] = { + 'gemini': 'google-genai', + } + + return mapping.get(lowercased_name, lowercased_name) + + @staticmethod + def get_ai_metrics_from_response(response: AIMessage) -> LDAIMetrics: + """ + Get AI metrics from a LangChain provider response. + + This method extracts token usage information and success status from LangChain responses + and returns a LaunchDarkly LDAIMetrics object. + + :param response: The response from the LangChain model + :return: LDAIMetrics with success status and token usage + """ + # Extract token usage if available + usage: Optional[TokenUsage] = None + if hasattr(response, 'response_metadata') and response.response_metadata: + token_usage = response.response_metadata.get('token_usage') + if token_usage: + usage = TokenUsage( + total=token_usage.get('total_tokens', 0) or token_usage.get('totalTokens', 0) or 0, + input=token_usage.get('prompt_tokens', 0) or token_usage.get('promptTokens', 0) or 0, + output=token_usage.get('completion_tokens', 0) or token_usage.get('completionTokens', 0) or 0, + ) + + # LangChain responses that complete successfully are considered successful by default + return LDAIMetrics(success=True, usage=usage) + + @staticmethod + def convert_messages_to_langchain(messages: List[LDMessage]) -> List[BaseMessage]: + """ + Convert LaunchDarkly messages to LangChain messages. + + This helper method enables developers to work directly with LangChain message types + while maintaining compatibility with LaunchDarkly's standardized message format. + + :param messages: List of LDMessage objects + :return: List of LangChain message objects + """ + result: List[BaseMessage] = [] + for msg in messages: + if msg.role == 'system': + result.append(SystemMessage(content=msg.content)) + elif msg.role == 'user': + result.append(HumanMessage(content=msg.content)) + elif msg.role == 'assistant': + result.append(AIMessage(content=msg.content)) + else: + raise ValueError(f'Unsupported message role: {msg.role}') + return result + + @staticmethod + async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: + """ + Create a LangChain model from an AI configuration. + + This public helper method enables developers to initialize their own LangChain models + using LaunchDarkly AI configurations. 
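Because `create_langchain_model` is public, callers can bootstrap their own LangChain model from an evaluated config and wrap it themselves; provider names are normalized first (for example, `gemini` becomes `google-genai`). A sketch, assuming `ai_config` was evaluated by the LaunchDarkly AI client:

```python
from ldai.providers.langchain import LangChainProvider


async def build_provider(ai_config) -> LangChainProvider:
    # Model name, provider, and parameters all come from the evaluated config.
    llm = await LangChainProvider.create_langchain_model(ai_config)
    return LangChainProvider(llm)
```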
+ + :param ai_config: The LaunchDarkly AI configuration + :return: A configured LangChain BaseChatModel + """ + model_name = ai_config.model.name if ai_config.model else '' + provider = ai_config.provider.name if ai_config.provider else '' + parameters = ai_config.model.get_parameter('parameters') if ai_config.model else {} + if not isinstance(parameters, dict): + parameters = {} + + # Use LangChain's init_chat_model to support multiple providers + # Note: This requires langchain package to be installed + try: + # Try to import init_chat_model from langchain.chat_models + # This is available in langchain >= 0.1.0 + try: + from langchain.chat_models import init_chat_model + except ImportError: + # Fallback for older versions or different import path + from langchain.chat_models.universal import init_chat_model + + # Map provider name + langchain_provider = LangChainProvider.map_provider(provider) + + # Create model configuration + model_kwargs = {**parameters} + if langchain_provider: + model_kwargs['model_provider'] = langchain_provider + + # Initialize the chat model (init_chat_model may be async or sync) + result = init_chat_model(model_name, **model_kwargs) + # Handle both sync and async initialization + if hasattr(result, '__await__'): + return await result + return result + except ImportError as e: + raise ImportError( + 'langchain package is required for LangChainProvider. ' + 'Install it with: pip install langchain langchain-core' + ) from e + From 231ae2e226766cb4ec2d11d2d0e69f34a792718c Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 01:15:01 +0000 Subject: [PATCH 07/28] Add Judge and evaluation metric tracking --- ldai/__init__.py | 11 +- ldai/client.py | 78 +++++++- ldai/judge/__init__.py | 231 ++++++++++++++++++++++++ ldai/judge/evaluation_schema_builder.py | 73 ++++++++ ldai/models.py | 38 ++-- ldai/providers/types.py | 19 ++ ldai/tracker.py | 101 ++++++++++- 7 files changed, 523 insertions(+), 28 deletions(-) create mode 100644 ldai/judge/__init__.py create mode 100644 ldai/judge/evaluation_schema_builder.py diff --git a/ldai/__init__.py b/ldai/__init__.py index 222c007..bba0bb1 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -13,7 +13,6 @@ AICompletionConfigDefault, AIJudgeConfig, AIJudgeConfigDefault, - Judge, JudgeConfiguration, LDMessage, ModelConfig, @@ -25,6 +24,12 @@ LDAIAgentDefaults, ) +# Export judge +from ldai.judge import AIJudge + +# Export judge types +from ldai.providers.types import EvalScore, JudgeResponse + __all__ = [ 'LDAIClient', 'AIAgentConfig', @@ -35,8 +40,10 @@ 'AICompletionConfigDefault', 'AIJudgeConfig', 'AIJudgeConfigDefault', - 'Judge', + 'AIJudge', + 'EvalScore', 'JudgeConfiguration', + 'JudgeResponse', 'LDMessage', 'ModelConfig', 'ProviderConfig', diff --git a/ldai/client.py b/ldai/client.py index 4f3cc9e..248fcb6 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -4,6 +4,7 @@ from ldclient import Context from ldclient.client import LDClient +from ldai.judge import AIJudge from ldai.models import ( AIAgentConfig, AIAgentConfigDefault, @@ -13,12 +14,12 @@ AICompletionConfigDefault, AIJudgeConfig, AIJudgeConfigDefault, - Judge, JudgeConfiguration, LDMessage, ModelConfig, ProviderConfig, ) +from ldai.providers.ai_provider_factory import AIProviderFactory, SupportedAIProvider from ldai.tracker import LDAIConfigTracker @@ -118,6 +119,79 @@ def judge_config( return config + async def create_judge( + self, + key: str, + context: Context, + default_value: AIJudgeConfigDefault, + variables: Optional[Dict[str, Any]] = None, + 
default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Optional[AIJudge]: + """ + Creates and returns a new Judge instance for AI evaluation. + + :param key: The key identifying the AI judge configuration to use + :param context: Standard Context used when evaluating flags + :param default_value: A default value representing a standard AI config result + :param variables: Dictionary of values for instruction interpolation. + The variables `message_history` and `response_to_evaluate` are reserved for the judge and will be ignored. + :param default_ai_provider: Optional default AI provider to use. + :return: Judge instance or None if disabled/unsupported + + Example:: + + judge = client.create_judge( + "relevance-judge", + context, + AIJudgeConfigDefault( + enabled=True, + model=ModelConfig("gpt-4"), + provider=ProviderConfig("openai"), + evaluation_metric_keys=['$ld:ai:judge:relevance'], + messages=[LDMessage(role='system', content='You are a relevance judge.')] + ), + variables={'metric': "relevance"} + ) + + if judge: + result = await judge.evaluate("User question", "AI response") + if result and result.evals: + relevance_eval = result.evals.get('$ld:ai:judge:relevance') + if relevance_eval: + print('Relevance score:', relevance_eval.score) + """ + self._client.track('$ld:ai:judge:function:createJudge', context, key, 1) + + try: + # Warn if reserved variables are provided + if variables: + if 'message_history' in variables: + # Note: Python doesn't have a logger on the client, but we could add one + pass # Would log warning if logger available + if 'response_to_evaluate' in variables: + pass # Would log warning if logger available + + # Overwrite reserved variables to ensure they remain as placeholders for judge evaluation + extended_variables = dict(variables) if variables else {} + extended_variables['message_history'] = '{{message_history}}' + extended_variables['response_to_evaluate'] = '{{response_to_evaluate}}' + + judge_config = self.judge_config(key, context, default_value, extended_variables) + + if not judge_config.enabled or not judge_config.tracker: + # Would log info if logger available + return None + + # Create AI provider for the judge + provider = await AIProviderFactory.create(judge_config, None, default_ai_provider) + if not provider: + return None + + return AIJudge(judge_config, judge_config.tracker, provider, None) + except Exception as error: + # Would log error if logger available + return None + def agent_config( self, key: str, @@ -337,7 +411,7 @@ def __evaluate( judge_config = variation['judgeConfiguration'] if 'judges' in judge_config and isinstance(judge_config['judges'], list): judges = [ - Judge( + JudgeConfiguration.Judge( key=judge['key'], sampling_rate=judge['samplingRate'] ) diff --git a/ldai/judge/__init__.py b/ldai/judge/__init__.py new file mode 100644 index 0000000..323cd19 --- /dev/null +++ b/ldai/judge/__init__.py @@ -0,0 +1,231 @@ +"""Judge implementation for AI evaluation.""" + +import random +from typing import Any, Dict, Optional + +import chevron + +from ldai.models import AIJudgeConfig, LDMessage +from ldai.providers.ai_provider import AIProvider +from ldai.providers.types import ChatResponse, EvalScore, JudgeResponse, StructuredResponse +from ldai.tracker import LDAIConfigTracker +from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder + + +class AIJudge: + """ + Judge implementation that handles evaluation functionality and conversation management. 
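The reserved `message_history` and `response_to_evaluate` variables are the contract between a judge's messages and evaluation time: `create_judge` overwrites them so they survive config interpolation and are only filled in when `evaluate` runs. A sketch of a judge message authored with those placeholders; the wording is illustrative:

```python
from ldai.models import LDMessage

# These Mustache placeholders are deliberately preserved by create_judge
# and interpolated later, when the judge evaluates a response.
judge_message = LDMessage(
    role='system',
    content=(
        'Score the relevance of the response on a 0-1 scale.\n'
        'Conversation history:\n{{message_history}}\n'
        'Response to evaluate:\n{{response_to_evaluate}}'
    ),
)
```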
+ + According to the AIEval spec, judges are AI Configs with mode: "judge" that evaluate + other AI Configs using structured output. + """ + + def __init__( + self, + ai_config: AIJudgeConfig, + ai_config_tracker: LDAIConfigTracker, + ai_provider: AIProvider, + logger: Optional[Any] = None, + ): + """ + Initialize the Judge. + + :param ai_config: The judge AI configuration + :param ai_config_tracker: The tracker for the judge configuration + :param ai_provider: The AI provider to use for evaluation + :param logger: Optional logger for logging + """ + self._ai_config = ai_config + self._ai_config_tracker = ai_config_tracker + self._ai_provider = ai_provider + self._logger = logger + self._evaluation_response_structure = EvaluationSchemaBuilder.build( + ai_config.evaluation_metric_keys + ) + + async def evaluate( + self, + input_text: str, + output_text: str, + sampling_rate: float = 1.0, + ) -> Optional[JudgeResponse]: + """ + Evaluates an AI response using the judge's configuration. + + :param input_text: The input prompt or question that was provided to the AI + :param output_text: The AI-generated response to be evaluated + :param sampling_rate: Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1) + :return: Evaluation results or None if not sampled + """ + try: + if not self._ai_config.evaluation_metric_keys or len(self._ai_config.evaluation_metric_keys) == 0: + if self._logger: + self._logger.warn( + 'Judge configuration is missing required evaluationMetricKeys' + ) + return None + + if not self._ai_config.messages: + if self._logger: + self._logger.warn('Judge configuration must include messages') + return None + + if random.random() > sampling_rate: + if self._logger: + self._logger.debug(f'Judge evaluation skipped due to sampling rate: {sampling_rate}') + return None + + messages = self._construct_evaluation_messages(input_text, output_text) + + # Track metrics of the structured model invocation + response = await self._ai_config_tracker.track_metrics_of( + lambda result: result.metrics, + lambda: self._ai_provider.invoke_structured_model(messages, self._evaluation_response_structure) + ) + + success = response.metrics.success + + evals = self._parse_evaluation_response(response.data) + + if len(evals) != len(self._ai_config.evaluation_metric_keys): + if self._logger: + self._logger.warn('Judge evaluation did not return all evaluations') + success = False + + return JudgeResponse( + evals=evals, + success=success, + ) + except Exception as error: + if self._logger: + self._logger.error(f'Judge evaluation failed: {error}') + return JudgeResponse( + evals={}, + success=False, + error=str(error) if isinstance(error, Exception) else 'Unknown error', + ) + + async def evaluate_messages( + self, + messages: list[LDMessage], + response: ChatResponse, + sampling_ratio: float = 1.0, + ) -> Optional[JudgeResponse]: + """ + Evaluates an AI response from chat messages and response. + + :param messages: Array of messages representing the conversation history + :param response: The AI response to be evaluated + :param sampling_ratio: Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1) + :return: Evaluation results or None if not sampled + """ + input_text = '\r\n'.join([msg.content for msg in messages]) if messages else '' + output_text = response.message.content + + return await self.evaluate(input_text, output_text, sampling_ratio) + + def get_ai_config(self) -> AIJudgeConfig: + """ + Returns the AI Config used by this judge. 
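A sketch of driving an evaluation directly and reading the per-metric scores; `judge` is assumed to come from `create_judge`:

```python
async def score_answer(judge, question: str, answer: str) -> None:
    # sampling_rate=1.0 forces the evaluation to run every time.
    result = await judge.evaluate(question, answer, sampling_rate=1.0)
    if result and result.success:
        for metric_key, eval_score in result.evals.items():
            print(f'{metric_key}: {eval_score.score} ({eval_score.reasoning})')
    elif result:
        print(f'Evaluation failed: {result.error}')
```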
+ + :return: The judge AI configuration + """ + return self._ai_config + + def get_tracker(self) -> LDAIConfigTracker: + """ + Returns the tracker associated with this judge. + + :return: The tracker for the judge configuration + """ + return self._ai_config_tracker + + def get_provider(self) -> AIProvider: + """ + Returns the AI provider used by this judge. + + :return: The AI provider + """ + return self._ai_provider + + def _construct_evaluation_messages(self, input_text: str, output_text: str) -> list[LDMessage]: + """ + Constructs evaluation messages by combining judge's config messages with input/output. + + :param input_text: The input text + :param output_text: The output text to evaluate + :return: List of messages for evaluation + """ + if not self._ai_config.messages: + return [] + + messages: list[LDMessage] = [] + for msg in self._ai_config.messages: + # Interpolate message content with reserved variables + content = self._interpolate_message(msg.content, { + 'message_history': input_text, + 'response_to_evaluate': output_text, + }) + messages.append(LDMessage(role=msg.role, content=content)) + + return messages + + def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: + """ + Interpolates message content with variables using Mustache templating. + + :param content: The message content template + :param variables: Variables to interpolate + :return: Interpolated message content + """ + # Use chevron (Mustache) for templating, with no escaping + return chevron.render(content, variables) + + def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScore]: + """ + Parses the structured evaluation response from the AI provider. + + :param data: The structured response data + :return: Dictionary of evaluation scores keyed by metric key + """ + results: Dict[str, EvalScore] = {} + + if not data.get('evaluations') or not isinstance(data['evaluations'], dict): + if self._logger: + self._logger.warn('Invalid response: missing or invalid evaluations object') + return results + + evaluations = data['evaluations'] + + for metric_key in self._ai_config.evaluation_metric_keys: + evaluation = evaluations.get(metric_key) + + if not evaluation or not isinstance(evaluation, dict): + if self._logger: + self._logger.warn(f'Missing evaluation for metric key: {metric_key}') + continue + + score = evaluation.get('score') + reasoning = evaluation.get('reasoning') + + if not isinstance(score, (int, float)) or score < 0 or score > 1: + if self._logger: + self._logger.warn( + f'Invalid score evaluated for {metric_key}: {score}. ' + 'Score must be a number between 0 and 1 inclusive' + ) + continue + + if not isinstance(reasoning, str): + if self._logger: + self._logger.warn( + f'Invalid reasoning evaluated for {metric_key}: {reasoning}. ' + 'Reasoning must be a string' + ) + continue + + results[metric_key] = EvalScore(score=float(score), reasoning=reasoning) + + return results + + diff --git a/ldai/judge/evaluation_schema_builder.py b/ldai/judge/evaluation_schema_builder.py new file mode 100644 index 0000000..d8d8fa4 --- /dev/null +++ b/ldai/judge/evaluation_schema_builder.py @@ -0,0 +1,73 @@ +"""Internal class for building dynamic evaluation response schemas.""" + +from typing import Any, Dict + + +class EvaluationSchemaBuilder: + """ + Internal class for building dynamic evaluation response schemas. + Not exported - only used internally by Judge. 
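For reference, the `build` method below emits a standard JSON-schema document. An abridged sketch of the output for a single metric key, with the `description` and `additionalProperties` entries omitted:

```python
from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder

schema = EvaluationSchemaBuilder.build(['$ld:ai:judge:relevance'])
# Abridged shape of the returned dict:
# {
#     'type': 'object',
#     'properties': {
#         'evaluations': {
#             'type': 'object',
#             'properties': {
#                 '$ld:ai:judge:relevance': {
#                     'type': 'object',
#                     'properties': {
#                         'score': {'type': 'number', 'minimum': 0, 'maximum': 1},
#                         'reasoning': {'type': 'string'},
#                     },
#                     'required': ['score', 'reasoning'],
#                 },
#             },
#             'required': ['$ld:ai:judge:relevance'],
#         },
#     },
#     'required': ['evaluations'],
# }
```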
+ """ + + @staticmethod + def build(evaluation_metric_keys: list[str]) -> Dict[str, Any]: + """ + Build an evaluation response schema from evaluation metric keys. + + :param evaluation_metric_keys: List of evaluation metric keys + :return: Schema dictionary for structured output + """ + return { + 'type': 'object', + 'properties': { + 'evaluations': { + 'type': 'object', + 'description': f"Object containing evaluation results for {', '.join(evaluation_metric_keys)} metrics", + 'properties': EvaluationSchemaBuilder._build_key_properties(evaluation_metric_keys), + 'required': evaluation_metric_keys, + 'additionalProperties': False, + }, + }, + 'required': ['evaluations'], + 'additionalProperties': False, + } + + @staticmethod + def _build_key_properties(evaluation_metric_keys: list[str]) -> Dict[str, Any]: + """ + Build properties for each evaluation metric key. + + :param evaluation_metric_keys: List of evaluation metric keys + :return: Dictionary of properties for each key + """ + result: Dict[str, Any] = {} + for key in evaluation_metric_keys: + result[key] = EvaluationSchemaBuilder._build_key_schema(key) + return result + + @staticmethod + def _build_key_schema(key: str) -> Dict[str, Any]: + """ + Build schema for a single evaluation metric key. + + :param key: Evaluation metric key + :return: Schema dictionary for the key + """ + return { + 'type': 'object', + 'properties': { + 'score': { + 'type': 'number', + 'minimum': 0, + 'maximum': 1, + 'description': f'Score between 0.0 and 1.0 for {key}', + }, + 'reasoning': { + 'type': 'string', + 'description': f'Reasoning behind the score for {key}', + }, + }, + 'required': ['score', 'reasoning'], + 'additionalProperties': False, + } + diff --git a/ldai/models.py b/ldai/models.py index 0b961f7..4531f8f 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -105,30 +105,30 @@ def to_dict(self) -> dict: # Judge Types # ============================================================================ -@dataclass(frozen=True) -class Judge: - """ - Configuration for a single judge attachment. - """ - key: str - sampling_rate: float - - def to_dict(self) -> dict: - """ - Render the judge as a dictionary object. - """ - return { - 'key': self.key, - 'samplingRate': self.sampling_rate, - } - - @dataclass(frozen=True) class JudgeConfiguration: """ Configuration for judge attachment to AI Configs. """ - judges: List[Judge] + + @dataclass(frozen=True) + class Judge: + """ + Configuration for a single judge attachment. + """ + key: str + sampling_rate: float + + def to_dict(self) -> dict: + """ + Render the judge as a dictionary object. + """ + return { + 'key': self.key, + 'samplingRate': self.sampling_rate, + } + + judges: List['JudgeConfiguration.Judge'] def to_dict(self) -> dict: """ diff --git a/ldai/providers/types.py b/ldai/providers/types.py index 4bfd692..58ca3fc 100644 --- a/ldai/providers/types.py +++ b/ldai/providers/types.py @@ -35,3 +35,22 @@ class StructuredResponse: raw_response: str metrics: LDAIMetrics + +@dataclass +class EvalScore: + """ + Score and reasoning for a single evaluation metric. + """ + score: float # Score between 0.0 and 1.0 + reasoning: str # Reasoning behind the provided score + + +@dataclass +class JudgeResponse: + """ + Response from a judge evaluation containing scores and reasoning for multiple metrics. 
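After the nesting refactor above, a judge attachment is constructed through the nested class. A sketch; the rendered dictionary shape is an assumption based on the nested `Judge.to_dict` shown above:

```python
from ldai.models import JudgeConfiguration

judge_configuration = JudgeConfiguration(judges=[
    JudgeConfiguration.Judge(key='relevance-judge', sampling_rate=0.5),
])
# Expected shape, assuming the configuration renders its judges list:
# {'judges': [{'key': 'relevance-judge', 'samplingRate': 0.5}]}
```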
+ """ + evals: Dict[str, EvalScore] # Dictionary where keys are metric names and values contain score and reasoning + success: bool # Whether the evaluation completed successfully + error: Optional[str] = None # Error message if evaluation failed + diff --git a/ldai/tracker.py b/ldai/tracker.py index a049952..632f0f4 100644 --- a/ldai/tracker.py +++ b/ldai/tracker.py @@ -1,7 +1,7 @@ import time from dataclasses import dataclass from enum import Enum -from typing import Dict, Optional +from typing import Any, Dict, Optional from ldclient import Context, LDClient @@ -144,7 +144,7 @@ def track_duration_of(self, func): An exception occurring during the execution of the function will still track the duration. The exception will be re-thrown. - :param func: Function to track. + :param func: Function to track (synchronous only). :return: Result of the tracked function. """ start_time = time.time() @@ -157,6 +157,90 @@ def track_duration_of(self, func): return result + async def track_metrics_of(self, metrics_extractor, func): + """ + Track metrics for a generic AI operation. + + This function will track the duration of the operation, extract metrics using the provided + metrics extractor function, and track success or error status accordingly. + + If the provided function throws, then this method will also throw. + In the case the provided function throws, this function will record the duration and an error. + A failed operation will not have any token usage data. + + :param metrics_extractor: Function that extracts LDAIMetrics from the operation result + :param func: Async function which executes the operation + :return: The result of the operation + """ + start_time = time.time() + result = None + try: + result = await func() + except Exception as err: + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) + self.track_error() + raise err + + # Track duration after successful call + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) + + # Extract metrics after successful AI call + from ldai.providers.types import LDAIMetrics + metrics = metrics_extractor(result) + + # Track success/error based on metrics + if metrics.success: + self.track_success() + else: + self.track_error() + + # Track token usage if available + if metrics.usage: + self.track_tokens(metrics.usage) + + return result + + def track_eval_scores(self, scores: Dict[str, Any]) -> None: + """ + Track evaluation scores for multiple metrics. + + :param scores: Dictionary mapping metric keys to their evaluation scores (EvalScore objects) + """ + from ldai.providers.types import EvalScore + + # Track each evaluation score individually + for metric_key, eval_score in scores.items(): + if isinstance(eval_score, EvalScore): + self._ld_client.track( + metric_key, + self._context, + self.__get_track_data(), + eval_score.score + ) + + def track_judge_response(self, judge_response: Any) -> None: + """ + Track a judge response, including evaluation scores and success status. 
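`track_metrics_of` is the generic wrapper the judge and chat paths share: it times the call, then records success or error and any token usage from the extracted `LDAIMetrics`. A usage sketch, assuming a provider whose result carries a `metrics` attribute:

```python
async def tracked_invoke(tracker, provider, messages):
    # Duration is always recorded; success/error and token usage are
    # derived from the LDAIMetrics returned by the extractor.
    return await tracker.track_metrics_of(
        lambda result: result.metrics,
        lambda: provider.invoke_model(messages),
    )
```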
+ + :param judge_response: JudgeResponse object containing evals and success status + """ + from ldai.providers.types import JudgeResponse + + if isinstance(judge_response, JudgeResponse): + # Track evaluation scores + if judge_response.evals: + self.track_eval_scores(judge_response.evals) + + # Track success/error based on judge response + if judge_response.success: + self.track_success() + else: + self.track_error() + def track_feedback(self, feedback: Dict[str, FeedbackKind]) -> None: """ Track user feedback for an AI operation. @@ -197,7 +281,7 @@ def track_error(self) -> None: "$ld:ai:generation:error", self._context, self.__get_track_data(), 1 ) - def track_openai_metrics(self, func): + async def track_openai_metrics(self, func): """ Track OpenAI-specific operations. @@ -211,15 +295,22 @@ def track_openai_metrics(self, func): A failed operation will not have any token usage data. - :param func: Function to track. + :param func: Async function to track. :return: Result of the tracked function. """ + start_time = time.time() try: - result = self.track_duration_of(func) + result = await func() + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) self.track_success() if hasattr(result, "usage") and hasattr(result.usage, "to_dict"): self.track_tokens(_openai_to_token_usage(result.usage.to_dict())) except Exception: + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) self.track_error() raise From 445ab8c9c3488221ea39dfeec94cdd5235d8581e Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 03:43:01 +0000 Subject: [PATCH 08/28] Add Chat implementation --- ldai/__init__.py | 4 + ldai/chat/__init__.py | 191 ++++++++++++++++++++++++++++++++++++++++++ ldai/client.py | 109 ++++++++++++++++++++++++ 3 files changed, 304 insertions(+) create mode 100644 ldai/chat/__init__.py diff --git a/ldai/__init__.py b/ldai/__init__.py index bba0bb1..617ac3a 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -27,6 +27,9 @@ # Export judge from ldai.judge import AIJudge +# Export chat +from ldai.chat import TrackedChat + # Export judge types from ldai.providers.types import EvalScore, JudgeResponse @@ -41,6 +44,7 @@ 'AIJudgeConfig', 'AIJudgeConfigDefault', 'AIJudge', + 'TrackedChat', 'EvalScore', 'JudgeConfiguration', 'JudgeResponse', diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py new file mode 100644 index 0000000..5bdf37a --- /dev/null +++ b/ldai/chat/__init__.py @@ -0,0 +1,191 @@ +"""TrackedChat implementation for managing AI chat conversations.""" + +from typing import Any, Dict, List, Optional + +from ldai.models import AICompletionConfig, LDMessage +from ldai.providers.ai_provider import AIProvider +from ldai.providers.types import ChatResponse, JudgeResponse +from ldai.judge import AIJudge +from ldai.tracker import LDAIConfigTracker + + +class TrackedChat: + """ + Concrete implementation of TrackedChat that provides chat functionality + by delegating to an AIProvider implementation. + + This class handles conversation management and tracking, while delegating + the actual model invocation to the provider. + """ + + def __init__( + self, + ai_config: AICompletionConfig, + tracker: LDAIConfigTracker, + provider: AIProvider, + judges: Optional[Dict[str, AIJudge]] = None, + logger: Optional[Any] = None, + ): + """ + Initialize the TrackedChat. 
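Since `track_openai_metrics` is now a coroutine, callers pass an async callable rather than a synchronous one. A sketch assuming the `AsyncOpenAI` client from the `openai` v1 package; the model name is illustrative:

```python
from openai import AsyncOpenAI

openai_client = AsyncOpenAI()


async def tracked_completion(tracker, prompt: str):
    # The lambda returns an awaitable; the tracker awaits it and records
    # duration, success/error, and token usage from the response.
    return await tracker.track_openai_metrics(
        lambda: openai_client.chat.completions.create(
            model='gpt-4o-mini',
            messages=[{'role': 'user', 'content': prompt}],
        )
    )
```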
+ + :param ai_config: The completion AI configuration + :param tracker: The tracker for the completion configuration + :param provider: The AI provider to use for chat + :param judges: Optional dictionary of judge instances keyed by their configuration keys + :param logger: Optional logger for logging + """ + self._ai_config = ai_config + self._tracker = tracker + self._provider = provider + self._judges = judges or {} + self._logger = logger + self._messages: List[LDMessage] = [] + + async def invoke(self, prompt: str) -> ChatResponse: + """ + Invoke the chat model with a prompt string. + + This method handles conversation management and tracking, delegating to the provider's invoke_model method. + + :param prompt: The user prompt to send to the chat model + :return: ChatResponse containing the model's response and metrics + """ + # Convert prompt string to LDMessage with role 'user' and add to conversation history + user_message: LDMessage = LDMessage(role='user', content=prompt) + self._messages.append(user_message) + + # Prepend config messages to conversation history for model invocation + config_messages = self._ai_config.messages or [] + all_messages = config_messages + self._messages + + # Delegate to provider-specific implementation with tracking + response = await self._tracker.track_metrics_of( + lambda result: result.metrics, + lambda: self._provider.invoke_model(all_messages), + ) + + # Evaluate with judges if configured + if ( + self._ai_config.judge_configuration + and self._ai_config.judge_configuration.judges + and len(self._ai_config.judge_configuration.judges) > 0 + ): + evaluations = await self._evaluate_with_judges(self._messages, response) + response.evaluations = evaluations + + # Add the response message to conversation history + self._messages.append(response.message) + return response + + async def _evaluate_with_judges( + self, + messages: List[LDMessage], + response: ChatResponse, + ) -> List[Optional[JudgeResponse]]: + """ + Evaluates the response with all configured judges. + + Returns a list of evaluation results. + + :param messages: Array of messages representing the conversation history + :param response: The AI response to be evaluated + :return: List of judge evaluation results (may contain None for failed evaluations) + """ + if not self._ai_config.judge_configuration or not self._ai_config.judge_configuration.judges: + return [] + + judge_configs = self._ai_config.judge_configuration.judges + + # Start all judge evaluations in parallel + async def evaluate_judge(judge_config): + judge = self._judges.get(judge_config.key) + if not judge: + if self._logger: + self._logger.warn( + f"Judge configuration is not enabled: {judge_config.key}", + ) + return None + + eval_result = await judge.evaluate_messages( + messages, response, judge_config.sampling_rate + ) + + if eval_result and eval_result.success: + self._tracker.track_eval_scores(eval_result.evals) + + return eval_result + + # Ensure all evaluations complete even if some fail + import asyncio + evaluation_promises = [evaluate_judge(judge_config) for judge_config in judge_configs] + results = await asyncio.gather(*evaluation_promises, return_exceptions=True) + + # Map exceptions to None + return [ + None if isinstance(result, Exception) else result + for result in results + ] + + def get_config(self) -> AICompletionConfig: + """ + Get the underlying AI configuration used to initialize this TrackedChat. 
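A sketch of a multi-turn conversation, assuming `chat` came from `create_chat`; config messages are prepended on every call, while `get_messages` (shown below) returns only the running conversation by default:

```python
async def run_conversation(chat) -> int:
    first = await chat.invoke('I need help with my order')
    follow_up = await chat.invoke('It still has not shipped')
    print(first.message.content)
    print(follow_up.message.content)
    # Two user turns plus two assistant replies.
    return len(chat.get_messages())
```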
+ + :return: The AI completion configuration + """ + return self._ai_config + + def get_tracker(self) -> LDAIConfigTracker: + """ + Get the underlying AI configuration tracker used to initialize this TrackedChat. + + :return: The tracker instance + """ + return self._tracker + + def get_provider(self) -> AIProvider: + """ + Get the underlying AI provider instance. + + This provides direct access to the provider for advanced use cases. + + :return: The AI provider instance + """ + return self._provider + + def get_judges(self) -> Dict[str, AIJudge]: + """ + Get the judges associated with this TrackedChat. + + Returns a dictionary of judge instances keyed by their configuration keys. + + :return: Dictionary of judge instances + """ + return self._judges + + def append_messages(self, messages: List[LDMessage]) -> None: + """ + Append messages to the conversation history. + + Adds messages to the conversation history without invoking the model, + which is useful for managing multi-turn conversations or injecting context. + + :param messages: Array of messages to append to the conversation history + """ + self._messages.extend(messages) + + def get_messages(self, include_config_messages: bool = False) -> List[LDMessage]: + """ + Get all messages in the conversation history. + + :param include_config_messages: Whether to include the config messages from the AIConfig. + Defaults to False. + :return: Array of messages. When include_config_messages is True, returns both config + messages and conversation history with config messages prepended. When False, + returns only the conversation history messages. + """ + if include_config_messages: + config_messages = self._ai_config.messages or [] + return config_messages + self._messages + return list(self._messages) + diff --git a/ldai/client.py b/ldai/client.py index 248fcb6..91649d7 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -4,6 +4,7 @@ from ldclient import Context from ldclient.client import LDClient +from ldai.chat import TrackedChat from ldai.judge import AIJudge from ldai.models import ( AIAgentConfig, @@ -192,6 +193,114 @@ async def create_judge( # Would log error if logger available return None + async def _initialize_judges( + self, + judge_configs: List[JudgeConfiguration.Judge], + context: Context, + variables: Optional[Dict[str, Any]] = None, + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Dict[str, AIJudge]: + """ + Initialize judges from judge configurations. 
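`append_messages` makes it possible to seed prior turns or inject context without a model call. A sketch:

```python
from ldai.models import LDMessage


async def seed_and_ask(chat) -> str:
    # Seed earlier turns without invoking the model.
    chat.append_messages([
        LDMessage(role='user', content='My name is Alex.'),
        LDMessage(role='assistant', content='Nice to meet you, Alex!'),
    ])
    response = await chat.invoke('What is my name?')
    return response.message.content
```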
+ + :param judge_configs: List of judge configurations + :param context: Standard Context used when evaluating flags + :param variables: Dictionary of values for instruction interpolation + :param default_ai_provider: Optional default AI provider to use + :return: Dictionary of judge instances keyed by their configuration keys + """ + judges: Dict[str, AIJudge] = {} + + async def create_judge_for_config(judge_key: str): + judge = await self.create_judge( + judge_key, + context, + AIJudgeConfigDefault(enabled=False), + variables, + default_ai_provider, + ) + return judge_key, judge + + judge_promises = [ + create_judge_for_config(judge_config.key) + for judge_config in judge_configs + ] + + import asyncio + results = await asyncio.gather(*judge_promises, return_exceptions=True) + + for result in results: + if isinstance(result, Exception): + continue + judge_key, judge = result + if judge: + judges[judge_key] = judge + + return judges + + async def create_chat( + self, + key: str, + context: Context, + default_value: AICompletionConfigDefault, + variables: Optional[Dict[str, Any]] = None, + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Optional[TrackedChat]: + """ + Creates and returns a new TrackedChat instance for AI chat conversations. + + :param key: The key identifying the AI completion configuration to use + :param context: Standard Context used when evaluating flags + :param default_value: A default value representing a standard AI config result + :param variables: Dictionary of values for instruction interpolation + :param default_ai_provider: Optional default AI provider to use + :return: TrackedChat instance or None if disabled/unsupported + + Example:: + + chat = await client.create_chat( + "customer-support-chat", + context, + AICompletionConfigDefault( + enabled=True, + model=ModelConfig("gpt-4"), + provider=ProviderConfig("openai"), + messages=[LDMessage(role='system', content='You are a helpful assistant.')] + ), + variables={'customerName': 'John'} + ) + + if chat: + response = await chat.invoke("I need help with my order") + print(response.message.content) + + # Access conversation history + messages = chat.get_messages() + print(f"Conversation has {len(messages)} messages") + """ + self._client.track('$ld:ai:config:function:createChat', context, key, 1) + + config = self.completion_config(key, context, default_value, variables) + + if not config.enabled or not config.tracker: + # Would log info if logger available + return None + + provider = await AIProviderFactory.create(config, None, default_ai_provider) + if not provider: + return None + + judges = {} + if config.judge_configuration and config.judge_configuration.judges: + judges = await self._initialize_judges( + config.judge_configuration.judges, + context, + variables, + default_ai_provider, + ) + + return TrackedChat(config, config.tracker, provider, judges, None) + def agent_config( self, key: str, From 5446222a18f7f723cad0fad6b3a0386144dd23ad Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 03:53:43 +0000 Subject: [PATCH 09/28] Set a default for evaluation metircs --- ldai/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ldai/models.py b/ldai/models.py index 4531f8f..fa36f8c 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -1,5 +1,5 @@ import warnings -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, Dict, List, Literal, Optional, Union from ldai.tracker import LDAIConfigTracker @@ -302,7 +302,7 
@@ class AIJudgeConfig(AIConfig): """ Judge-specific AI Config with required evaluation metric key. """ - evaluation_metric_keys: List[str] + evaluation_metric_keys: List[str] = field(default_factory=list) messages: Optional[List[LDMessage]] = None def to_dict(self) -> dict: From bc46608ad4bb66125412ffb23e9e8963804e1aac Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 17:24:24 +0000 Subject: [PATCH 10/28] add the logger --- ldai/client.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/ldai/client.py b/ldai/client.py index 91649d7..4f4b6b9 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -1,3 +1,4 @@ +import logging from typing import Any, Dict, List, Optional, Tuple import chevron @@ -29,6 +30,7 @@ class LDAIClient: def __init__(self, client: LDClient): self._client = client + self._logger = logging.getLogger('ldclient.ai') def completion_config( self, @@ -184,11 +186,11 @@ async def create_judge( return None # Create AI provider for the judge - provider = await AIProviderFactory.create(judge_config, None, default_ai_provider) + provider = await AIProviderFactory.create(judge_config, self._logger, default_ai_provider) if not provider: return None - return AIJudge(judge_config, judge_config.tracker, provider, None) + return AIJudge(judge_config, judge_config.tracker, provider, self._logger) except Exception as error: # Would log error if logger available return None @@ -279,14 +281,15 @@ async def create_chat( print(f"Conversation has {len(messages)} messages") """ self._client.track('$ld:ai:config:function:createChat', context, key, 1) - + if self._logger: + self._logger.debug(f"Creating chat for key: {key}") config = self.completion_config(key, context, default_value, variables) if not config.enabled or not config.tracker: # Would log info if logger available return None - provider = await AIProviderFactory.create(config, None, default_ai_provider) + provider = await AIProviderFactory.create(config, self._logger, default_ai_provider) if not provider: return None @@ -299,7 +302,7 @@ async def create_chat( default_ai_provider, ) - return TrackedChat(config, config.tracker, provider, judges, None) + return TrackedChat(config, config.tracker, provider, judges, self._logger) def agent_config( self, From fd0aff476dbe6328906be71a734ac9c216d899b2 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 18:09:59 +0000 Subject: [PATCH 11/28] adjust langchain import --- ldai/providers/langchain/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ldai/providers/langchain/__init__.py b/ldai/providers/langchain/__init__.py index af84dc8..f2e2c35 100644 --- a/ldai/providers/langchain/__init__.py +++ b/ldai/providers/langchain/__init__.py @@ -2,7 +2,7 @@ from typing import Any, Dict, List, Optional -from langchain_core.chat_models import BaseChatModel +from langchain_core.language_models.chat_models import BaseChatModel from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage from ldai.models import AIConfigKind, LDMessage From c3c939f1b70654596d3b0cff02b28d4c98859515 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 18:35:13 +0000 Subject: [PATCH 12/28] fix structure response --- ldai/judge/evaluation_schema_builder.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ldai/judge/evaluation_schema_builder.py b/ldai/judge/evaluation_schema_builder.py index d8d8fa4..1965e64 100644 --- a/ldai/judge/evaluation_schema_builder.py +++ b/ldai/judge/evaluation_schema_builder.py @@ 
-18,6 +18,8 @@ def build(evaluation_metric_keys: list[str]) -> Dict[str, Any]:
         :return: Schema dictionary for structured output
         """
         return {
+            'title': 'EvaluationResponse',
+            'description': f"Response containing evaluation results for {', '.join(evaluation_metric_keys)} metrics",
             'type': 'object',
             'properties': {
                 'evaluations': {

From 125bb66252083ca983a9b46a209425f77ae9923c Mon Sep 17 00:00:00 2001
From: jsonbailey
Date: Sat, 8 Nov 2025 18:59:13 +0000
Subject: [PATCH 13/28] judge response should be async

---
 ldai/chat/__init__.py   | 34 ++++++++++++++++------------------
 ldai/providers/types.py | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 18 deletions(-)

diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py
index 5bdf37a..0785c16 100644
--- a/ldai/chat/__init__.py
+++ b/ldai/chat/__init__.py
@@ -1,5 +1,6 @@
 """TrackedChat implementation for managing AI chat conversations."""
 
+import asyncio
 from typing import Any, Dict, List, Optional
 
 from ldai.models import AICompletionConfig, LDMessage
@@ -65,39 +66,39 @@ async def invoke(self, prompt: str) -> ChatResponse:
             lambda: self._provider.invoke_model(all_messages),
         )
 
-        # Evaluate with judges if configured
+        # Start judge evaluations as async tasks (don't await them)
         if (
             self._ai_config.judge_configuration
             and self._ai_config.judge_configuration.judges
             and len(self._ai_config.judge_configuration.judges) > 0
         ):
-            evaluations = await self._evaluate_with_judges(self._messages, response)
-            response.evaluations = evaluations
+            evaluation_tasks = self._start_judge_evaluations(self._messages, response)
+            response.evaluations = evaluation_tasks
 
         # Add the response message to conversation history
         self._messages.append(response.message)
         return response
 
-    async def _evaluate_with_judges(
+    def _start_judge_evaluations(
         self,
         messages: List[LDMessage],
         response: ChatResponse,
-    ) -> List[Optional[JudgeResponse]]:
+    ) -> List[asyncio.Task[Optional[JudgeResponse]]]:
         """
-        Evaluates the response with all configured judges.
+        Start judge evaluations as async tasks without awaiting them.
 
-        Returns a list of evaluation results.
+        Returns a list of async tasks that can be awaited later.
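With this change, `response.evaluations` holds pending `asyncio.Task` objects rather than finished results, so callers gather the tasks only when the scores are needed inline. A sketch:

```python
import asyncio


async def invoke_and_collect_scores(chat, prompt: str):
    response = await chat.invoke(prompt)
    # Judge evaluations run in the background; gather the tasks to
    # block until the scores are available.
    tasks = getattr(response, 'evaluations', None)
    if tasks:
        for judge_result in await asyncio.gather(*tasks):
            if judge_result and judge_result.success:
                print(judge_result.evals)
    return response
```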
:param messages: Array of messages representing the conversation history :param response: The AI response to be evaluated - :return: List of judge evaluation results (may contain None for failed evaluations) + :return: List of async tasks that will return judge evaluation results """ if not self._ai_config.judge_configuration or not self._ai_config.judge_configuration.judges: return [] judge_configs = self._ai_config.judge_configuration.judges - # Start all judge evaluations in parallel + # Start all judge evaluations as tasks async def evaluate_judge(judge_config): judge = self._judges.get(judge_config.key) if not judge: @@ -116,16 +117,13 @@ async def evaluate_judge(judge_config): return eval_result - # Ensure all evaluations complete even if some fail - import asyncio - evaluation_promises = [evaluate_judge(judge_config) for judge_config in judge_configs] - results = await asyncio.gather(*evaluation_promises, return_exceptions=True) - - # Map exceptions to None - return [ - None if isinstance(result, Exception) else result - for result in results + # Create tasks for each judge evaluation + tasks = [ + asyncio.create_task(evaluate_judge(judge_config)) + for judge_config in judge_configs ] + + return tasks def get_config(self) -> AICompletionConfig: """ diff --git a/ldai/providers/types.py b/ldai/providers/types.py index 58ca3fc..45df755 100644 --- a/ldai/providers/types.py +++ b/ldai/providers/types.py @@ -15,6 +15,21 @@ class LDAIMetrics: success: bool usage: Optional[TokenUsage] = None + def to_dict(self) -> Dict[str, Any]: + """ + Render the metrics as a dictionary object. + """ + result: Dict[str, Any] = { + 'success': self.success, + } + if self.usage is not None: + result['usage'] = { + 'total': self.usage.total, + 'input': self.usage.input, + 'output': self.usage.output, + } + return result + @dataclass class ChatResponse: @@ -44,6 +59,15 @@ class EvalScore: score: float # Score between 0.0 and 1.0 reasoning: str # Reasoning behind the provided score + def to_dict(self) -> Dict[str, Any]: + """ + Render the evaluation score as a dictionary object. + """ + return { + 'score': self.score, + 'reasoning': self.reasoning, + } + @dataclass class JudgeResponse: @@ -54,3 +78,15 @@ class JudgeResponse: success: bool # Whether the evaluation completed successfully error: Optional[str] = None # Error message if evaluation failed + def to_dict(self) -> Dict[str, Any]: + """ + Render the judge response as a dictionary object. 
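The new `to_dict` helpers make metrics and scores straightforward to log or serialize. A sketch of the expected shapes:

```python
from ldai.providers.types import EvalScore, LDAIMetrics
from ldai.tracker import TokenUsage

metrics = LDAIMetrics(success=True, usage=TokenUsage(total=330, input=220, output=110))
print(metrics.to_dict())
# {'success': True, 'usage': {'total': 330, 'input': 220, 'output': 110}}

score = EvalScore(score=0.9, reasoning='Directly answers the question.')
print(score.to_dict())
# {'score': 0.9, 'reasoning': 'Directly answers the question.'}
```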
+ """ + result: Dict[str, Any] = { + 'evals': {key: eval_score.to_dict() for key, eval_score in self.evals.items()}, + 'success': self.success, + } + if self.error is not None: + result['error'] = self.error + return result + From 63b1d9e29853bceb0e08305880593d499669b141 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Tue, 9 Dec 2025 18:46:00 +0100 Subject: [PATCH 14/28] fix test --- ldai/testing/test_model_config.py | 22 +++++++++++----------- ldai/testing/test_tracker.py | 15 ++++++++++----- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/ldai/testing/test_model_config.py b/ldai/testing/test_model_config.py index b35389d..13c0523 100644 --- a/ldai/testing/test_model_config.py +++ b/ldai/testing/test_model_config.py @@ -140,7 +140,7 @@ def test_uses_default_on_invalid_flag(ldai_client: LDAIClient): ) variables = {'name': 'World'} - config, _ = ldai_client.config('missing-flag', context, default_value, variables) + config = ldai_client.config('missing-flag', context, default_value, variables) assert config.messages is not None assert len(config.messages) > 0 @@ -162,7 +162,7 @@ def test_model_config_interpolation(ldai_client: LDAIClient): ) variables = {'name': 'World'} - config, _ = ldai_client.config('model-config', context, default_value, variables) + config = ldai_client.config('model-config', context, default_value, variables) assert config.messages is not None assert len(config.messages) > 0 @@ -179,7 +179,7 @@ def test_model_config_no_variables(ldai_client: LDAIClient): context = Context.create('user-key') default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) - config, _ = ldai_client.config('model-config', context, default_value, {}) + config = ldai_client.config('model-config', context, default_value, {}) assert config.messages is not None assert len(config.messages) > 0 @@ -197,7 +197,7 @@ def test_provider_config_handling(ldai_client: LDAIClient): default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} - config, _ = ldai_client.config('model-config', context, default_value, variables) + config = ldai_client.config('model-config', context, default_value, variables) assert config.provider is not None assert config.provider.name == 'fakeProvider' @@ -208,7 +208,7 @@ def test_context_interpolation(ldai_client: LDAIClient): default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} - config, _ = ldai_client.config( + config = ldai_client.config( 'ctx-interpolation', context, default_value, variables ) @@ -231,7 +231,7 @@ def test_multi_context_interpolation(ldai_client: LDAIClient): default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} - config, _ = ldai_client.config( + config = ldai_client.config( 'multi-ctx-interpolation', context, default_value, variables ) @@ -252,7 +252,7 @@ def test_model_config_multiple(ldai_client: LDAIClient): default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World', 'day': 'Monday'} - config, _ = ldai_client.config( + config = ldai_client.config( 'multiple-messages', context, default_value, variables ) @@ -272,7 +272,7 @@ def test_model_config_disabled(ldai_client: LDAIClient): context = Context.create('user-key') default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) - config, _ = ldai_client.config('off-config', context, default_value, {}) + 
config = ldai_client.config('off-config', context, default_value, {}) assert config.model is not None assert config.enabled is False @@ -285,7 +285,7 @@ def test_model_initial_config_disabled(ldai_client: LDAIClient): context = Context.create('user-key') default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) - config, _ = ldai_client.config('initial-config-disabled', context, default_value, {}) + config = ldai_client.config('initial-config-disabled', context, default_value, {}) assert config.enabled is False assert config.model is None @@ -297,7 +297,7 @@ def test_model_initial_config_enabled(ldai_client: LDAIClient): context = Context.create('user-key') default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) - config, _ = ldai_client.config('initial-config-enabled', context, default_value, {}) + config = ldai_client.config('initial-config-enabled', context, default_value, {}) assert config.enabled is True assert config.model is None @@ -320,7 +320,7 @@ def test_config_method_tracking(ldai_client: LDAIClient): context = Context.create('user-key') default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) - config, tracker = client.config('test-config-key', context, default_value) + config = client.config('test-config-key', context, default_value) mock_client.track.assert_called_once_with( '$ld:ai:config:function:single', diff --git a/ldai/testing/test_tracker.py b/ldai/testing/test_tracker.py index 19c8161..2e39d98 100644 --- a/ldai/testing/test_tracker.py +++ b/ldai/testing/test_tracker.py @@ -276,7 +276,8 @@ def test_tracks_bedrock_metrics_with_error(client: LDClient): assert tracker.get_summary().usage == TokenUsage(330, 220, 110) -def test_tracks_openai_metrics(client: LDClient): +@pytest.mark.asyncio +async def test_tracks_openai_metrics(client: LDClient): context = Context.create("user-key") tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) @@ -292,7 +293,10 @@ def to_dict(self): "completion_tokens": 110, } - tracker.track_openai_metrics(lambda: Result()) + async def get_result(): + return Result() + + await tracker.track_openai_metrics(get_result) calls = [ call( @@ -326,15 +330,16 @@ def to_dict(self): assert tracker.get_summary().usage == TokenUsage(330, 220, 110) -def test_tracks_openai_metrics_with_exception(client: LDClient): +@pytest.mark.asyncio +async def test_tracks_openai_metrics_with_exception(client: LDClient): context = Context.create("user-key") tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - def raise_exception(): + async def raise_exception(): raise ValueError("Something went wrong") try: - tracker.track_openai_metrics(raise_exception) + await tracker.track_openai_metrics(raise_exception) assert False, "Should have thrown an exception" except ValueError: pass From cae7952fe8d3aca10eed8560bed8b5e70dafe4f6 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Tue, 9 Dec 2025 19:11:29 +0100 Subject: [PATCH 15/28] fix lint --- ldai/__init__.py | 34 +++---------- ldai/chat/__init__.py | 35 +++++++------ ldai/client.py | 38 ++++++--------- ldai/judge/__init__.py | 27 +++++----- ldai/judge/evaluation_schema_builder.py | 7 ++- ldai/models.py | 18 +++---- ldai/providers/__init__.py | 4 +- ldai/providers/ai_provider.py | 31 ++++++------ ldai/providers/ai_provider_factory.py | 16 +++--- ldai/providers/langchain/__init__.py | 65 +++++++++++++++---------- ldai/providers/types.py 
| 1 - ldai/testing/test_model_config.py | 25 +++++----- ldai/tracker.py | 6 +-- setup.cfg | 2 +- 14 files changed, 143 insertions(+), 166 deletions(-) diff --git a/ldai/__init__.py b/ldai/__init__.py index 617ac3a..78125d7 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -1,35 +1,17 @@ __version__ = "0.10.1" # x-release-please-version # Export main client +# Export chat +from ldai.chat import TrackedChat from ldai.client import LDAIClient - -# Export models for convenience -from ldai.models import ( - AIAgentConfig, - AIAgentConfigDefault, - AIAgentConfigRequest, - AIAgents, - AICompletionConfig, - AICompletionConfigDefault, - AIJudgeConfig, - AIJudgeConfigDefault, - JudgeConfiguration, - LDMessage, - ModelConfig, - ProviderConfig, - # Deprecated aliases for backward compatibility - AIConfig, - LDAIAgent, - LDAIAgentConfig, - LDAIAgentDefaults, -) - # Export judge from ldai.judge import AIJudge - -# Export chat -from ldai.chat import TrackedChat - +# Export models for convenience +from ldai.models import ( # Deprecated aliases for backward compatibility + AIAgentConfig, AIAgentConfigDefault, AIAgentConfigRequest, AIAgents, + AICompletionConfig, AICompletionConfigDefault, AIConfig, AIJudgeConfig, + AIJudgeConfigDefault, JudgeConfiguration, LDAIAgent, LDAIAgentConfig, + LDAIAgentDefaults, LDMessage, ModelConfig, ProviderConfig) # Export judge types from ldai.providers.types import EvalScore, JudgeResponse diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py index 0785c16..bcb4284 100644 --- a/ldai/chat/__init__.py +++ b/ldai/chat/__init__.py @@ -3,10 +3,10 @@ import asyncio from typing import Any, Dict, List, Optional +from ldai.judge import AIJudge from ldai.models import AICompletionConfig, LDMessage from ldai.providers.ai_provider import AIProvider from ldai.providers.types import ChatResponse, JudgeResponse -from ldai.judge import AIJudge from ldai.tracker import LDAIConfigTracker @@ -14,7 +14,7 @@ class TrackedChat: """ Concrete implementation of TrackedChat that provides chat functionality by delegating to an AIProvider implementation. - + This class handles conversation management and tracking, while delegating the actual model invocation to the provider. """ @@ -29,7 +29,7 @@ def __init__( ): """ Initialize the TrackedChat. - + :param ai_config: The completion AI configuration :param tracker: The tracker for the completion configuration :param provider: The AI provider to use for chat @@ -46,9 +46,9 @@ def __init__( async def invoke(self, prompt: str) -> ChatResponse: """ Invoke the chat model with a prompt string. - + This method handles conversation management and tracking, delegating to the provider's invoke_model method. - + :param prompt: The user prompt to send to the chat model :return: ChatResponse containing the model's response and metrics """ @@ -86,9 +86,9 @@ def _start_judge_evaluations( ) -> List[asyncio.Task[Optional[JudgeResponse]]]: """ Start judge evaluations as async tasks without awaiting them. - + Returns a list of async tasks that can be awaited later. - + :param messages: Array of messages representing the conversation history :param response: The AI response to be evaluated :return: List of async tasks that will return judge evaluation results @@ -122,13 +122,13 @@ async def evaluate_judge(judge_config): asyncio.create_task(evaluate_judge(judge_config)) for judge_config in judge_configs ] - + return tasks def get_config(self) -> AICompletionConfig: """ Get the underlying AI configuration used to initialize this TrackedChat. 
- + :return: The AI completion configuration """ return self._ai_config @@ -136,7 +136,7 @@ def get_config(self) -> AICompletionConfig: def get_tracker(self) -> LDAIConfigTracker: """ Get the underlying AI configuration tracker used to initialize this TrackedChat. - + :return: The tracker instance """ return self._tracker @@ -144,9 +144,9 @@ def get_tracker(self) -> LDAIConfigTracker: def get_provider(self) -> AIProvider: """ Get the underlying AI provider instance. - + This provides direct access to the provider for advanced use cases. - + :return: The AI provider instance """ return self._provider @@ -154,9 +154,9 @@ def get_provider(self) -> AIProvider: def get_judges(self) -> Dict[str, AIJudge]: """ Get the judges associated with this TrackedChat. - + Returns a dictionary of judge instances keyed by their configuration keys. - + :return: Dictionary of judge instances """ return self._judges @@ -164,10 +164,10 @@ def get_judges(self) -> Dict[str, AIJudge]: def append_messages(self, messages: List[LDMessage]) -> None: """ Append messages to the conversation history. - + Adds messages to the conversation history without invoking the model, which is useful for managing multi-turn conversations or injecting context. - + :param messages: Array of messages to append to the conversation history """ self._messages.extend(messages) @@ -175,7 +175,7 @@ def append_messages(self, messages: List[LDMessage]) -> None: def get_messages(self, include_config_messages: bool = False) -> List[LDMessage]: """ Get all messages in the conversation history. - + :param include_config_messages: Whether to include the config messages from the AIConfig. Defaults to False. :return: Array of messages. When include_config_messages is True, returns both config @@ -186,4 +186,3 @@ def get_messages(self, include_config_messages: bool = False) -> List[LDMessage] config_messages = self._ai_config.messages or [] return config_messages + self._messages return list(self._messages) - diff --git a/ldai/client.py b/ldai/client.py index 4f4b6b9..086e99b 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -7,21 +7,13 @@ from ldai.chat import TrackedChat from ldai.judge import AIJudge -from ldai.models import ( - AIAgentConfig, - AIAgentConfigDefault, - AIAgentConfigRequest, - AIAgents, - AICompletionConfig, - AICompletionConfigDefault, - AIJudgeConfig, - AIJudgeConfigDefault, - JudgeConfiguration, - LDMessage, - ModelConfig, - ProviderConfig, -) -from ldai.providers.ai_provider_factory import AIProviderFactory, SupportedAIProvider +from ldai.models import (AIAgentConfig, AIAgentConfigDefault, + AIAgentConfigRequest, AIAgents, AICompletionConfig, + AICompletionConfigDefault, AIJudgeConfig, + AIJudgeConfigDefault, JudgeConfiguration, LDMessage, + ModelConfig, ProviderConfig) +from ldai.providers.ai_provider_factory import (AIProviderFactory, + SupportedAIProvider) from ldai.tracker import LDAIConfigTracker @@ -204,7 +196,7 @@ async def _initialize_judges( ) -> Dict[str, AIJudge]: """ Initialize judges from judge configurations. 
- + :param judge_configs: List of judge configurations :param context: Standard Context used when evaluating flags :param variables: Dictionary of values for instruction interpolation @@ -212,7 +204,7 @@ async def _initialize_judges( :return: Dictionary of judge instances keyed by their configuration keys """ judges: Dict[str, AIJudge] = {} - + async def create_judge_for_config(judge_key: str): judge = await self.create_judge( judge_key, @@ -222,22 +214,22 @@ async def create_judge_for_config(judge_key: str): default_ai_provider, ) return judge_key, judge - + judge_promises = [ create_judge_for_config(judge_config.key) for judge_config in judge_configs ] - + import asyncio results = await asyncio.gather(*judge_promises, return_exceptions=True) - + for result in results: if isinstance(result, Exception): continue - judge_key, judge = result + judge_key, judge = result # type: ignore[misc] if judge: judges[judge_key] = judge - + return judges async def create_chat( @@ -275,7 +267,7 @@ async def create_chat( if chat: response = await chat.invoke("I need help with my order") print(response.message.content) - + # Access conversation history messages = chat.get_messages() print(f"Conversation has {len(messages)} messages") diff --git a/ldai/judge/__init__.py b/ldai/judge/__init__.py index 323cd19..3caad65 100644 --- a/ldai/judge/__init__.py +++ b/ldai/judge/__init__.py @@ -5,17 +5,18 @@ import chevron +from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder from ldai.models import AIJudgeConfig, LDMessage from ldai.providers.ai_provider import AIProvider -from ldai.providers.types import ChatResponse, EvalScore, JudgeResponse, StructuredResponse +from ldai.providers.types import (ChatResponse, EvalScore, JudgeResponse, + StructuredResponse) from ldai.tracker import LDAIConfigTracker -from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder class AIJudge: """ Judge implementation that handles evaluation functionality and conversation management. - + According to the AIEval spec, judges are AI Configs with mode: "judge" that evaluate other AI Configs using structured output. """ @@ -29,7 +30,7 @@ def __init__( ): """ Initialize the Judge. - + :param ai_config: The judge AI configuration :param ai_config_tracker: The tracker for the judge configuration :param ai_provider: The AI provider to use for evaluation @@ -51,7 +52,7 @@ async def evaluate( ) -> Optional[JudgeResponse]: """ Evaluates an AI response using the judge's configuration. - + :param input_text: The input prompt or question that was provided to the AI :param output_text: The AI-generated response to be evaluated :param sampling_rate: Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1) @@ -113,7 +114,7 @@ async def evaluate_messages( ) -> Optional[JudgeResponse]: """ Evaluates an AI response from chat messages and response. - + :param messages: Array of messages representing the conversation history :param response: The AI response to be evaluated :param sampling_ratio: Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1) @@ -127,7 +128,7 @@ async def evaluate_messages( def get_ai_config(self) -> AIJudgeConfig: """ Returns the AI Config used by this judge. - + :return: The judge AI configuration """ return self._ai_config @@ -135,7 +136,7 @@ def get_ai_config(self) -> AIJudgeConfig: def get_tracker(self) -> LDAIConfigTracker: """ Returns the tracker associated with this judge. 
- + :return: The tracker for the judge configuration """ return self._ai_config_tracker @@ -143,7 +144,7 @@ def get_tracker(self) -> LDAIConfigTracker: def get_provider(self) -> AIProvider: """ Returns the AI provider used by this judge. - + :return: The AI provider """ return self._ai_provider @@ -151,7 +152,7 @@ def get_provider(self) -> AIProvider: def _construct_evaluation_messages(self, input_text: str, output_text: str) -> list[LDMessage]: """ Constructs evaluation messages by combining judge's config messages with input/output. - + :param input_text: The input text :param output_text: The output text to evaluate :return: List of messages for evaluation @@ -173,7 +174,7 @@ def _construct_evaluation_messages(self, input_text: str, output_text: str) -> l def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: """ Interpolates message content with variables using Mustache templating. - + :param content: The message content template :param variables: Variables to interpolate :return: Interpolated message content @@ -184,7 +185,7 @@ def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScore]: """ Parses the structured evaluation response from the AI provider. - + :param data: The structured response data :return: Dictionary of evaluation scores keyed by metric key """ @@ -227,5 +228,3 @@ def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScor results[metric_key] = EvalScore(score=float(score), reasoning=reasoning) return results - - diff --git a/ldai/judge/evaluation_schema_builder.py b/ldai/judge/evaluation_schema_builder.py index 1965e64..c996f08 100644 --- a/ldai/judge/evaluation_schema_builder.py +++ b/ldai/judge/evaluation_schema_builder.py @@ -13,7 +13,7 @@ class EvaluationSchemaBuilder: def build(evaluation_metric_keys: list[str]) -> Dict[str, Any]: """ Build an evaluation response schema from evaluation metric keys. - + :param evaluation_metric_keys: List of evaluation metric keys :return: Schema dictionary for structured output """ @@ -38,7 +38,7 @@ def build(evaluation_metric_keys: list[str]) -> Dict[str, Any]: def _build_key_properties(evaluation_metric_keys: list[str]) -> Dict[str, Any]: """ Build properties for each evaluation metric key. - + :param evaluation_metric_keys: List of evaluation metric keys :return: Dictionary of properties for each key """ @@ -51,7 +51,7 @@ def _build_key_properties(evaluation_metric_keys: list[str]) -> Dict[str, Any]: def _build_key_schema(key: str) -> Dict[str, Any]: """ Build schema for a single evaluation metric key. - + :param key: Evaluation metric key :return: Schema dictionary for the key """ @@ -72,4 +72,3 @@ def _build_key_schema(key: str) -> Dict[str, Any]: 'required': ['score', 'reasoning'], 'additionalProperties': False, } - diff --git a/ldai/models.py b/ldai/models.py index fa36f8c..c075dcf 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -110,7 +110,7 @@ class JudgeConfiguration: """ Configuration for judge attachment to AI Configs. 
""" - + @dataclass(frozen=True) class Judge: """ @@ -127,7 +127,7 @@ def to_dict(self) -> dict: 'key': self.key, 'samplingRate': self.sampling_rate, } - + judges: List['JudgeConfiguration.Judge'] def to_dict(self) -> dict: @@ -342,15 +342,10 @@ class AIAgentConfigRequest: # Deprecated Type Aliases for Backward Compatibility # ============================================================================ -# Note: These are type aliases that point to the new types. -# Since Python uses duck typing, these will work at runtime even if type checkers complain. -# The old AIConfig had optional enabled, so it maps to AICompletionConfigDefault -# The old AIConfig return type had required enabled, so it maps to AICompletionConfig - -# Deprecated: Use AICompletionConfigDefault instead -# This was the old AIConfig with optional enabled (used as input/default) -# Note: We map to AICompletionConfigDefault since the old AIConfig had optional enabled -AIConfig = AICompletionConfigDefault +# Note: AIConfig is now defined above as a base class (line 169). +# For backward compatibility, code should migrate to: +# - Use AICompletionConfigDefault for default/input values +# - Use AICompletionConfig for return values # Deprecated: Use AIAgentConfigDefault instead LDAIAgentDefaults = AIAgentConfigDefault @@ -360,4 +355,3 @@ class AIAgentConfigRequest: # Deprecated: Use AIAgentConfig instead (note: this was the old return type) LDAIAgent = AIAgentConfig - diff --git a/ldai/providers/__init__.py b/ldai/providers/__init__.py index 1beffb4..1b3452b 100644 --- a/ldai/providers/__init__.py +++ b/ldai/providers/__init__.py @@ -1,7 +1,8 @@ """AI Provider interfaces and factory for LaunchDarkly AI SDK.""" from ldai.providers.ai_provider import AIProvider -from ldai.providers.ai_provider_factory import AIProviderFactory, SupportedAIProvider +from ldai.providers.ai_provider_factory import (AIProviderFactory, + SupportedAIProvider) # Export LangChain provider if available try: @@ -18,4 +19,3 @@ 'AIProviderFactory', 'SupportedAIProvider', ] - diff --git a/ldai/providers/ai_provider.py b/ldai/providers/ai_provider.py index 5863a74..cc7b21e 100644 --- a/ldai/providers/ai_provider.py +++ b/ldai/providers/ai_provider.py @@ -10,10 +10,10 @@ class AIProvider(ABC): """ Abstract base class for AI providers that implement chat model functionality. - + This class provides the contract that all provider implementations must follow to integrate with LaunchDarkly's tracking and configuration capabilities. - + Following the AICHAT spec recommendation to use base classes with non-abstract methods for better extensibility and backwards compatibility. """ @@ -21,7 +21,7 @@ class AIProvider(ABC): def __init__(self, logger: Optional[Any] = None): """ Initialize the AI provider. - + :param logger: Optional logger for logging provider operations. """ self.logger = logger @@ -29,22 +29,22 @@ def __init__(self, logger: Optional[Any] = None): async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: """ Invoke the chat model with an array of messages. - + This method should convert messages to provider format, invoke the model, and return a ChatResponse with the result and metrics. - + Default implementation takes no action and returns a placeholder response. Provider implementations should override this method. 
- + :param messages: Array of LDMessage objects representing the conversation :return: ChatResponse containing the model's response """ if self.logger: self.logger.warn('invokeModel not implemented by this provider') - + from ldai.models import LDMessage from ldai.providers.types import LDAIMetrics - + return ChatResponse( message=LDMessage(role='assistant', content=''), metrics=LDAIMetrics(success=False, usage=None), @@ -57,22 +57,22 @@ async def invoke_structured_model( ) -> StructuredResponse: """ Invoke the chat model with structured output support. - + This method should convert messages to provider format, invoke the model with structured output configuration, and return a structured response. - + Default implementation takes no action and returns a placeholder response. Provider implementations should override this method. - + :param messages: Array of LDMessage objects representing the conversation :param response_structure: Dictionary of output configurations keyed by output name :return: StructuredResponse containing the structured data """ if self.logger: self.logger.warn('invokeStructuredModel not implemented by this provider') - + from ldai.providers.types import LDAIMetrics - + return StructuredResponse( data={}, raw_response='', @@ -84,13 +84,12 @@ async def invoke_structured_model( async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'AIProvider': """ Static method that constructs an instance of the provider. - + Each provider implementation must provide their own static create method that accepts an AIConfigKind and returns a configured instance. - + :param ai_config: The LaunchDarkly AI configuration :param logger: Optional logger for the provider :return: Configured provider instance """ raise NotImplementedError('Provider implementations must override the static create method') - diff --git a/ldai/providers/ai_provider_factory.py b/ldai/providers/ai_provider_factory.py index 41cc1c2..5dd441d 100644 --- a/ldai/providers/ai_provider_factory.py +++ b/ldai/providers/ai_provider_factory.py @@ -6,7 +6,6 @@ from ldai.models import AIConfigKind from ldai.providers.ai_provider import AIProvider - # List of supported AI providers SUPPORTED_AI_PROVIDERS = [ # Multi-provider packages should be last in the list @@ -30,10 +29,10 @@ async def create( ) -> Optional[AIProvider]: """ Create an AIProvider instance based on the AI configuration. - + This method attempts to load provider-specific implementations dynamically. Returns None if the provider is not supported. - + :param ai_config: The AI configuration :param logger: Optional logger for logging provider initialization :param default_ai_provider: Optional default AI provider to use @@ -63,7 +62,7 @@ def _get_providers_to_try( ) -> List[SupportedAIProvider]: """ Determine which providers to try based on default_ai_provider and provider_name. - + :param default_ai_provider: Optional default provider to use :param provider_name: Optional provider name from config :return: List of providers to try in order @@ -84,7 +83,9 @@ def _get_providers_to_try( for provider in multi_provider_packages: provider_set.add(provider) - return list(provider_set) + # Return list of providers, converting from set + # The set contains strings that should be valid SupportedAIProvider values + return list(provider_set) # type: ignore[arg-type] @staticmethod async def _try_create_provider( @@ -94,7 +95,7 @@ async def _try_create_provider( ) -> Optional[AIProvider]: """ Try to create a provider of the specified type. 
- + :param provider_type: Type of provider to create :param ai_config: AI configuration :param logger: Optional logger @@ -136,7 +137,7 @@ async def _create_provider( ) -> Optional[AIProvider]: """ Create a provider instance dynamically. - + :param package_name: Name of the package containing the provider :param provider_class_name: Name of the provider class :param ai_config: AI configuration @@ -164,4 +165,3 @@ async def _create_provider( f"with package {package_name}: {error}" ) return None - diff --git a/ldai/providers/langchain/__init__.py b/ldai/providers/langchain/__init__.py index f2e2c35..f1a5aa1 100644 --- a/ldai/providers/langchain/__init__.py +++ b/ldai/providers/langchain/__init__.py @@ -3,7 +3,8 @@ from typing import Any, Dict, List, Optional from langchain_core.language_models.chat_models import BaseChatModel -from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage +from langchain_core.messages import (AIMessage, BaseMessage, HumanMessage, + SystemMessage) from ldai.models import AIConfigKind, LDMessage from ldai.providers.ai_provider import AIProvider @@ -14,14 +15,14 @@ class LangChainProvider(AIProvider): """ LangChain implementation of AIProvider. - + This provider integrates LangChain models with LaunchDarkly's tracking capabilities. """ def __init__(self, llm: BaseChatModel, logger: Optional[Any] = None): """ Initialize the LangChain provider. - + :param llm: LangChain BaseChatModel instance :param logger: Optional logger for logging provider operations """ @@ -36,7 +37,7 @@ def __init__(self, llm: BaseChatModel, logger: Optional[Any] = None): async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'LangChainProvider': """ Static factory method to create a LangChain AIProvider from an AI configuration. - + :param ai_config: The LaunchDarkly AI configuration :param logger: Optional logger for the provider :return: Configured LangChainProvider instance @@ -51,7 +52,7 @@ async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'Lang async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: """ Invoke the LangChain model with an array of messages. - + :param messages: Array of LDMessage objects representing the conversation :return: ChatResponse containing the model's response """ @@ -60,10 +61,15 @@ async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) # Get the LangChain response - response: AIMessage = await self._llm.ainvoke(langchain_messages) + response: BaseMessage = await self._llm.ainvoke(langchain_messages) # Generate metrics early (assumes success by default) - metrics = LangChainProvider.get_ai_metrics_from_response(response) + # Most chat models return AIMessage, but we handle BaseMessage generically + if isinstance(response, AIMessage): + metrics = LangChainProvider.get_ai_metrics_from_response(response) + else: + # For non-AIMessage responses, create default metrics + metrics = LDAIMetrics(success=True, usage=TokenUsage(total=0, input=0, output=0)) # Extract text content from the response content: str = '' @@ -104,7 +110,7 @@ async def invoke_structured_model( ) -> StructuredResponse: """ Invoke the LangChain model with structured output support. 
- + :param messages: Array of LDMessage objects representing the conversation :param response_structure: Dictionary of output configurations keyed by output name :return: StructuredResponse containing the structured data @@ -124,7 +130,10 @@ async def invoke_structured_model( if isinstance(response_obj, AIMessage): import json try: - response = json.loads(response_obj.content) + if isinstance(response_obj.content, str): + response = json.loads(response_obj.content) + else: + response = {'content': response_obj.content} except json.JSONDecodeError: response = {'content': response_obj.content} else: @@ -158,7 +167,7 @@ async def invoke_structured_model( def get_chat_model(self) -> BaseChatModel: """ Get the underlying LangChain model instance. - + :return: The LangChain BaseChatModel instance """ return self._llm @@ -171,10 +180,10 @@ def get_chat_model(self) -> BaseChatModel: def map_provider(ld_provider_name: str) -> str: """ Map LaunchDarkly provider names to LangChain provider names. - + This method enables seamless integration between LaunchDarkly's standardized provider naming and LangChain's naming conventions. - + :param ld_provider_name: LaunchDarkly provider name :return: LangChain provider name """ @@ -190,10 +199,10 @@ def map_provider(ld_provider_name: str) -> str: def get_ai_metrics_from_response(response: AIMessage) -> LDAIMetrics: """ Get AI metrics from a LangChain provider response. - + This method extracts token usage information and success status from LangChain responses and returns a LaunchDarkly LDAIMetrics object. - + :param response: The response from the LangChain model :return: LDAIMetrics with success status and token usage """ @@ -215,10 +224,10 @@ def get_ai_metrics_from_response(response: AIMessage) -> LDAIMetrics: def convert_messages_to_langchain(messages: List[LDMessage]) -> List[BaseMessage]: """ Convert LaunchDarkly messages to LangChain messages. - + This helper method enables developers to work directly with LangChain message types while maintaining compatibility with LaunchDarkly's standardized message format. - + :param messages: List of LDMessage objects :return: List of LangChain message objects """ @@ -238,10 +247,10 @@ def convert_messages_to_langchain(messages: List[LDMessage]) -> List[BaseMessage async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: """ Create a LangChain model from an AI configuration. - + This public helper method enables developers to initialize their own LangChain models using LaunchDarkly AI configurations. 
- + :param ai_config: The LaunchDarkly AI configuration :return: A configured LangChain BaseChatModel """ @@ -256,22 +265,27 @@ async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: try: # Try to import init_chat_model from langchain.chat_models # This is available in langchain >= 0.1.0 + # Use importlib to avoid mypy no-redef error with fallback imports + import importlib + init_chat_model = None try: - from langchain.chat_models import init_chat_model - except ImportError: + module = importlib.import_module('langchain.chat_models') + init_chat_model = getattr(module, 'init_chat_model') + except (ImportError, AttributeError): # Fallback for older versions or different import path - from langchain.chat_models.universal import init_chat_model - + module = importlib.import_module('langchain.chat_models.universal') + init_chat_model = getattr(module, 'init_chat_model') + # Map provider name langchain_provider = LangChainProvider.map_provider(provider) - + # Create model configuration model_kwargs = {**parameters} if langchain_provider: model_kwargs['model_provider'] = langchain_provider - + # Initialize the chat model (init_chat_model may be async or sync) - result = init_chat_model(model_name, **model_kwargs) + result = init_chat_model(model_name, **model_kwargs) # type: ignore[misc] # Handle both sync and async initialization if hasattr(result, '__await__'): return await result @@ -281,4 +295,3 @@ async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: 'langchain package is required for LangChainProvider. ' 'Install it with: pip install langchain langchain-core' ) from e - diff --git a/ldai/providers/types.py b/ldai/providers/types.py index 45df755..de54698 100644 --- a/ldai/providers/types.py +++ b/ldai/providers/types.py @@ -89,4 +89,3 @@ def to_dict(self) -> Dict[str, Any]: if self.error is not None: result['error'] = self.error return result - diff --git a/ldai/testing/test_model_config.py b/ldai/testing/test_model_config.py index 13c0523..26a02c9 100644 --- a/ldai/testing/test_model_config.py +++ b/ldai/testing/test_model_config.py @@ -2,7 +2,8 @@ from ldclient import Config, Context, LDClient from ldclient.integrations.test_data import TestData -from ldai import AIConfig, LDAIClient, LDMessage, ModelConfig +from ldai import LDAIClient, LDMessage, ModelConfig +from ldai.models import AICompletionConfigDefault @pytest.fixture @@ -133,7 +134,7 @@ def test_model_config_handles_custom(): def test_uses_default_on_invalid_flag(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig( + default_value = AICompletionConfigDefault( enabled=True, model=ModelConfig('fakeModel', parameters={'temperature': 0.5, 'maxTokens': 4096}), messages=[LDMessage(role='system', content='Hello, {{name}}!')], @@ -155,7 +156,7 @@ def test_uses_default_on_invalid_flag(ldai_client: LDAIClient): def test_model_config_interpolation(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig( + default_value = AICompletionConfigDefault( enabled=True, model=ModelConfig('fakeModel'), messages=[LDMessage(role='system', content='Hello, {{name}}!')], @@ -177,7 +178,7 @@ def test_model_config_interpolation(ldai_client: LDAIClient): def test_model_config_no_variables(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) config 
= ldai_client.config('model-config', context, default_value, {}) @@ -194,7 +195,7 @@ def test_model_config_no_variables(ldai_client: LDAIClient): def test_provider_config_handling(ldai_client: LDAIClient): context = Context.builder('user-key').name("Sandy").build() - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} config = ldai_client.config('model-config', context, default_value, variables) @@ -205,7 +206,7 @@ def test_provider_config_handling(ldai_client: LDAIClient): def test_context_interpolation(ldai_client: LDAIClient): context = Context.builder('user-key').name("Sandy").set('last', 'Beaches').build() - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} config = ldai_client.config( @@ -228,7 +229,7 @@ def test_multi_context_interpolation(ldai_client: LDAIClient): user_context = Context.builder('user-key').name("Sandy").build() org_context = Context.builder('org-key').kind('org').name("LaunchDarkly").set('shortname', 'LD').build() context = Context.multi_builder().add(user_context).add(org_context).build() - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} config = ldai_client.config( @@ -249,7 +250,7 @@ def test_multi_context_interpolation(ldai_client: LDAIClient): def test_model_config_multiple(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World', 'day': 'Monday'} config = ldai_client.config( @@ -270,7 +271,7 @@ def test_model_config_multiple(ldai_client: LDAIClient): def test_model_config_disabled(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) config = ldai_client.config('off-config', context, default_value, {}) @@ -283,7 +284,7 @@ def test_model_config_disabled(ldai_client: LDAIClient): def test_model_initial_config_disabled(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) config = ldai_client.config('initial-config-disabled', context, default_value, {}) @@ -295,7 +296,7 @@ def test_model_initial_config_disabled(ldai_client: LDAIClient): def test_model_initial_config_enabled(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) config = ldai_client.config('initial-config-enabled', context, default_value, {}) @@ -318,7 +319,7 @@ def test_config_method_tracking(ldai_client: LDAIClient): client = LDAIClient(mock_client) context = 
Context.create('user-key') - default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) config = client.config('test-config-key', context, default_value) diff --git a/ldai/tracker.py b/ldai/tracker.py index 632f0f4..11b846a 100644 --- a/ldai/tracker.py +++ b/ldai/tracker.py @@ -211,7 +211,7 @@ def track_eval_scores(self, scores: Dict[str, Any]) -> None: :param scores: Dictionary mapping metric keys to their evaluation scores (EvalScore objects) """ from ldai.providers.types import EvalScore - + # Track each evaluation score individually for metric_key, eval_score in scores.items(): if isinstance(eval_score, EvalScore): @@ -229,12 +229,12 @@ def track_judge_response(self, judge_response: Any) -> None: :param judge_response: JudgeResponse object containing evals and success status """ from ldai.providers.types import JudgeResponse - + if isinstance(judge_response, JudgeResponse): # Track evaluation scores if judge_response.evals: self.track_eval_scores(judge_response.evals) - + # Track success/error based on judge response if judge_response.success: self.track_success() diff --git a/setup.cfg b/setup.cfg index c178190..1fb1827 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,2 @@ [pycodestyle] -ignore = E501 +ignore = E501,W503 From 3ffb55d1434a8f1ba4af6c75d0b0bc46e79fafd2 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Tue, 9 Dec 2025 19:15:06 +0100 Subject: [PATCH 16/28] fix deps --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 200215c..9c1f44a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ chevron = "=0.14.0" pytest = ">=2.8" pytest-cov = ">=2.4.0" pytest-mypy = "==1.0.1" +pytest-asyncio = ">=0.21.0" mypy = "==1.18.2" pycodestyle = "^2.12.1" isort = ">=5.13.2,<7.0.0" From 64bb5f75b5a62022c1fb5a37fa1dde2e2c632db4 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Tue, 9 Dec 2025 22:59:05 +0100 Subject: [PATCH 17/28] remove langchain and comment ref lines for now --- ldai/providers/__init__.py | 35 +-- ldai/providers/ai_provider_factory.py | 42 ++-- ldai/providers/langchain/__init__.py | 297 -------------------------- 3 files changed, 44 insertions(+), 330 deletions(-) delete mode 100644 ldai/providers/langchain/__init__.py diff --git a/ldai/providers/__init__.py b/ldai/providers/__init__.py index 1b3452b..48152cc 100644 --- a/ldai/providers/__init__.py +++ b/ldai/providers/__init__.py @@ -5,17 +5,24 @@ SupportedAIProvider) # Export LangChain provider if available -try: - from ldai.providers.langchain import LangChainProvider - __all__ = [ - 'AIProvider', - 'AIProviderFactory', - 'LangChainProvider', - 'SupportedAIProvider', - ] -except ImportError: - __all__ = [ - 'AIProvider', - 'AIProviderFactory', - 'SupportedAIProvider', - ] +# TODO: Uncomment when langchain provider package is introduced +# try: +# from ldai.providers.langchain import LangChainProvider +# __all__ = [ +# 'AIProvider', +# 'AIProviderFactory', +# 'LangChainProvider', +# 'SupportedAIProvider', +# ] +# except ImportError: +# __all__ = [ +# 'AIProvider', +# 'AIProviderFactory', +# 'SupportedAIProvider', +# ] + +__all__ = [ + 'AIProvider', + 'AIProviderFactory', + 'SupportedAIProvider', +] diff --git a/ldai/providers/ai_provider_factory.py b/ldai/providers/ai_provider_factory.py index 5dd441d..3fd0f50 100644 --- a/ldai/providers/ai_provider_factory.py +++ b/ldai/providers/ai_provider_factory.py @@ -1,19 +1,21 @@ 
"""Factory for creating AIProvider instances based on the provider configuration.""" import importlib -from typing import Any, List, Literal, Optional, Type +from typing import Any, Dict, List, Literal, Optional, Tuple, Type from ldai.models import AIConfigKind from ldai.providers.ai_provider import AIProvider # List of supported AI providers -SUPPORTED_AI_PROVIDERS = [ +SUPPORTED_AI_PROVIDERS: List[str] = [ # Multi-provider packages should be last in the list - 'langchain', + # 'langchain', # TODO: Uncomment when langchain provider package is introduced ] # Type representing the supported AI providers -SupportedAIProvider = Literal['langchain'] +# TODO: Update this type when provider packages are introduced +# SupportedAIProvider = Literal['langchain'] +SupportedAIProvider = Literal['none'] # Placeholder until providers are added class AIProviderFactory: @@ -79,9 +81,10 @@ def _get_providers_to_try( provider_set.add(provider_name) # type: ignore # Then try multi-provider packages, but avoid duplicates - multi_provider_packages: List[SupportedAIProvider] = ['langchain'] - for provider in multi_provider_packages: - provider_set.add(provider) + # TODO: Uncomment when langchain provider package is introduced + # multi_provider_packages: List[SupportedAIProvider] = ['langchain'] + # for provider in multi_provider_packages: + # provider_set.add(provider) # Return list of providers, converting from set # The set contains strings that should be valid SupportedAIProvider values @@ -102,20 +105,21 @@ async def _try_create_provider( :return: AIProvider instance or None if creation failed """ # Handle built-in providers (part of this package) - if provider_type == 'langchain': - try: - from ldai.providers.langchain import LangChainProvider - return await LangChainProvider.create(ai_config, logger) - except ImportError as error: - if logger: - logger.warn( - f"Error creating LangChainProvider: {error}. " - f"Make sure langchain and langchain-core packages are installed." - ) - return None + # TODO: Uncomment when langchain provider package is introduced + # if provider_type == 'langchain': + # try: + # from ldai.providers.langchain import LangChainProvider + # return await LangChainProvider.create(ai_config, logger) + # except ImportError as error: + # if logger: + # logger.warn( + # f"Error creating LangChainProvider: {error}. " + # f"Make sure langchain and langchain-core packages are installed." + # ) + # return None # For future external providers, use dynamic import - provider_mappings = { + provider_mappings: Dict[str, Tuple[str, str]] = { # 'openai': ('launchdarkly_server_sdk_ai_openai', 'OpenAIProvider'), # 'vercel': ('launchdarkly_server_sdk_ai_vercel', 'VercelProvider'), } diff --git a/ldai/providers/langchain/__init__.py b/ldai/providers/langchain/__init__.py deleted file mode 100644 index f1a5aa1..0000000 --- a/ldai/providers/langchain/__init__.py +++ /dev/null @@ -1,297 +0,0 @@ -"""LangChain implementation of AIProvider for LaunchDarkly AI SDK.""" - -from typing import Any, Dict, List, Optional - -from langchain_core.language_models.chat_models import BaseChatModel -from langchain_core.messages import (AIMessage, BaseMessage, HumanMessage, - SystemMessage) - -from ldai.models import AIConfigKind, LDMessage -from ldai.providers.ai_provider import AIProvider -from ldai.providers.types import ChatResponse, LDAIMetrics, StructuredResponse -from ldai.tracker import TokenUsage - - -class LangChainProvider(AIProvider): - """ - LangChain implementation of AIProvider. 
- - This provider integrates LangChain models with LaunchDarkly's tracking capabilities. - """ - - def __init__(self, llm: BaseChatModel, logger: Optional[Any] = None): - """ - Initialize the LangChain provider. - - :param llm: LangChain BaseChatModel instance - :param logger: Optional logger for logging provider operations - """ - super().__init__(logger) - self._llm = llm - - # ============================================================================= - # MAIN FACTORY METHOD - # ============================================================================= - - @staticmethod - async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'LangChainProvider': - """ - Static factory method to create a LangChain AIProvider from an AI configuration. - - :param ai_config: The LaunchDarkly AI configuration - :param logger: Optional logger for the provider - :return: Configured LangChainProvider instance - """ - llm = await LangChainProvider.create_langchain_model(ai_config) - return LangChainProvider(llm, logger) - - # ============================================================================= - # INSTANCE METHODS (AIProvider Implementation) - # ============================================================================= - - async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: - """ - Invoke the LangChain model with an array of messages. - - :param messages: Array of LDMessage objects representing the conversation - :return: ChatResponse containing the model's response - """ - try: - # Convert LDMessage[] to LangChain messages - langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) - - # Get the LangChain response - response: BaseMessage = await self._llm.ainvoke(langchain_messages) - - # Generate metrics early (assumes success by default) - # Most chat models return AIMessage, but we handle BaseMessage generically - if isinstance(response, AIMessage): - metrics = LangChainProvider.get_ai_metrics_from_response(response) - else: - # For non-AIMessage responses, create default metrics - metrics = LDAIMetrics(success=True, usage=TokenUsage(total=0, input=0, output=0)) - - # Extract text content from the response - content: str = '' - if isinstance(response.content, str): - content = response.content - else: - # Log warning for non-string content (likely multimodal) - if self.logger: - self.logger.warn( - f"Multimodal response not supported, expecting a string. " - f"Content type: {type(response.content)}, Content: {response.content}" - ) - # Update metrics to reflect content loss - metrics.success = False - - # Create the assistant message - from ldai.models import LDMessage - assistant_message = LDMessage(role='assistant', content=content) - - return ChatResponse( - message=assistant_message, - metrics=metrics, - ) - except Exception as error: - if self.logger: - self.logger.warn(f'LangChain model invocation failed: {error}') - - from ldai.models import LDMessage - return ChatResponse( - message=LDMessage(role='assistant', content=''), - metrics=LDAIMetrics(success=False, usage=None), - ) - - async def invoke_structured_model( - self, - messages: List[LDMessage], - response_structure: Dict[str, Any], - ) -> StructuredResponse: - """ - Invoke the LangChain model with structured output support. 
- - :param messages: Array of LDMessage objects representing the conversation - :param response_structure: Dictionary of output configurations keyed by output name - :return: StructuredResponse containing the structured data - """ - try: - # Convert LDMessage[] to LangChain messages - langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) - - # Get the LangChain response with structured output - # Note: with_structured_output is available on BaseChatModel in newer LangChain versions - if hasattr(self._llm, 'with_structured_output'): - structured_llm = self._llm.with_structured_output(response_structure) - response = await structured_llm.ainvoke(langchain_messages) - else: - # Fallback: invoke normally and try to parse as JSON - response_obj = await self._llm.ainvoke(langchain_messages) - if isinstance(response_obj, AIMessage): - import json - try: - if isinstance(response_obj.content, str): - response = json.loads(response_obj.content) - else: - response = {'content': response_obj.content} - except json.JSONDecodeError: - response = {'content': response_obj.content} - else: - response = response_obj - - # Using structured output doesn't support metrics - metrics = LDAIMetrics( - success=True, - usage=TokenUsage(total=0, input=0, output=0), - ) - - import json - return StructuredResponse( - data=response if isinstance(response, dict) else {'result': response}, - raw_response=json.dumps(response) if not isinstance(response, str) else response, - metrics=metrics, - ) - except Exception as error: - if self.logger: - self.logger.warn(f'LangChain structured model invocation failed: {error}') - - return StructuredResponse( - data={}, - raw_response='', - metrics=LDAIMetrics( - success=False, - usage=TokenUsage(total=0, input=0, output=0), - ), - ) - - def get_chat_model(self) -> BaseChatModel: - """ - Get the underlying LangChain model instance. - - :return: The LangChain BaseChatModel instance - """ - return self._llm - - # ============================================================================= - # STATIC UTILITY METHODS - # ============================================================================= - - @staticmethod - def map_provider(ld_provider_name: str) -> str: - """ - Map LaunchDarkly provider names to LangChain provider names. - - This method enables seamless integration between LaunchDarkly's standardized - provider naming and LangChain's naming conventions. - - :param ld_provider_name: LaunchDarkly provider name - :return: LangChain provider name - """ - lowercased_name = ld_provider_name.lower() - - mapping: Dict[str, str] = { - 'gemini': 'google-genai', - } - - return mapping.get(lowercased_name, lowercased_name) - - @staticmethod - def get_ai_metrics_from_response(response: AIMessage) -> LDAIMetrics: - """ - Get AI metrics from a LangChain provider response. - - This method extracts token usage information and success status from LangChain responses - and returns a LaunchDarkly LDAIMetrics object. 
- - :param response: The response from the LangChain model - :return: LDAIMetrics with success status and token usage - """ - # Extract token usage if available - usage: Optional[TokenUsage] = None - if hasattr(response, 'response_metadata') and response.response_metadata: - token_usage = response.response_metadata.get('token_usage') - if token_usage: - usage = TokenUsage( - total=token_usage.get('total_tokens', 0) or token_usage.get('totalTokens', 0) or 0, - input=token_usage.get('prompt_tokens', 0) or token_usage.get('promptTokens', 0) or 0, - output=token_usage.get('completion_tokens', 0) or token_usage.get('completionTokens', 0) or 0, - ) - - # LangChain responses that complete successfully are considered successful by default - return LDAIMetrics(success=True, usage=usage) - - @staticmethod - def convert_messages_to_langchain(messages: List[LDMessage]) -> List[BaseMessage]: - """ - Convert LaunchDarkly messages to LangChain messages. - - This helper method enables developers to work directly with LangChain message types - while maintaining compatibility with LaunchDarkly's standardized message format. - - :param messages: List of LDMessage objects - :return: List of LangChain message objects - """ - result: List[BaseMessage] = [] - for msg in messages: - if msg.role == 'system': - result.append(SystemMessage(content=msg.content)) - elif msg.role == 'user': - result.append(HumanMessage(content=msg.content)) - elif msg.role == 'assistant': - result.append(AIMessage(content=msg.content)) - else: - raise ValueError(f'Unsupported message role: {msg.role}') - return result - - @staticmethod - async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: - """ - Create a LangChain model from an AI configuration. - - This public helper method enables developers to initialize their own LangChain models - using LaunchDarkly AI configurations. 
- - :param ai_config: The LaunchDarkly AI configuration - :return: A configured LangChain BaseChatModel - """ - model_name = ai_config.model.name if ai_config.model else '' - provider = ai_config.provider.name if ai_config.provider else '' - parameters = ai_config.model.get_parameter('parameters') if ai_config.model else {} - if not isinstance(parameters, dict): - parameters = {} - - # Use LangChain's init_chat_model to support multiple providers - # Note: This requires langchain package to be installed - try: - # Try to import init_chat_model from langchain.chat_models - # This is available in langchain >= 0.1.0 - # Use importlib to avoid mypy no-redef error with fallback imports - import importlib - init_chat_model = None - try: - module = importlib.import_module('langchain.chat_models') - init_chat_model = getattr(module, 'init_chat_model') - except (ImportError, AttributeError): - # Fallback for older versions or different import path - module = importlib.import_module('langchain.chat_models.universal') - init_chat_model = getattr(module, 'init_chat_model') - - # Map provider name - langchain_provider = LangChainProvider.map_provider(provider) - - # Create model configuration - model_kwargs = {**parameters} - if langchain_provider: - model_kwargs['model_provider'] = langchain_provider - - # Initialize the chat model (init_chat_model may be async or sync) - result = init_chat_model(model_name, **model_kwargs) # type: ignore[misc] - # Handle both sync and async initialization - if hasattr(result, '__await__'): - return await result - return result - except ImportError as e: - raise ImportError( - 'langchain package is required for LangChainProvider. ' - 'Install it with: pip install langchain langchain-core' - ) from e From 86acd6e8441ba31a7159de9696ffcf89308f8d70 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 10 Dec 2025 20:10:50 +0100 Subject: [PATCH 18/28] simplify --- ldai/chat/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py index bcb4284..931bb54 100644 --- a/ldai/chat/__init__.py +++ b/ldai/chat/__init__.py @@ -72,8 +72,7 @@ async def invoke(self, prompt: str) -> ChatResponse: and self._ai_config.judge_configuration.judges and len(self._ai_config.judge_configuration.judges) > 0 ): - evaluation_tasks = self._start_judge_evaluations(self._messages, response) - response.evaluations = evaluation_tasks + response.evaluations = self._start_judge_evaluations(self._messages, response) # Add the response message to conversation history self._messages.append(response.message) From 11f7602d844e8042c1e840d6aaf98f156973d6ac Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 10 Dec 2025 20:21:34 +0100 Subject: [PATCH 19/28] add judgeConfigKey --- ldai/chat/__init__.py | 3 ++- ldai/providers/types.py | 3 +++ ldai/tracker.py | 25 +++++++++++++++---------- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py index 931bb54..1418561 100644 --- a/ldai/chat/__init__.py +++ b/ldai/chat/__init__.py @@ -112,7 +112,8 @@ async def evaluate_judge(judge_config): ) if eval_result and eval_result.success: - self._tracker.track_eval_scores(eval_result.evals) + eval_result.judge_config_key = judge_config.key + self._tracker.track_judge_response(eval_result) return eval_result diff --git a/ldai/providers/types.py b/ldai/providers/types.py index de54698..436dd46 100644 --- a/ldai/providers/types.py +++ b/ldai/providers/types.py @@ -74,6 +74,7 @@ class JudgeResponse: 
""" Response from a judge evaluation containing scores and reasoning for multiple metrics. """ + judge_config_key: Optional[str] = None # The key of the judge configuration that was used to generate this response evals: Dict[str, EvalScore] # Dictionary where keys are metric names and values contain score and reasoning success: bool # Whether the evaluation completed successfully error: Optional[str] = None # Error message if evaluation failed @@ -86,6 +87,8 @@ def to_dict(self) -> Dict[str, Any]: 'evals': {key: eval_score.to_dict() for key, eval_score in self.evals.items()}, 'success': self.success, } + if self.judge_config_key is not None: + result['judgeConfigKey'] = self.judge_config_key if self.error is not None: result['error'] = self.error return result diff --git a/ldai/tracker.py b/ldai/tracker.py index 11b846a..8ed76f5 100644 --- a/ldai/tracker.py +++ b/ldai/tracker.py @@ -224,22 +224,27 @@ def track_eval_scores(self, scores: Dict[str, Any]) -> None: def track_judge_response(self, judge_response: Any) -> None: """ - Track a judge response, including evaluation scores and success status. + Track a judge response, including evaluation scores with judge config key. :param judge_response: JudgeResponse object containing evals and success status """ - from ldai.providers.types import JudgeResponse + from ldai.providers.types import JudgeResponse, EvalScore if isinstance(judge_response, JudgeResponse): - # Track evaluation scores + # Track evaluation scores with judge config key included in metadata if judge_response.evals: - self.track_eval_scores(judge_response.evals) - - # Track success/error based on judge response - if judge_response.success: - self.track_success() - else: - self.track_error() + track_data = self.__get_track_data() + if judge_response.judge_config_key: + track_data = {**track_data, 'judgeConfigKey': judge_response.judge_config_key} + + for metric_key, eval_score in judge_response.evals.items(): + if isinstance(eval_score, EvalScore): + self._ld_client.track( + metric_key, + self._context, + track_data, + eval_score.score + ) def track_feedback(self, feedback: Dict[str, FeedbackKind]) -> None: """ From 06acc2169110af4fccd384b8e44e9c45674d34c6 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 10 Dec 2025 20:25:25 +0100 Subject: [PATCH 20/28] strongly type JudgeResponse --- ldai/providers/types.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ldai/providers/types.py b/ldai/providers/types.py index 436dd46..5b208d4 100644 --- a/ldai/providers/types.py +++ b/ldai/providers/types.py @@ -1,5 +1,7 @@ """Types for AI provider responses.""" +from __future__ import annotations + from dataclasses import dataclass from typing import Any, Dict, List, Optional @@ -38,7 +40,7 @@ class ChatResponse: """ message: LDMessage metrics: LDAIMetrics - evaluations: Optional[List[Any]] = None # List of JudgeResponse, will be populated later + evaluations: Optional[List[JudgeResponse]] = None # List of JudgeResponse, will be populated later @dataclass From 84669d5622b2dcfe02f25e9161fb70a81562df1a Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 10 Dec 2025 20:32:19 +0100 Subject: [PATCH 21/28] AIJudge to Judge --- ldai/__init__.py | 4 ++-- ldai/chat/__init__.py | 6 +++--- ldai/client.py | 10 +++++----- ldai/judge/__init__.py | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/ldai/__init__.py b/ldai/__init__.py index 78125d7..5457f05 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -5,7 +5,7 @@ from ldai.chat import TrackedChat 
from ldai.client import LDAIClient # Export judge -from ldai.judge import AIJudge +from ldai.judge import Judge # Export models for convenience from ldai.models import ( # Deprecated aliases for backward compatibility AIAgentConfig, AIAgentConfigDefault, AIAgentConfigRequest, AIAgents, @@ -25,7 +25,7 @@ 'AICompletionConfigDefault', 'AIJudgeConfig', 'AIJudgeConfigDefault', - 'AIJudge', + 'Judge', 'TrackedChat', 'EvalScore', 'JudgeConfiguration', diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py index 1418561..13e8995 100644 --- a/ldai/chat/__init__.py +++ b/ldai/chat/__init__.py @@ -3,7 +3,7 @@ import asyncio from typing import Any, Dict, List, Optional -from ldai.judge import AIJudge +from ldai.judge import Judge from ldai.models import AICompletionConfig, LDMessage from ldai.providers.ai_provider import AIProvider from ldai.providers.types import ChatResponse, JudgeResponse @@ -24,7 +24,7 @@ def __init__( ai_config: AICompletionConfig, tracker: LDAIConfigTracker, provider: AIProvider, - judges: Optional[Dict[str, AIJudge]] = None, + judges: Optional[Dict[str, Judge]] = None, logger: Optional[Any] = None, ): """ @@ -151,7 +151,7 @@ def get_provider(self) -> AIProvider: """ return self._provider - def get_judges(self) -> Dict[str, AIJudge]: + def get_judges(self) -> Dict[str, Judge]: """ Get the judges associated with this TrackedChat. diff --git a/ldai/client.py b/ldai/client.py index 086e99b..fab8afe 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -6,7 +6,7 @@ from ldclient.client import LDClient from ldai.chat import TrackedChat -from ldai.judge import AIJudge +from ldai.judge import Judge from ldai.models import (AIAgentConfig, AIAgentConfigDefault, AIAgentConfigRequest, AIAgents, AICompletionConfig, AICompletionConfigDefault, AIJudgeConfig, @@ -121,7 +121,7 @@ async def create_judge( default_value: AIJudgeConfigDefault, variables: Optional[Dict[str, Any]] = None, default_ai_provider: Optional[SupportedAIProvider] = None, - ) -> Optional[AIJudge]: + ) -> Optional[Judge]: """ Creates and returns a new Judge instance for AI evaluation. @@ -182,7 +182,7 @@ async def create_judge( if not provider: return None - return AIJudge(judge_config, judge_config.tracker, provider, self._logger) + return Judge(judge_config, judge_config.tracker, provider, self._logger) except Exception as error: # Would log error if logger available return None @@ -193,7 +193,7 @@ async def _initialize_judges( context: Context, variables: Optional[Dict[str, Any]] = None, default_ai_provider: Optional[SupportedAIProvider] = None, - ) -> Dict[str, AIJudge]: + ) -> Dict[str, Judge]: """ Initialize judges from judge configurations. @@ -203,7 +203,7 @@ async def _initialize_judges( :param default_ai_provider: Optional default AI provider to use :return: Dictionary of judge instances keyed by their configuration keys """ - judges: Dict[str, AIJudge] = {} + judges: Dict[str, Judge] = {} async def create_judge_for_config(judge_key: str): judge = await self.create_judge( diff --git a/ldai/judge/__init__.py b/ldai/judge/__init__.py index 3caad65..3fbf2a1 100644 --- a/ldai/judge/__init__.py +++ b/ldai/judge/__init__.py @@ -13,7 +13,7 @@ from ldai.tracker import LDAIConfigTracker -class AIJudge: +class Judge: """ Judge implementation that handles evaluation functionality and conversation management. 
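A minimal end-to-end sketch of the Judge API as it stands after the rename above. The SDK key, the judge config key, and the AIJudgeConfigDefault field values below are illustrative assumptions rather than values taken from these patches, and create_judge resolves to None unless an AI provider package can be loaded (the bundled LangChain provider is commented out earlier in this series), so the happy path here assumes one is available:

    import asyncio

    from ldclient import Config, Context, LDClient

    from ldai import LDAIClient
    from ldai.models import AIJudgeConfigDefault


    async def main():
        ld_client = LDClient(Config('sdk-key'))  # hypothetical SDK key
        ai_client = LDAIClient(ld_client)

        # Key and defaults are placeholders; real values come from a
        # LaunchDarkly AI Config flag with mode "judge".
        judge = await ai_client.create_judge(
            'relevance-judge',
            Context.create('user-key'),
            AIJudgeConfigDefault(enabled=False, evaluation_metric_keys=[]),
        )

        if judge:
            result = await judge.evaluate('What is the capital of France?', 'Paris')
            if result and result.success:
                for metric_key, eval_score in result.evals.items():
                    print(metric_key, eval_score.score, eval_score.reasoning)

    asyncio.run(main())

Because evaluate returns Optional[JudgeResponse], and JudgeResponse carries a success flag plus per-metric EvalScore entries, callers can handle sampled-out and failed evaluations uniformly by checking for None and success before reading evals.
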
From d57c4f7244891f9fd1fc64488b4cd9115e162bd0 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Mon, 15 Dec 2025 19:00:40 +0100 Subject: [PATCH 22/28] add key to model --- ldai/client.py | 3 +++ ldai/models.py | 1 + 2 files changed, 4 insertions(+) diff --git a/ldai/client.py b/ldai/client.py index fab8afe..2881cf6 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -47,6 +47,7 @@ def completion_config( ) config = AICompletionConfig( + key=key, enabled=bool(enabled), model=model, messages=messages, @@ -104,6 +105,7 @@ def judge_config( evaluation_metric_keys = variation.get('evaluationMetricKeys', default_value.evaluation_metric_keys or []) config = AIJudgeConfig( + key=key, enabled=bool(enabled), evaluation_metric_keys=evaluation_metric_keys, model=model, @@ -551,6 +553,7 @@ def __evaluate_agent( final_instructions = instructions if instructions is not None else default_value.instructions return AIAgentConfig( + key=key, enabled=bool(enabled) if enabled is not None else (default_value.enabled or False), model=model or default_value.model, provider=provider or default_value.provider, diff --git a/ldai/models.py b/ldai/models.py index c075dcf..988d97d 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -170,6 +170,7 @@ class AIConfig: """ Base AI Config interface without mode-specific fields. """ + key: str enabled: bool model: Optional[ModelConfig] = None provider: Optional[ProviderConfig] = None From 351d4f1ac8b50931af6eb8ecba7f029a6f4d5029 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Tue, 16 Dec 2025 22:07:36 +0100 Subject: [PATCH 23/28] fixes --- ldai/providers/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ldai/providers/types.py b/ldai/providers/types.py index 5b208d4..e9160cc 100644 --- a/ldai/providers/types.py +++ b/ldai/providers/types.py @@ -76,9 +76,9 @@ class JudgeResponse: """ Response from a judge evaluation containing scores and reasoning for multiple metrics. 
""" - judge_config_key: Optional[str] = None # The key of the judge configuration that was used to generate this response evals: Dict[str, EvalScore] # Dictionary where keys are metric names and values contain score and reasoning success: bool # Whether the evaluation completed successfully + judge_config_key: Optional[str] = None # The key of the judge configuration that was used to generate this response error: Optional[str] = None # Error message if evaluation failed def to_dict(self) -> Dict[str, Any]: From 7a699ef47004fc6b98634546d23aaa00416320b3 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Tue, 16 Dec 2025 22:08:52 +0100 Subject: [PATCH 24/28] fix linting --- ldai/tracker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ldai/tracker.py b/ldai/tracker.py index 8ed76f5..b4d8b30 100644 --- a/ldai/tracker.py +++ b/ldai/tracker.py @@ -228,7 +228,7 @@ def track_judge_response(self, judge_response: Any) -> None: :param judge_response: JudgeResponse object containing evals and success status """ - from ldai.providers.types import JudgeResponse, EvalScore + from ldai.providers.types import EvalScore, JudgeResponse if isinstance(judge_response, JudgeResponse): # Track evaluation scores with judge config key included in metadata @@ -236,7 +236,7 @@ def track_judge_response(self, judge_response: Any) -> None: track_data = self.__get_track_data() if judge_response.judge_config_key: track_data = {**track_data, 'judgeConfigKey': judge_response.judge_config_key} - + for metric_key, eval_score in judge_response.evals.items(): if isinstance(eval_score, EvalScore): self._ld_client.track( From 8d3bfbbe544af5aad7d23392ca0c2a023b2c1de4 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Tue, 16 Dec 2025 22:18:00 +0100 Subject: [PATCH 25/28] revert to sync --- ldai/testing/test_tracker.py | 14 ++++++-------- ldai/tracker.py | 6 +++--- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/ldai/testing/test_tracker.py b/ldai/testing/test_tracker.py index 2e39d98..57f13fd 100644 --- a/ldai/testing/test_tracker.py +++ b/ldai/testing/test_tracker.py @@ -276,8 +276,7 @@ def test_tracks_bedrock_metrics_with_error(client: LDClient): assert tracker.get_summary().usage == TokenUsage(330, 220, 110) -@pytest.mark.asyncio -async def test_tracks_openai_metrics(client: LDClient): +def test_tracks_openai_metrics(client: LDClient): context = Context.create("user-key") tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) @@ -293,10 +292,10 @@ def to_dict(self): "completion_tokens": 110, } - async def get_result(): + def get_result(): return Result() - await tracker.track_openai_metrics(get_result) + tracker.track_openai_metrics(get_result) calls = [ call( @@ -330,16 +329,15 @@ async def get_result(): assert tracker.get_summary().usage == TokenUsage(330, 220, 110) -@pytest.mark.asyncio -async def test_tracks_openai_metrics_with_exception(client: LDClient): +def test_tracks_openai_metrics_with_exception(client: LDClient): context = Context.create("user-key") tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - async def raise_exception(): + def raise_exception(): raise ValueError("Something went wrong") try: - await tracker.track_openai_metrics(raise_exception) + tracker.track_openai_metrics(raise_exception) assert False, "Should have thrown an exception" except ValueError: pass diff --git a/ldai/tracker.py b/ldai/tracker.py index b4d8b30..63aa67c 100644 --- a/ldai/tracker.py 
+++ b/ldai/tracker.py @@ -286,7 +286,7 @@ def track_error(self) -> None: "$ld:ai:generation:error", self._context, self.__get_track_data(), 1 ) - async def track_openai_metrics(self, func): + def track_openai_metrics(self, func): """ Track OpenAI-specific operations. @@ -300,12 +300,12 @@ async def track_openai_metrics(self, func): A failed operation will not have any token usage data. - :param func: Async function to track. + :param func: Function to track. :return: Result of the tracked function. """ start_time = time.time() try: - result = await func() + result = func() end_time = time.time() duration = int((end_time - start_time) * 1000) self.track_duration(duration) From 5de380b08bcb5baa07ce8e574b9414e9956c3f2e Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Tue, 16 Dec 2025 23:29:42 +0000 Subject: [PATCH 26/28] judge should set key for responses --- ldai/chat/__init__.py | 1 - ldai/judge/__init__.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py index 13e8995..1283281 100644 --- a/ldai/chat/__init__.py +++ b/ldai/chat/__init__.py @@ -112,7 +112,6 @@ async def evaluate_judge(judge_config): ) if eval_result and eval_result.success: - eval_result.judge_config_key = judge_config.key self._tracker.track_judge_response(eval_result) return eval_result diff --git a/ldai/judge/__init__.py b/ldai/judge/__init__.py index 3fbf2a1..058eb13 100644 --- a/ldai/judge/__init__.py +++ b/ldai/judge/__init__.py @@ -94,6 +94,7 @@ async def evaluate( success = False return JudgeResponse( + judge_config_key=self._ai_config.key, evals=evals, success=success, ) From 07c54548daef7e00cf93f1c005e2941aa03bad9b Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Wed, 17 Dec 2025 04:08:36 +0000 Subject: [PATCH 27/28] use simplified Chat name --- ldai/__init__.py | 4 ++-- ldai/chat/__init__.py | 14 +++++++------- ldai/client.py | 10 +++++----- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/ldai/__init__.py b/ldai/__init__.py index 5457f05..d017cad 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -2,7 +2,7 @@ # Export main client # Export chat -from ldai.chat import TrackedChat +from ldai.chat import Chat from ldai.client import LDAIClient # Export judge from ldai.judge import Judge @@ -26,7 +26,7 @@ 'AIJudgeConfig', 'AIJudgeConfigDefault', 'Judge', - 'TrackedChat', + 'Chat', 'EvalScore', 'JudgeConfiguration', 'JudgeResponse', diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py index 1283281..ff9b0c7 100644 --- a/ldai/chat/__init__.py +++ b/ldai/chat/__init__.py @@ -1,4 +1,4 @@ -"""TrackedChat implementation for managing AI chat conversations.""" +"""Chat implementation for managing AI chat conversations.""" import asyncio from typing import Any, Dict, List, Optional @@ -10,9 +10,9 @@ from ldai.tracker import LDAIConfigTracker -class TrackedChat: +class Chat: """ - Concrete implementation of TrackedChat that provides chat functionality + Concrete implementation of Chat that provides chat functionality by delegating to an AIProvider implementation. This class handles conversation management and tracking, while delegating @@ -28,7 +28,7 @@ def __init__( logger: Optional[Any] = None, ): """ - Initialize the TrackedChat. + Initialize the Chat. 
:param ai_config: The completion AI configuration :param tracker: The tracker for the completion configuration @@ -126,7 +126,7 @@ async def evaluate_judge(judge_config): def get_config(self) -> AICompletionConfig: """ - Get the underlying AI configuration used to initialize this TrackedChat. + Get the underlying AI configuration used to initialize this Chat. :return: The AI completion configuration """ @@ -134,7 +134,7 @@ def get_config(self) -> AICompletionConfig: def get_tracker(self) -> LDAIConfigTracker: """ - Get the underlying AI configuration tracker used to initialize this TrackedChat. + Get the underlying AI configuration tracker used to initialize this Chat. :return: The tracker instance """ @@ -152,7 +152,7 @@ def get_provider(self) -> AIProvider: def get_judges(self) -> Dict[str, Judge]: """ - Get the judges associated with this TrackedChat. + Get the judges associated with this Chat. Returns a dictionary of judge instances keyed by their configuration keys. diff --git a/ldai/client.py b/ldai/client.py index 2881cf6..ea07915 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -5,7 +5,7 @@ from ldclient import Context from ldclient.client import LDClient -from ldai.chat import TrackedChat +from ldai.chat import Chat from ldai.judge import Judge from ldai.models import (AIAgentConfig, AIAgentConfigDefault, AIAgentConfigRequest, AIAgents, AICompletionConfig, @@ -241,16 +241,16 @@ async def create_chat( default_value: AICompletionConfigDefault, variables: Optional[Dict[str, Any]] = None, default_ai_provider: Optional[SupportedAIProvider] = None, - ) -> Optional[TrackedChat]: + ) -> Optional[Chat]: """ - Creates and returns a new TrackedChat instance for AI chat conversations. + Creates and returns a new Chat instance for AI conversations. :param key: The key identifying the AI completion configuration to use :param context: Standard Context used when evaluating flags :param default_value: A default value representing a standard AI config result :param variables: Dictionary of values for instruction interpolation :param default_ai_provider: Optional default AI provider to use - :return: TrackedChat instance or None if disabled/unsupported + :return: Chat instance or None if disabled/unsupported Example:: @@ -296,7 +296,7 @@ async def create_chat( default_ai_provider, ) - return TrackedChat(config, config.tracker, provider, judges, self._logger) + return Chat(config, config.tracker, provider, judges, self._logger) def agent_config( self, From 3c77d764805b2aaa75795dd7eeb1e11027d09c54 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Wed, 17 Dec 2025 04:12:28 +0000 Subject: [PATCH 28/28] re-order track_metrics_of params to be more intuitive --- ldai/chat/__init__.py | 2 +- ldai/judge/__init__.py | 2 +- ldai/tracker.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py index ff9b0c7..3d7f40f 100644 --- a/ldai/chat/__init__.py +++ b/ldai/chat/__init__.py @@ -62,8 +62,8 @@ async def invoke(self, prompt: str) -> ChatResponse: # Delegate to provider-specific implementation with tracking response = await self._tracker.track_metrics_of( - lambda result: result.metrics, lambda: self._provider.invoke_model(all_messages), + lambda result: result.metrics, ) # Start judge evaluations as async tasks (don't await them) diff --git a/ldai/judge/__init__.py b/ldai/judge/__init__.py index 058eb13..7158797 100644 --- a/ldai/judge/__init__.py +++ b/ldai/judge/__init__.py @@ -80,8 +80,8 @@ async def evaluate( # Track metrics of the structured 
model invocation response = await self._ai_config_tracker.track_metrics_of( + lambda: self._ai_provider.invoke_structured_model(messages, self._evaluation_response_structure), lambda result: result.metrics, - lambda: self._ai_provider.invoke_structured_model(messages, self._evaluation_response_structure) ) success = response.metrics.success diff --git a/ldai/tracker.py b/ldai/tracker.py index 63aa67c..e5d7ed2 100644 --- a/ldai/tracker.py +++ b/ldai/tracker.py @@ -157,7 +157,7 @@ def track_duration_of(self, func): return result - async def track_metrics_of(self, metrics_extractor, func): + async def track_metrics_of(self, func, metrics_extractor): """ Track metrics for a generic AI operation. @@ -168,8 +168,8 @@ async def track_metrics_of(self, metrics_extractor, func): In the case the provided function throws, this function will record the duration and an error. A failed operation will not have any token usage data. - :param metrics_extractor: Function that extracts LDAIMetrics from the operation result :param func: Async function which executes the operation + :param metrics_extractor: Function that extracts LDAIMetrics from the operation result :return: The result of the operation """ start_time = time.time()
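
To tie the series together, below is a minimal end-to-end sketch of the public API as it stands after PATCH 28/28: Chat replaces TrackedChat, Judge replaces AIJudge, every resolved config now carries its key (which Judge stamps onto JudgeResponse.judge_config_key), track_openai_metrics is synchronous again, and track_metrics_of takes the operation before the metrics extractor. The client bootstrapping and the AICompletionConfigDefault constructor arguments are assumptions for illustration only; they are not taken verbatim from these diffs.

import asyncio

from ldclient import Context

from ldai import LDAIClient
from ldai.models import AICompletionConfigDefault


async def main(ai_client: LDAIClient) -> None:
    context = Context.create("user-key")

    # create_chat resolves the completion config and returns a Chat
    # (formerly TrackedChat), or None when the config is disabled or the
    # provider is unsupported. The AICompletionConfigDefault arguments
    # here are illustrative; the real constructor may differ.
    chat = await ai_client.create_chat(
        "my-chat-config",
        context,
        AICompletionConfigDefault(enabled=False),
        variables={"name": "Sandy"},
    )
    if chat is None:
        return

    # invoke() appends the prompt to the conversation, delegates to the
    # provider, and records metrics via
    # tracker.track_metrics_of(func, metrics_extractor) -- the func-first
    # argument order introduced in PATCH 28.
    response = await chat.invoke("What is the capital of France?")
    print(response)

    # Judges attached to the chat are keyed by their configuration keys;
    # as of PATCH 26, each JudgeResponse carries judge_config_key set by
    # the Judge itself rather than by the caller.
    for judge_key, judge in chat.get_judges().items():
        print(judge_key, judge)

    # track_openai_metrics is synchronous again (PATCH 25): pass a plain
    # callable that performs the OpenAI call and returns its response.
    # `openai_call` is a hypothetical stand-in, not a real API:
    # chat.get_tracker().track_openai_metrics(lambda: openai_call())


# Hypothetical bootstrapping: `ld_client` would be an already-initialized
# ldclient LDClient instance.
# ai_client = LDAIClient(ld_client)
# asyncio.run(main(ai_client))

Note on the reorder: putting func first also matches track_duration_of(func), so the operation being measured is consistently the leading argument across the tracker helpers, with the extractor as the secondary callback.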