From 5f924aba8d570c606e3686eb1132c15d440ff54e Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 21:05:35 +0000 Subject: [PATCH 01/28] move dataclass into models --- ldai/__init__.py | 25 ++++ ldai/client.py | 205 ++---------------------------- ldai/models.py | 197 ++++++++++++++++++++++++++++ ldai/testing/test_agents.py | 4 +- ldai/testing/test_model_config.py | 2 +- 5 files changed, 236 insertions(+), 197 deletions(-) create mode 100644 ldai/models.py diff --git a/ldai/__init__.py b/ldai/__init__.py index cb7e545..91b3a2d 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -1 +1,26 @@ __version__ = "0.10.1" # x-release-please-version + +# Export main client +from ldai.client import LDAIClient + +# Export models for convenience +from ldai.models import ( + AIConfig, + LDAIAgent, + LDAIAgentConfig, + LDAIAgentDefaults, + LDMessage, + ModelConfig, + ProviderConfig, +) + +__all__ = [ + 'LDAIClient', + 'AIConfig', + 'LDAIAgent', + 'LDAIAgentConfig', + 'LDAIAgentDefaults', + 'LDMessage', + 'ModelConfig', + 'ProviderConfig', +] diff --git a/ldai/client.py b/ldai/client.py index a8bd888..db2a6ad 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -1,205 +1,22 @@ -from dataclasses import dataclass -from typing import Any, Dict, List, Literal, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple import chevron from ldclient import Context from ldclient.client import LDClient +from ldai.models import ( + AIConfig, + LDAIAgent, + LDAIAgentConfig, + LDAIAgentDefaults, + LDAIAgents, + LDMessage, + ModelConfig, + ProviderConfig, +) from ldai.tracker import LDAIConfigTracker -@dataclass -class LDMessage: - role: Literal['system', 'user', 'assistant'] - content: str - - def to_dict(self) -> dict: - """ - Render the given message as a dictionary object. - """ - return { - 'role': self.role, - 'content': self.content, - } - - -class ModelConfig: - """ - Configuration related to the model. - """ - - def __init__(self, name: str, parameters: Optional[Dict[str, Any]] = None, custom: Optional[Dict[str, Any]] = None): - """ - :param name: The name of the model. - :param parameters: Additional model-specific parameters. - :param custom: Additional customer provided data. - """ - self._name = name - self._parameters = parameters - self._custom = custom - - @property - def name(self) -> str: - """ - The name of the model. - """ - return self._name - - def get_parameter(self, key: str) -> Any: - """ - Retrieve model-specific parameters. - - Accessing a named, typed attribute (e.g. name) will result in the call - being delegated to the appropriate property. - """ - if key == 'name': - return self.name - - if self._parameters is None: - return None - - return self._parameters.get(key) - - def get_custom(self, key: str) -> Any: - """ - Retrieve customer provided data. - """ - if self._custom is None: - return None - - return self._custom.get(key) - - def to_dict(self) -> dict: - """ - Render the given model config as a dictionary object. - """ - return { - 'name': self._name, - 'parameters': self._parameters, - 'custom': self._custom, - } - - -class ProviderConfig: - """ - Configuration related to the provider. - """ - - def __init__(self, name: str): - self._name = name - - @property - def name(self) -> str: - """ - The name of the provider. - """ - return self._name - - def to_dict(self) -> dict: - """ - Render the given provider config as a dictionary object. 
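Patch 01 keeps the public API stable by re-exporting the moved classes from both the package root and `ldai.client`. A quick sanity sketch of what the re-export layout guarantees (assuming the package as patched here):

```python
# After this patch the moved classes resolve to the same objects from
# either import path, so existing call sites keep working unchanged.
from ldai import ModelConfig as TopLevelModelConfig
from ldai.client import ModelConfig as ClientModelConfig
from ldai.models import ModelConfig

assert TopLevelModelConfig is ModelConfig
assert ClientModelConfig is ModelConfig
```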
- """ - return { - 'name': self._name, - } - - -@dataclass(frozen=True) -class AIConfig: - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None - messages: Optional[List[LDMessage]] = None - provider: Optional[ProviderConfig] = None - - def to_dict(self) -> dict: - """ - Render the given default values as an AIConfig-compatible dictionary object. - """ - return { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'messages': [message.to_dict() for message in self.messages] if self.messages else None, - 'provider': self.provider.to_dict() if self.provider else None, - } - - -@dataclass(frozen=True) -class LDAIAgent: - """ - Represents an AI agent configuration with instructions and model settings. - - An agent is similar to an AIConfig but focuses on instructions rather than messages, - making it suitable for AI assistant/agent use cases. - """ - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None - provider: Optional[ProviderConfig] = None - instructions: Optional[str] = None - tracker: Optional[LDAIConfigTracker] = None - - def to_dict(self) -> Dict[str, Any]: - """ - Render the given agent as a dictionary object. - """ - result: Dict[str, Any] = { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'provider': self.provider.to_dict() if self.provider else None, - } - if self.instructions is not None: - result['instructions'] = self.instructions - return result - - -@dataclass(frozen=True) -class LDAIAgentDefaults: - """ - Default values for AI agent configurations. - - Similar to LDAIAgent but without tracker and with optional enabled field, - used as fallback values when agent configurations are not available. - """ - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None - provider: Optional[ProviderConfig] = None - instructions: Optional[str] = None - - def to_dict(self) -> Dict[str, Any]: - """ - Render the given agent defaults as a dictionary object. - """ - result: Dict[str, Any] = { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'provider': self.provider.to_dict() if self.provider else None, - } - if self.instructions is not None: - result['instructions'] = self.instructions - return result - - -@dataclass -class LDAIAgentConfig: - """ - Configuration for individual agent in batch requests. - - Combines agent key with its specific default configuration and variables. - """ - key: str - default_value: LDAIAgentDefaults - variables: Optional[Dict[str, Any]] = None - - -# Type alias for multiple agents -LDAIAgents = Dict[str, LDAIAgent] - - class LDAIClient: """The LaunchDarkly AI SDK client object.""" diff --git a/ldai/models.py b/ldai/models.py new file mode 100644 index 0000000..4eef5a2 --- /dev/null +++ b/ldai/models.py @@ -0,0 +1,197 @@ +from dataclasses import dataclass +from typing import Any, Dict, List, Literal, Optional + +from ldai.tracker import LDAIConfigTracker + + +@dataclass +class LDMessage: + role: Literal['system', 'user', 'assistant'] + content: str + + def to_dict(self) -> dict: + """ + Render the given message as a dictionary object. + """ + return { + 'role': self.role, + 'content': self.content, + } + + +class ModelConfig: + """ + Configuration related to the model. 
+ """ + + def __init__(self, name: str, parameters: Optional[Dict[str, Any]] = None, custom: Optional[Dict[str, Any]] = None): + """ + :param name: The name of the model. + :param parameters: Additional model-specific parameters. + :param custom: Additional customer provided data. + """ + self._name = name + self._parameters = parameters + self._custom = custom + + @property + def name(self) -> str: + """ + The name of the model. + """ + return self._name + + def get_parameter(self, key: str) -> Any: + """ + Retrieve model-specific parameters. + + Accessing a named, typed attribute (e.g. name) will result in the call + being delegated to the appropriate property. + """ + if key == 'name': + return self.name + + if self._parameters is None: + return None + + return self._parameters.get(key) + + def get_custom(self, key: str) -> Any: + """ + Retrieve customer provided data. + """ + if self._custom is None: + return None + + return self._custom.get(key) + + def to_dict(self) -> dict: + """ + Render the given model config as a dictionary object. + """ + return { + 'name': self._name, + 'parameters': self._parameters, + 'custom': self._custom, + } + + +class ProviderConfig: + """ + Configuration related to the provider. + """ + + def __init__(self, name: str): + self._name = name + + @property + def name(self) -> str: + """ + The name of the provider. + """ + return self._name + + def to_dict(self) -> dict: + """ + Render the given provider config as a dictionary object. + """ + return { + 'name': self._name, + } + + +@dataclass(frozen=True) +class AIConfig: + enabled: Optional[bool] = None + model: Optional[ModelConfig] = None + messages: Optional[List[LDMessage]] = None + provider: Optional[ProviderConfig] = None + + def to_dict(self) -> dict: + """ + Render the given default values as an AIConfig-compatible dictionary object. + """ + return { + '_ldMeta': { + 'enabled': self.enabled or False, + }, + 'model': self.model.to_dict() if self.model else None, + 'messages': [message.to_dict() for message in self.messages] if self.messages else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + + +@dataclass(frozen=True) +class LDAIAgent: + """ + Represents an AI agent configuration with instructions and model settings. + + An agent is similar to an AIConfig but focuses on instructions rather than messages, + making it suitable for AI assistant/agent use cases. + """ + enabled: Optional[bool] = None + model: Optional[ModelConfig] = None + provider: Optional[ProviderConfig] = None + instructions: Optional[str] = None + tracker: Optional[LDAIConfigTracker] = None + + def to_dict(self) -> Dict[str, Any]: + """ + Render the given agent as a dictionary object. + """ + result: Dict[str, Any] = { + '_ldMeta': { + 'enabled': self.enabled or False, + }, + 'model': self.model.to_dict() if self.model else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + if self.instructions is not None: + result['instructions'] = self.instructions + return result + + +@dataclass(frozen=True) +class LDAIAgentDefaults: + """ + Default values for AI agent configurations. + + Similar to LDAIAgent but without tracker and with optional enabled field, + used as fallback values when agent configurations are not available. + """ + enabled: Optional[bool] = None + model: Optional[ModelConfig] = None + provider: Optional[ProviderConfig] = None + instructions: Optional[str] = None + + def to_dict(self) -> Dict[str, Any]: + """ + Render the given agent defaults as a dictionary object. 
+ """ + result: Dict[str, Any] = { + '_ldMeta': { + 'enabled': self.enabled or False, + }, + 'model': self.model.to_dict() if self.model else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + if self.instructions is not None: + result['instructions'] = self.instructions + return result + + +@dataclass +class LDAIAgentConfig: + """ + Configuration for individual agent in batch requests. + + Combines agent key with its specific default configuration and variables. + """ + key: str + default_value: LDAIAgentDefaults + variables: Optional[Dict[str, Any]] = None + + +# Type alias for multiple agents +LDAIAgents = Dict[str, LDAIAgent] + diff --git a/ldai/testing/test_agents.py b/ldai/testing/test_agents.py index b2e80c0..755f2e5 100644 --- a/ldai/testing/test_agents.py +++ b/ldai/testing/test_agents.py @@ -2,8 +2,8 @@ from ldclient import Config, Context, LDClient from ldclient.integrations.test_data import TestData -from ldai.client import (LDAIAgentConfig, LDAIAgentDefaults, LDAIClient, - ModelConfig, ProviderConfig) +from ldai import (LDAIAgentConfig, LDAIAgentDefaults, LDAIClient, ModelConfig, + ProviderConfig) @pytest.fixture diff --git a/ldai/testing/test_model_config.py b/ldai/testing/test_model_config.py index 1ffc033..b35389d 100644 --- a/ldai/testing/test_model_config.py +++ b/ldai/testing/test_model_config.py @@ -2,7 +2,7 @@ from ldclient import Config, Context, LDClient from ldclient.integrations.test_data import TestData -from ldai.client import AIConfig, LDAIClient, LDMessage, ModelConfig +from ldai import AIConfig, LDAIClient, LDMessage, ModelConfig @pytest.fixture From 951eda13bc01b0515ef6e1ce042f5647d5fbd43e Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 21:22:22 +0000 Subject: [PATCH 02/28] create new config types completion, agent, and judges --- ldai/__init__.py | 28 +++++-- ldai/client.py | 200 +++++++++++++++++++++++++++++++++++++---------- ldai/models.py | 189 +++++++++++++++++++++++++++++++++++++++----- 3 files changed, 348 insertions(+), 69 deletions(-) diff --git a/ldai/__init__.py b/ldai/__init__.py index 91b3a2d..11369c9 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -5,10 +5,16 @@ # Export models for convenience from ldai.models import ( - AIConfig, - LDAIAgent, - LDAIAgentConfig, - LDAIAgentDefaults, + AIAgentConfig, + AIAgentConfigDefault, + AIAgentConfigRequest, + AIAgents, + AICompletionConfig, + AICompletionConfigDefault, + AIJudgeConfig, + AIJudgeConfigDefault, + Judge, + JudgeConfiguration, LDMessage, ModelConfig, ProviderConfig, @@ -16,10 +22,16 @@ __all__ = [ 'LDAIClient', - 'AIConfig', - 'LDAIAgent', - 'LDAIAgentConfig', - 'LDAIAgentDefaults', + 'AIAgentConfig', + 'AIAgentConfigDefault', + 'AIAgentConfigRequest', + 'AIAgents', + 'AICompletionConfig', + 'AICompletionConfigDefault', + 'AIJudgeConfig', + 'AIJudgeConfigDefault', + 'Judge', + 'JudgeConfiguration', 'LDMessage', 'ModelConfig', 'ProviderConfig', diff --git a/ldai/client.py b/ldai/client.py index db2a6ad..4f3cc9e 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -5,11 +5,16 @@ from ldclient.client import LDClient from ldai.models import ( - AIConfig, - LDAIAgent, - LDAIAgentConfig, - LDAIAgentDefaults, - LDAIAgents, + AIAgentConfig, + AIAgentConfigDefault, + AIAgentConfigRequest, + AIAgents, + AICompletionConfig, + AICompletionConfigDefault, + AIJudgeConfig, + AIJudgeConfigDefault, + Judge, + JudgeConfiguration, LDMessage, ModelConfig, ProviderConfig, @@ -23,40 +28,103 @@ class LDAIClient: def __init__(self, client: LDClient): self._client = 
client + def completion_config( + self, + key: str, + context: Context, + default_value: AICompletionConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AICompletionConfig: + """ + Get the value of a completion configuration. + + :param key: The key of the completion configuration. + :param context: The context to evaluate the completion configuration in. + :param default_value: The default value of the completion configuration. + :param variables: Additional variables for the completion configuration. + :return: The completion configuration with a tracker used for gathering metrics. + """ + self._client.track('$ld:ai:config:function:single', context, key, 1) + + model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( + key, context, default_value.to_dict(), variables + ) + + config = AICompletionConfig( + enabled=bool(enabled), + model=model, + messages=messages, + provider=provider, + tracker=tracker, + judge_configuration=judge_configuration, + ) + + return config + def config( self, key: str, context: Context, - default_value: AIConfig, + default_value: AICompletionConfigDefault, variables: Optional[Dict[str, Any]] = None, - ) -> Tuple[AIConfig, LDAIConfigTracker]: + ) -> AICompletionConfig: """ Get the value of a model configuration. + .. deprecated:: Use :meth:`completion_config` instead. This method will be removed in a future version. + :param key: The key of the model configuration. :param context: The context to evaluate the model configuration in. :param default_value: The default value of the model configuration. :param variables: Additional variables for the model configuration. :return: The value of the model configuration along with a tracker used for gathering metrics. """ - self._client.track('$ld:ai:config:function:single', context, key, 1) + return self.completion_config(key, context, default_value, variables) - model, provider, messages, instructions, tracker, enabled = self.__evaluate(key, context, default_value.to_dict(), variables) + def judge_config( + self, + key: str, + context: Context, + default_value: AIJudgeConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AIJudgeConfig: + """ + Get the value of a judge configuration. + + :param key: The key of the judge configuration. + :param context: The context to evaluate the judge configuration in. + :param default_value: The default value of the judge configuration. + :param variables: Additional variables for the judge configuration. + :return: The judge configuration with a tracker used for gathering metrics. 
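A minimal usage sketch of the new `completion_config` entry point, assuming an already-initialized `LDClient` (`ld_client`) and an illustrative flag key:

```python
from ldclient import Context

from ldai import AICompletionConfigDefault, LDAIClient, ModelConfig

ai_client = LDAIClient(ld_client)  # ld_client: an initialized LDClient (assumed)
context = Context.builder('user-key').build()

config = ai_client.completion_config(
    'my-completion-config',          # assumed flag key
    context,
    AICompletionConfigDefault(enabled=False, model=ModelConfig('gpt-4')),
    variables={'topic': 'renewable energy'},
)

if config.enabled and config.tracker:
    config.tracker.track_success()
```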
+ """ + self._client.track('$ld:ai:judge:function:single', context, key, 1) + + model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( + key, context, default_value.to_dict(), variables + ) - config = AIConfig( + # Extract evaluation_metric_keys from the variation + variation = self._client.variation(key, context, default_value.to_dict()) + evaluation_metric_keys = variation.get('evaluationMetricKeys', default_value.evaluation_metric_keys or []) + + config = AIJudgeConfig( enabled=bool(enabled), + evaluation_metric_keys=evaluation_metric_keys, model=model, messages=messages, provider=provider, + tracker=tracker, ) - return config, tracker + return config - def agent( + def agent_config( self, - config: LDAIAgentConfig, + key: str, context: Context, - ) -> LDAIAgent: + default_value: AIAgentConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AIAgentConfig: """ Retrieve a single AI Config agent. @@ -65,39 +133,58 @@ def agent( Example:: - agent = client.agent(LDAIAgentConfig( - key='research_agent', - default_value=LDAIAgentDefaults( + agent = client.agent_config( + 'research_agent', + context, + AIAgentConfigDefault( enabled=True, model=ModelConfig('gpt-4'), instructions="You are a research assistant specializing in {{topic}}." ), variables={'topic': 'climate change'} - ), context) + ) if agent.enabled: research_result = agent.instructions # Interpolated instructions agent.tracker.track_success() - :param config: The agent configuration to use. + :param key: The agent configuration key. :param context: The context to evaluate the agent configuration in. - :return: Configured LDAIAgent instance. + :param default_value: Default agent values. + :param variables: Variables for interpolation. + :return: Configured AIAgentConfig instance. """ # Track single agent usage self._client.track( "$ld:ai:agent:function:single", context, - config.key, + key, 1 ) - return self.__evaluate_agent(config.key, context, config.default_value, config.variables) + return self.__evaluate_agent(key, context, default_value, variables) - def agents( + def agent( self, - agent_configs: List[LDAIAgentConfig], + config: AIAgentConfigRequest, context: Context, - ) -> LDAIAgents: + ) -> AIAgentConfig: + """ + Retrieve a single AI Config agent. + + .. deprecated:: Use :meth:`agent_config` instead. This method will be removed in a future version. + + :param config: The agent configuration to use. + :param context: The context to evaluate the agent configuration in. + :return: Configured AIAgentConfig instance. + """ + return self.agent_config(config.key, context, config.default_value, config.variables) + + def agent_configs( + self, + agent_configs: List[AIAgentConfigRequest], + context: Context, + ) -> AIAgents: """ Retrieve multiple AI agent configurations. @@ -107,18 +194,18 @@ def agents( Example:: - agents = client.agents([ - LDAIAgentConfig( + agents = client.agent_configs([ + AIAgentConfigRequest( key='research_agent', - default_value=LDAIAgentDefaults( + default_value=AIAgentConfigDefault( enabled=True, instructions='You are a research assistant.' ), variables={'topic': 'climate change'} ), - LDAIAgentConfig( + AIAgentConfigRequest( key='writing_agent', - default_value=LDAIAgentDefaults( + default_value=AIAgentConfigDefault( enabled=True, instructions='You are a writing assistant.' ), @@ -131,7 +218,7 @@ def agents( :param agent_configs: List of agent configurations to retrieve. :param context: The context to evaluate the agent configurations in. 
- :return: Dictionary mapping agent keys to their LDAIAgent configurations. + :return: Dictionary mapping agent keys to their AIAgentConfig configurations. """ # Track multiple agents usage agent_count = len(agent_configs) @@ -142,7 +229,7 @@ def agents( agent_count ) - result: LDAIAgents = {} + result: AIAgents = {} for config in agent_configs: agent = self.__evaluate_agent( @@ -155,13 +242,29 @@ def agents( return result + def agents( + self, + agent_configs: List[AIAgentConfigRequest], + context: Context, + ) -> AIAgents: + """ + Retrieve multiple AI agent configurations. + + .. deprecated:: Use :meth:`agent_configs` instead. This method will be removed in a future version. + + :param agent_configs: List of agent configurations to retrieve. + :param context: The context to evaluate the agent configurations in. + :return: Dictionary mapping agent keys to their AIAgentConfig configurations. + """ + return self.agent_configs(agent_configs, context) + def __evaluate( self, key: str, context: Context, default_dict: Dict[str, Any], variables: Optional[Dict[str, Any]] = None, - ) -> Tuple[Optional[ModelConfig], Optional[ProviderConfig], Optional[List[LDMessage]], Optional[str], LDAIConfigTracker, bool]: + ) -> Tuple[Optional[ModelConfig], Optional[ProviderConfig], Optional[List[LDMessage]], Optional[str], LDAIConfigTracker, bool, Optional[Any]]: """ Internal method to evaluate a configuration and extract components. @@ -228,15 +331,31 @@ def __evaluate( enabled = variation.get('_ldMeta', {}).get('enabled', False) - return model, provider_config, messages, instructions, tracker, enabled + # Extract judge configuration + judge_configuration = None + if 'judgeConfiguration' in variation and isinstance(variation['judgeConfiguration'], dict): + judge_config = variation['judgeConfiguration'] + if 'judges' in judge_config and isinstance(judge_config['judges'], list): + judges = [ + Judge( + key=judge['key'], + sampling_rate=judge['samplingRate'] + ) + for judge in judge_config['judges'] + if isinstance(judge, dict) and 'key' in judge and 'samplingRate' in judge + ] + if judges: + judge_configuration = JudgeConfiguration(judges=judges) + + return model, provider_config, messages, instructions, tracker, enabled, judge_configuration def __evaluate_agent( self, key: str, context: Context, - default_value: LDAIAgentDefaults, + default_value: AIAgentConfigDefault, variables: Optional[Dict[str, Any]] = None, - ) -> LDAIAgent: + ) -> AIAgentConfig: """ Internal method to evaluate an agent configuration. @@ -244,21 +363,22 @@ def __evaluate_agent( :param context: The evaluation context. :param default_value: Default agent values. :param variables: Variables for interpolation. - :return: Configured LDAIAgent instance. + :return: Configured AIAgentConfig instance. 
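The judge extraction above is easiest to read against a concrete variation payload; an illustrative (not real) flag value:

```python
# Wire format the parser above expects: camelCase keys, and judge entries
# missing 'key' or 'samplingRate' are skipped rather than raising.
variation = {
    '_ldMeta': {'enabled': True},
    'model': {'name': 'gpt-4'},
    'judgeConfiguration': {
        'judges': [
            {'key': 'relevance-judge', 'samplingRate': 0.1},
            {'key': 'toxicity-judge', 'samplingRate': 1.0},
        ],
    },
}
```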
""" - model, provider, messages, instructions, tracker, enabled = self.__evaluate( + model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( key, context, default_value.to_dict(), variables ) # For agents, prioritize instructions over messages final_instructions = instructions if instructions is not None else default_value.instructions - return LDAIAgent( - enabled=bool(enabled) if enabled is not None else default_value.enabled, + return AIAgentConfig( + enabled=bool(enabled) if enabled is not None else (default_value.enabled or False), model=model or default_value.model, provider=provider or default_value.provider, instructions=final_instructions, tracker=tracker, + judge_configuration=judge_configuration or default_value.judge_configuration, ) def __interpolate_template(self, template: str, variables: Dict[str, Any]) -> str: diff --git a/ldai/models.py b/ldai/models.py index 4eef5a2..83b5326 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -100,18 +100,64 @@ def to_dict(self) -> dict: } +# ============================================================================ +# Judge Types +# ============================================================================ + +@dataclass(frozen=True) +class Judge: + """ + Configuration for a single judge attachment. + """ + key: str + sampling_rate: float + + def to_dict(self) -> dict: + """ + Render the judge as a dictionary object. + """ + return { + 'key': self.key, + 'samplingRate': self.sampling_rate, + } + + +@dataclass(frozen=True) +class JudgeConfiguration: + """ + Configuration for judge attachment to AI Configs. + """ + judges: List[Judge] + + def to_dict(self) -> dict: + """ + Render the judge configuration as a dictionary object. + """ + return { + 'judges': [judge.to_dict() for judge in self.judges], + } + + +# ============================================================================ +# Completion Config Types +# ============================================================================ + @dataclass(frozen=True) -class AIConfig: +class AICompletionConfigDefault: + """ + Default Completion AI Config (default mode). + """ enabled: Optional[bool] = None model: Optional[ModelConfig] = None messages: Optional[List[LDMessage]] = None provider: Optional[ProviderConfig] = None + judge_configuration: Optional[JudgeConfiguration] = None def to_dict(self) -> dict: """ - Render the given default values as an AIConfig-compatible dictionary object. + Render the given default values as an AICompletionConfigDefault-compatible dictionary object. """ - return { + result = { '_ldMeta': { 'enabled': self.enabled or False, }, @@ -119,25 +165,59 @@ def to_dict(self) -> dict: 'messages': [message.to_dict() for message in self.messages] if self.messages else None, 'provider': self.provider.to_dict() if self.provider else None, } + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() + return result @dataclass(frozen=True) -class LDAIAgent: +class AICompletionConfig: + """ + Completion AI Config (default mode). """ - Represents an AI agent configuration with instructions and model settings. + enabled: bool + model: Optional[ModelConfig] = None + messages: Optional[List[LDMessage]] = None + provider: Optional[ProviderConfig] = None + tracker: Optional[LDAIConfigTracker] = None + judge_configuration: Optional[JudgeConfiguration] = None + + def to_dict(self) -> dict: + """ + Render the given completion config as a dictionary object. 
+ """ + result = { + '_ldMeta': { + 'enabled': self.enabled, + }, + 'model': self.model.to_dict() if self.model else None, + 'messages': [message.to_dict() for message in self.messages] if self.messages else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() + return result + - An agent is similar to an AIConfig but focuses on instructions rather than messages, - making it suitable for AI assistant/agent use cases. +# ============================================================================ +# Agent Config Types +# ============================================================================ + + +@dataclass(frozen=True) +class AIAgentConfigDefault: + """ + Default Agent-specific AI Config with instructions. """ enabled: Optional[bool] = None model: Optional[ModelConfig] = None provider: Optional[ProviderConfig] = None instructions: Optional[str] = None - tracker: Optional[LDAIConfigTracker] = None + judge_configuration: Optional[JudgeConfiguration] = None def to_dict(self) -> Dict[str, Any]: """ - Render the given agent as a dictionary object. + Render the given agent config default as a dictionary object. """ result: Dict[str, Any] = { '_ldMeta': { @@ -148,50 +228,117 @@ def to_dict(self) -> Dict[str, Any]: } if self.instructions is not None: result['instructions'] = self.instructions + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() return result @dataclass(frozen=True) -class LDAIAgentDefaults: +class AIAgentConfig: """ - Default values for AI agent configurations. - - Similar to LDAIAgent but without tracker and with optional enabled field, - used as fallback values when agent configurations are not available. + Agent-specific AI Config with instructions. """ - enabled: Optional[bool] = None + enabled: bool model: Optional[ModelConfig] = None provider: Optional[ProviderConfig] = None instructions: Optional[str] = None + tracker: Optional[LDAIConfigTracker] = None + judge_configuration: Optional[JudgeConfiguration] = None def to_dict(self) -> Dict[str, Any]: """ - Render the given agent defaults as a dictionary object. + Render the given agent config as a dictionary object. """ result: Dict[str, Any] = { '_ldMeta': { - 'enabled': self.enabled or False, + 'enabled': self.enabled, }, 'model': self.model.to_dict() if self.model else None, 'provider': self.provider.to_dict() if self.provider else None, } if self.instructions is not None: result['instructions'] = self.instructions + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() return result +# ============================================================================ +# Judge Config Types +# ============================================================================ + +@dataclass(frozen=True) +class AIJudgeConfigDefault: + """ + Default Judge-specific AI Config with required evaluation metric key. + """ + enabled: Optional[bool] = None + model: Optional[ModelConfig] = None + messages: Optional[List[LDMessage]] = None + provider: Optional[ProviderConfig] = None + evaluation_metric_keys: Optional[List[str]] = None + + def to_dict(self) -> dict: + """ + Render the given judge config default as a dictionary object. 
+ """ + result = { + '_ldMeta': { + 'enabled': self.enabled or False, + }, + 'model': self.model.to_dict() if self.model else None, + 'messages': [message.to_dict() for message in self.messages] if self.messages else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + if self.evaluation_metric_keys is not None: + result['evaluationMetricKeys'] = self.evaluation_metric_keys + return result + + +@dataclass(frozen=True) +class AIJudgeConfig: + """ + Judge-specific AI Config with required evaluation metric key. + """ + enabled: bool + evaluation_metric_keys: List[str] + model: Optional[ModelConfig] = None + messages: Optional[List[LDMessage]] = None + provider: Optional[ProviderConfig] = None + tracker: Optional[LDAIConfigTracker] = None + + def to_dict(self) -> dict: + """ + Render the given judge config as a dictionary object. + """ + result = { + '_ldMeta': { + 'enabled': self.enabled, + }, + 'evaluationMetricKeys': self.evaluation_metric_keys, + 'model': self.model.to_dict() if self.model else None, + 'messages': [message.to_dict() for message in self.messages] if self.messages else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + return result + + +# ============================================================================ +# Agent Request Config +# ============================================================================ + @dataclass -class LDAIAgentConfig: +class AIAgentConfigRequest: """ - Configuration for individual agent in batch requests. + Configuration for a single agent request. Combines agent key with its specific default configuration and variables. """ key: str - default_value: LDAIAgentDefaults + default_value: AIAgentConfigDefault variables: Optional[Dict[str, Any]] = None # Type alias for multiple agents -LDAIAgents = Dict[str, LDAIAgent] +AIAgents = Dict[str, AIAgentConfig] From ae7516be21a304942ddf0a65493eb54e8f8bc984 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 21:26:32 +0000 Subject: [PATCH 03/28] use inheritance for configs for consistency --- ldai/models.py | 131 +++++++++++++++++++++++-------------------------- 1 file changed, 61 insertions(+), 70 deletions(-) diff --git a/ldai/models.py b/ldai/models.py index 83b5326..e8ddf21 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -139,61 +139,91 @@ def to_dict(self) -> dict: # ============================================================================ -# Completion Config Types +# Base AI Config Types # ============================================================================ @dataclass(frozen=True) -class AICompletionConfigDefault: +class AIConfigDefault: """ - Default Completion AI Config (default mode). + Base AI Config interface for default implementations with optional enabled property. """ enabled: Optional[bool] = None model: Optional[ModelConfig] = None - messages: Optional[List[LDMessage]] = None provider: Optional[ProviderConfig] = None - judge_configuration: Optional[JudgeConfiguration] = None - def to_dict(self) -> dict: + def _base_to_dict(self) -> Dict[str, Any]: """ - Render the given default values as an AICompletionConfigDefault-compatible dictionary object. + Render the base config fields as a dictionary object. 
""" - result = { + return { '_ldMeta': { 'enabled': self.enabled or False, }, 'model': self.model.to_dict() if self.model else None, - 'messages': [message.to_dict() for message in self.messages] if self.messages else None, 'provider': self.provider.to_dict() if self.provider else None, } - if self.judge_configuration is not None: - result['judgeConfiguration'] = self.judge_configuration.to_dict() - return result @dataclass(frozen=True) -class AICompletionConfig: +class AIConfig: """ - Completion AI Config (default mode). + Base AI Config interface without mode-specific fields. """ enabled: bool model: Optional[ModelConfig] = None - messages: Optional[List[LDMessage]] = None provider: Optional[ProviderConfig] = None tracker: Optional[LDAIConfigTracker] = None - judge_configuration: Optional[JudgeConfiguration] = None - def to_dict(self) -> dict: + def _base_to_dict(self) -> Dict[str, Any]: """ - Render the given completion config as a dictionary object. + Render the base config fields as a dictionary object. """ - result = { + return { '_ldMeta': { 'enabled': self.enabled, }, 'model': self.model.to_dict() if self.model else None, - 'messages': [message.to_dict() for message in self.messages] if self.messages else None, 'provider': self.provider.to_dict() if self.provider else None, } + + +# ============================================================================ +# Completion Config Types +# ============================================================================ + +@dataclass(frozen=True) +class AICompletionConfigDefault(AIConfigDefault): + """ + Default Completion AI Config (default mode). + """ + messages: Optional[List[LDMessage]] = None + judge_configuration: Optional[JudgeConfiguration] = None + + def to_dict(self) -> dict: + """ + Render the given default values as an AICompletionConfigDefault-compatible dictionary object. + """ + result = self._base_to_dict() + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() + return result + + +@dataclass(frozen=True) +class AICompletionConfig(AIConfig): + """ + Completion AI Config (default mode). + """ + messages: Optional[List[LDMessage]] = None + judge_configuration: Optional[JudgeConfiguration] = None + + def to_dict(self) -> dict: + """ + Render the given completion config as a dictionary object. + """ + result = self._base_to_dict() + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None if self.judge_configuration is not None: result['judgeConfiguration'] = self.judge_configuration.to_dict() return result @@ -203,15 +233,11 @@ def to_dict(self) -> dict: # Agent Config Types # ============================================================================ - @dataclass(frozen=True) -class AIAgentConfigDefault: +class AIAgentConfigDefault(AIConfigDefault): """ Default Agent-specific AI Config with instructions. """ - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None - provider: Optional[ProviderConfig] = None instructions: Optional[str] = None judge_configuration: Optional[JudgeConfiguration] = None @@ -219,13 +245,7 @@ def to_dict(self) -> Dict[str, Any]: """ Render the given agent config default as a dictionary object. 
""" - result: Dict[str, Any] = { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'provider': self.provider.to_dict() if self.provider else None, - } + result = self._base_to_dict() if self.instructions is not None: result['instructions'] = self.instructions if self.judge_configuration is not None: @@ -234,28 +254,18 @@ def to_dict(self) -> Dict[str, Any]: @dataclass(frozen=True) -class AIAgentConfig: +class AIAgentConfig(AIConfig): """ Agent-specific AI Config with instructions. """ - enabled: bool - model: Optional[ModelConfig] = None - provider: Optional[ProviderConfig] = None instructions: Optional[str] = None - tracker: Optional[LDAIConfigTracker] = None judge_configuration: Optional[JudgeConfiguration] = None def to_dict(self) -> Dict[str, Any]: """ Render the given agent config as a dictionary object. """ - result: Dict[str, Any] = { - '_ldMeta': { - 'enabled': self.enabled, - }, - 'model': self.model.to_dict() if self.model else None, - 'provider': self.provider.to_dict() if self.provider else None, - } + result = self._base_to_dict() if self.instructions is not None: result['instructions'] = self.instructions if self.judge_configuration is not None: @@ -268,58 +278,39 @@ def to_dict(self) -> Dict[str, Any]: # ============================================================================ @dataclass(frozen=True) -class AIJudgeConfigDefault: +class AIJudgeConfigDefault(AIConfigDefault): """ Default Judge-specific AI Config with required evaluation metric key. """ - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None messages: Optional[List[LDMessage]] = None - provider: Optional[ProviderConfig] = None evaluation_metric_keys: Optional[List[str]] = None def to_dict(self) -> dict: """ Render the given judge config default as a dictionary object. """ - result = { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'messages': [message.to_dict() for message in self.messages] if self.messages else None, - 'provider': self.provider.to_dict() if self.provider else None, - } + result = self._base_to_dict() + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None if self.evaluation_metric_keys is not None: result['evaluationMetricKeys'] = self.evaluation_metric_keys return result @dataclass(frozen=True) -class AIJudgeConfig: +class AIJudgeConfig(AIConfig): """ Judge-specific AI Config with required evaluation metric key. """ - enabled: bool evaluation_metric_keys: List[str] - model: Optional[ModelConfig] = None messages: Optional[List[LDMessage]] = None - provider: Optional[ProviderConfig] = None - tracker: Optional[LDAIConfigTracker] = None def to_dict(self) -> dict: """ Render the given judge config as a dictionary object. 
""" - result = { - '_ldMeta': { - 'enabled': self.enabled, - }, - 'evaluationMetricKeys': self.evaluation_metric_keys, - 'model': self.model.to_dict() if self.model else None, - 'messages': [message.to_dict() for message in self.messages] if self.messages else None, - 'provider': self.provider.to_dict() if self.provider else None, - } + result = self._base_to_dict() + result['evaluationMetricKeys'] = self.evaluation_metric_keys + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None return result From 0d933d2d9b0721339a77f2d656aaa50a74fb7d2a Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 21:26:51 +0000 Subject: [PATCH 04/28] added deprecations for old types --- ldai/__init__.py | 10 ++++++++++ ldai/models.py | 25 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/ldai/__init__.py b/ldai/__init__.py index 11369c9..222c007 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -18,6 +18,11 @@ LDMessage, ModelConfig, ProviderConfig, + # Deprecated aliases for backward compatibility + AIConfig, + LDAIAgent, + LDAIAgentConfig, + LDAIAgentDefaults, ) __all__ = [ @@ -35,4 +40,9 @@ 'LDMessage', 'ModelConfig', 'ProviderConfig', + # Deprecated exports + 'AIConfig', + 'LDAIAgent', + 'LDAIAgentConfig', + 'LDAIAgentDefaults', ] diff --git a/ldai/models.py b/ldai/models.py index e8ddf21..f83964a 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -1,3 +1,4 @@ +import warnings from dataclasses import dataclass from typing import Any, Dict, List, Literal, Optional @@ -333,3 +334,27 @@ class AIAgentConfigRequest: # Type alias for multiple agents AIAgents = Dict[str, AIAgentConfig] + +# ============================================================================ +# Deprecated Type Aliases for Backward Compatibility +# ============================================================================ + +# Note: These are type aliases that point to the new types. +# Since Python uses duck typing, these will work at runtime even if type checkers complain. 
+# The old AIConfig had optional enabled, so it maps to AICompletionConfigDefault +# The old AIConfig return type had required enabled, so it maps to AICompletionConfig + +# Deprecated: Use AICompletionConfigDefault instead +# This was the old AIConfig with optional enabled (used as input/default) +# Note: We map to AICompletionConfigDefault since the old AIConfig had optional enabled +AIConfig = AICompletionConfigDefault + +# Deprecated: Use AIAgentConfigDefault instead +LDAIAgentDefaults = AIAgentConfigDefault + +# Deprecated: Use AIAgentConfigRequest instead +LDAIAgentConfig = AIAgentConfigRequest + +# Deprecated: Use AIAgentConfig instead (note: this was the old return type) +LDAIAgent = AIAgentConfig + From 82718075cdbab1410729a52ba5f8950a866e5e76 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 21:35:34 +0000 Subject: [PATCH 05/28] create the ai provider interface and factory --- ldai/models.py | 5 +- ldai/providers/__init__.py | 11 ++ ldai/providers/ai_provider.py | 96 ++++++++++++++++ ldai/providers/ai_provider_factory.py | 154 ++++++++++++++++++++++++++ ldai/providers/types.py | 37 +++++++ 5 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 ldai/providers/__init__.py create mode 100644 ldai/providers/ai_provider.py create mode 100644 ldai/providers/ai_provider_factory.py create mode 100644 ldai/providers/types.py diff --git a/ldai/models.py b/ldai/models.py index f83964a..0b961f7 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -1,6 +1,6 @@ import warnings from dataclasses import dataclass -from typing import Any, Dict, List, Literal, Optional +from typing import Any, Dict, List, Literal, Optional, Union from ldai.tracker import LDAIConfigTracker @@ -334,6 +334,9 @@ class AIAgentConfigRequest: # Type alias for multiple agents AIAgents = Dict[str, AIAgentConfig] +# Type alias for all AI Config variants +AIConfigKind = Union[AIAgentConfig, AICompletionConfig, AIJudgeConfig] + # ============================================================================ # Deprecated Type Aliases for Backward Compatibility diff --git a/ldai/providers/__init__.py b/ldai/providers/__init__.py new file mode 100644 index 0000000..8cac547 --- /dev/null +++ b/ldai/providers/__init__.py @@ -0,0 +1,11 @@ +"""AI Provider interfaces and factory for LaunchDarkly AI SDK.""" + +from ldai.providers.ai_provider import AIProvider +from ldai.providers.ai_provider_factory import AIProviderFactory, SupportedAIProvider + +__all__ = [ + 'AIProvider', + 'AIProviderFactory', + 'SupportedAIProvider', +] + diff --git a/ldai/providers/ai_provider.py b/ldai/providers/ai_provider.py new file mode 100644 index 0000000..5863a74 --- /dev/null +++ b/ldai/providers/ai_provider.py @@ -0,0 +1,96 @@ +"""Abstract base class for AI providers.""" + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional, Union + +from ldai.models import AIConfigKind, LDMessage +from ldai.providers.types import ChatResponse, StructuredResponse + + +class AIProvider(ABC): + """ + Abstract base class for AI providers that implement chat model functionality. + + This class provides the contract that all provider implementations must follow + to integrate with LaunchDarkly's tracking and configuration capabilities. + + Following the AICHAT spec recommendation to use base classes with non-abstract methods + for better extensibility and backwards compatibility. + """ + + def __init__(self, logger: Optional[Any] = None): + """ + Initialize the AI provider. 
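Because the deprecated names are plain aliases, old imports keep resolving to the new classes; a quick check:

```python
from ldai import (AIAgentConfigDefault, AICompletionConfigDefault, AIConfig,
                  LDAIAgentDefaults)

# The aliases are the same class objects, not copies.
assert AIConfig is AICompletionConfigDefault
assert LDAIAgentDefaults is AIAgentConfigDefault
```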
+ + :param logger: Optional logger for logging provider operations. + """ + self.logger = logger + + async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: + """ + Invoke the chat model with an array of messages. + + This method should convert messages to provider format, invoke the model, + and return a ChatResponse with the result and metrics. + + Default implementation takes no action and returns a placeholder response. + Provider implementations should override this method. + + :param messages: Array of LDMessage objects representing the conversation + :return: ChatResponse containing the model's response + """ + if self.logger: + self.logger.warn('invokeModel not implemented by this provider') + + from ldai.models import LDMessage + from ldai.providers.types import LDAIMetrics + + return ChatResponse( + message=LDMessage(role='assistant', content=''), + metrics=LDAIMetrics(success=False, usage=None), + ) + + async def invoke_structured_model( + self, + messages: List[LDMessage], + response_structure: Dict[str, Any], + ) -> StructuredResponse: + """ + Invoke the chat model with structured output support. + + This method should convert messages to provider format, invoke the model with + structured output configuration, and return a structured response. + + Default implementation takes no action and returns a placeholder response. + Provider implementations should override this method. + + :param messages: Array of LDMessage objects representing the conversation + :param response_structure: Dictionary of output configurations keyed by output name + :return: StructuredResponse containing the structured data + """ + if self.logger: + self.logger.warn('invokeStructuredModel not implemented by this provider') + + from ldai.providers.types import LDAIMetrics + + return StructuredResponse( + data={}, + raw_response='', + metrics=LDAIMetrics(success=False, usage=None), + ) + + @staticmethod + @abstractmethod + async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'AIProvider': + """ + Static method that constructs an instance of the provider. + + Each provider implementation must provide their own static create method + that accepts an AIConfigKind and returns a configured instance. + + :param ai_config: The LaunchDarkly AI configuration + :param logger: Optional logger for the provider + :return: Configured provider instance + """ + raise NotImplementedError('Provider implementations must override the static create method') + diff --git a/ldai/providers/ai_provider_factory.py b/ldai/providers/ai_provider_factory.py new file mode 100644 index 0000000..dab3796 --- /dev/null +++ b/ldai/providers/ai_provider_factory.py @@ -0,0 +1,154 @@ +"""Factory for creating AIProvider instances based on the provider configuration.""" + +import importlib +from typing import Any, List, Literal, Optional, Type + +from ldai.models import AIConfigKind +from ldai.providers.ai_provider import AIProvider + + +# List of supported AI providers +SUPPORTED_AI_PROVIDERS = [ + # Multi-provider packages should be last in the list + 'langchain', +] + +# Type representing the supported AI providers +SupportedAIProvider = Literal['langchain'] + + +class AIProviderFactory: + """ + Factory for creating AIProvider instances based on the provider configuration. 
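A minimal sketch of a third-party provider built on this base class; `EchoProvider` is hypothetical and only overrides what the contract requires:

```python
from typing import Any, List, Optional

from ldai.models import AIConfigKind, LDMessage
from ldai.providers import AIProvider
from ldai.providers.types import ChatResponse, LDAIMetrics


class EchoProvider(AIProvider):
    async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse:
        # Echo the last message back as the assistant reply.
        content = messages[-1].content if messages else ''
        return ChatResponse(
            message=LDMessage(role='assistant', content=content),
            metrics=LDAIMetrics(success=True),
        )

    @staticmethod
    async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'EchoProvider':
        return EchoProvider(logger)
```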
+ """ + + @staticmethod + async def create( + ai_config: AIConfigKind, + logger: Optional[Any] = None, + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Optional[AIProvider]: + """ + Create an AIProvider instance based on the AI configuration. + + This method attempts to load provider-specific implementations dynamically. + Returns None if the provider is not supported. + + :param ai_config: The AI configuration + :param logger: Optional logger for logging provider initialization + :param default_ai_provider: Optional default AI provider to use + :return: AIProvider instance or None if not supported + """ + provider_name = ai_config.provider.name.lower() if ai_config.provider else None + # Determine which providers to try based on default_ai_provider + providers_to_try = AIProviderFactory._get_providers_to_try(default_ai_provider, provider_name) + + # Try each provider in order + for provider_type in providers_to_try: + provider = await AIProviderFactory._try_create_provider(provider_type, ai_config, logger) + if provider: + return provider + + # If no provider was successfully created, log a warning + if logger: + logger.warn( + f"Provider is not supported or failed to initialize: {provider_name or 'unknown'}" + ) + return None + + @staticmethod + def _get_providers_to_try( + default_ai_provider: Optional[SupportedAIProvider], + provider_name: Optional[str], + ) -> List[SupportedAIProvider]: + """ + Determine which providers to try based on default_ai_provider and provider_name. + + :param default_ai_provider: Optional default provider to use + :param provider_name: Optional provider name from config + :return: List of providers to try in order + """ + # If default_ai_provider is set, only try that specific provider + if default_ai_provider: + return [default_ai_provider] + + # If no default_ai_provider is set, try all providers in order + provider_set = set() + + # First try the specific provider if it's supported + if provider_name and provider_name in SUPPORTED_AI_PROVIDERS: + provider_set.add(provider_name) # type: ignore + + # Then try multi-provider packages, but avoid duplicates + multi_provider_packages: List[SupportedAIProvider] = ['langchain', 'vercel'] + for provider in multi_provider_packages: + provider_set.add(provider) + + return list(provider_set) + + @staticmethod + async def _try_create_provider( + provider_type: SupportedAIProvider, + ai_config: AIConfigKind, + logger: Optional[Any] = None, + ) -> Optional[AIProvider]: + """ + Try to create a provider of the specified type. + + :param provider_type: Type of provider to create + :param ai_config: AI configuration + :param logger: Optional logger + :return: AIProvider instance or None if creation failed + """ + provider_mappings = { + 'openai': ('launchdarkly_server_sdk_ai_openai', 'OpenAIProvider'), + 'langchain': ('launchdarkly_server_sdk_ai_langchain', 'LangChainProvider'), + 'vercel': ('launchdarkly_server_sdk_ai_vercel', 'VercelProvider'), + } + + if provider_type not in provider_mappings: + return None + + package_name, provider_class_name = provider_mappings[provider_type] + return await AIProviderFactory._create_provider( + package_name, provider_class_name, ai_config, logger + ) + + @staticmethod + async def _create_provider( + package_name: str, + provider_class_name: str, + ai_config: AIConfigKind, + logger: Optional[Any] = None, + ) -> Optional[AIProvider]: + """ + Create a provider instance dynamically. 
+ + :param package_name: Name of the package containing the provider + :param provider_class_name: Name of the provider class + :param ai_config: AI configuration + :param logger: Optional logger + :return: AIProvider instance or None if creation failed + """ + try: + # Try to dynamically import the provider + # This will work if the package is installed + module = importlib.import_module(package_name) + provider_class: Type[AIProvider] = getattr(module, provider_class_name) + + provider = await provider_class.create(ai_config, logger) + if logger: + logger.debug( + f"Successfully created AIProvider for: {ai_config.provider.name if ai_config.provider else 'unknown'} " + f"with package {package_name}" + ) + return provider + except (ImportError, AttributeError, Exception) as error: + # If the provider is not available or creation fails, return None + if logger: + logger.warn( + f"Error creating AIProvider for: {ai_config.provider.name if ai_config.provider else 'unknown'} " + f"with package {package_name}: {error}" + ) + return None + diff --git a/ldai/providers/types.py b/ldai/providers/types.py new file mode 100644 index 0000000..4bfd692 --- /dev/null +++ b/ldai/providers/types.py @@ -0,0 +1,37 @@ +"""Types for AI provider responses.""" + +from dataclasses import dataclass +from typing import Any, Dict, List, Optional + +from ldai.models import LDMessage +from ldai.tracker import TokenUsage + + +@dataclass +class LDAIMetrics: + """ + Metrics information for AI operations that includes success status and token usage. + """ + success: bool + usage: Optional[TokenUsage] = None + + +@dataclass +class ChatResponse: + """ + Chat response structure. + """ + message: LDMessage + metrics: LDAIMetrics + evaluations: Optional[List[Any]] = None # List of JudgeResponse, will be populated later + + +@dataclass +class StructuredResponse: + """ + Structured response from AI models. 
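Consuming these response types is straightforward; a small helper sketch (`TokenUsage` fields per `ldai.tracker`, as constructed elsewhere in this patch):

```python
from ldai.providers.types import ChatResponse


def summarize_response(response: ChatResponse) -> str:
    usage = response.metrics.usage
    total = usage.total if usage else 0
    status = 'ok' if response.metrics.success else 'failed'
    return f'{status}: {len(response.message.content)} chars, {total} tokens'
```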
+ """ + data: Dict[str, Any] + raw_response: str + metrics: LDAIMetrics + From 6ee62b45e4e6559ba127f5ed5deae72a1d06eebe Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 7 Nov 2025 22:51:41 +0000 Subject: [PATCH 06/28] create a langchain implementation of the ai provider --- ldai/providers/__init__.py | 20 +- ldai/providers/ai_provider_factory.py | 21 +- ldai/providers/langchain/__init__.py | 284 ++++++++++++++++++++++++++ 3 files changed, 316 insertions(+), 9 deletions(-) create mode 100644 ldai/providers/langchain/__init__.py diff --git a/ldai/providers/__init__.py b/ldai/providers/__init__.py index 8cac547..1beffb4 100644 --- a/ldai/providers/__init__.py +++ b/ldai/providers/__init__.py @@ -3,9 +3,19 @@ from ldai.providers.ai_provider import AIProvider from ldai.providers.ai_provider_factory import AIProviderFactory, SupportedAIProvider -__all__ = [ - 'AIProvider', - 'AIProviderFactory', - 'SupportedAIProvider', -] +# Export LangChain provider if available +try: + from ldai.providers.langchain import LangChainProvider + __all__ = [ + 'AIProvider', + 'AIProviderFactory', + 'LangChainProvider', + 'SupportedAIProvider', + ] +except ImportError: + __all__ = [ + 'AIProvider', + 'AIProviderFactory', + 'SupportedAIProvider', + ] diff --git a/ldai/providers/ai_provider_factory.py b/ldai/providers/ai_provider_factory.py index dab3796..41cc1c2 100644 --- a/ldai/providers/ai_provider_factory.py +++ b/ldai/providers/ai_provider_factory.py @@ -80,7 +80,7 @@ def _get_providers_to_try( provider_set.add(provider_name) # type: ignore # Then try multi-provider packages, but avoid duplicates - multi_provider_packages: List[SupportedAIProvider] = ['langchain', 'vercel'] + multi_provider_packages: List[SupportedAIProvider] = ['langchain'] for provider in multi_provider_packages: provider_set.add(provider) @@ -100,10 +100,23 @@ async def _try_create_provider( :param logger: Optional logger :return: AIProvider instance or None if creation failed """ + # Handle built-in providers (part of this package) + if provider_type == 'langchain': + try: + from ldai.providers.langchain import LangChainProvider + return await LangChainProvider.create(ai_config, logger) + except ImportError as error: + if logger: + logger.warn( + f"Error creating LangChainProvider: {error}. " + f"Make sure langchain and langchain-core packages are installed." 
+ ) + return None + + # For future external providers, use dynamic import provider_mappings = { - 'openai': ('launchdarkly_server_sdk_ai_openai', 'OpenAIProvider'), - 'langchain': ('launchdarkly_server_sdk_ai_langchain', 'LangChainProvider'), - 'vercel': ('launchdarkly_server_sdk_ai_vercel', 'VercelProvider'), + # 'openai': ('launchdarkly_server_sdk_ai_openai', 'OpenAIProvider'), + # 'vercel': ('launchdarkly_server_sdk_ai_vercel', 'VercelProvider'), } if provider_type not in provider_mappings: diff --git a/ldai/providers/langchain/__init__.py b/ldai/providers/langchain/__init__.py new file mode 100644 index 0000000..af84dc8 --- /dev/null +++ b/ldai/providers/langchain/__init__.py @@ -0,0 +1,284 @@ +"""LangChain implementation of AIProvider for LaunchDarkly AI SDK.""" + +from typing import Any, Dict, List, Optional + +from langchain_core.chat_models import BaseChatModel +from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage + +from ldai.models import AIConfigKind, LDMessage +from ldai.providers.ai_provider import AIProvider +from ldai.providers.types import ChatResponse, LDAIMetrics, StructuredResponse +from ldai.tracker import TokenUsage + + +class LangChainProvider(AIProvider): + """ + LangChain implementation of AIProvider. + + This provider integrates LangChain models with LaunchDarkly's tracking capabilities. + """ + + def __init__(self, llm: BaseChatModel, logger: Optional[Any] = None): + """ + Initialize the LangChain provider. + + :param llm: LangChain BaseChatModel instance + :param logger: Optional logger for logging provider operations + """ + super().__init__(logger) + self._llm = llm + + # ============================================================================= + # MAIN FACTORY METHOD + # ============================================================================= + + @staticmethod + async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'LangChainProvider': + """ + Static factory method to create a LangChain AIProvider from an AI configuration. + + :param ai_config: The LaunchDarkly AI configuration + :param logger: Optional logger for the provider + :return: Configured LangChainProvider instance + """ + llm = await LangChainProvider.create_langchain_model(ai_config) + return LangChainProvider(llm, logger) + + # ============================================================================= + # INSTANCE METHODS (AIProvider Implementation) + # ============================================================================= + + async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: + """ + Invoke the LangChain model with an array of messages. + + :param messages: Array of LDMessage objects representing the conversation + :return: ChatResponse containing the model's response + """ + try: + # Convert LDMessage[] to LangChain messages + langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) + + # Get the LangChain response + response: AIMessage = await self._llm.ainvoke(langchain_messages) + + # Generate metrics early (assumes success by default) + metrics = LangChainProvider.get_ai_metrics_from_response(response) + + # Extract text content from the response + content: str = '' + if isinstance(response.content, str): + content = response.content + else: + # Log warning for non-string content (likely multimodal) + if self.logger: + self.logger.warn( + f"Multimodal response not supported, expecting a string. 
" + f"Content type: {type(response.content)}, Content: {response.content}" + ) + # Update metrics to reflect content loss + metrics.success = False + + # Create the assistant message + from ldai.models import LDMessage + assistant_message = LDMessage(role='assistant', content=content) + + return ChatResponse( + message=assistant_message, + metrics=metrics, + ) + except Exception as error: + if self.logger: + self.logger.warn(f'LangChain model invocation failed: {error}') + + from ldai.models import LDMessage + return ChatResponse( + message=LDMessage(role='assistant', content=''), + metrics=LDAIMetrics(success=False, usage=None), + ) + + async def invoke_structured_model( + self, + messages: List[LDMessage], + response_structure: Dict[str, Any], + ) -> StructuredResponse: + """ + Invoke the LangChain model with structured output support. + + :param messages: Array of LDMessage objects representing the conversation + :param response_structure: Dictionary of output configurations keyed by output name + :return: StructuredResponse containing the structured data + """ + try: + # Convert LDMessage[] to LangChain messages + langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) + + # Get the LangChain response with structured output + # Note: with_structured_output is available on BaseChatModel in newer LangChain versions + if hasattr(self._llm, 'with_structured_output'): + structured_llm = self._llm.with_structured_output(response_structure) + response = await structured_llm.ainvoke(langchain_messages) + else: + # Fallback: invoke normally and try to parse as JSON + response_obj = await self._llm.ainvoke(langchain_messages) + if isinstance(response_obj, AIMessage): + import json + try: + response = json.loads(response_obj.content) + except json.JSONDecodeError: + response = {'content': response_obj.content} + else: + response = response_obj + + # Using structured output doesn't support metrics + metrics = LDAIMetrics( + success=True, + usage=TokenUsage(total=0, input=0, output=0), + ) + + import json + return StructuredResponse( + data=response if isinstance(response, dict) else {'result': response}, + raw_response=json.dumps(response) if not isinstance(response, str) else response, + metrics=metrics, + ) + except Exception as error: + if self.logger: + self.logger.warn(f'LangChain structured model invocation failed: {error}') + + return StructuredResponse( + data={}, + raw_response='', + metrics=LDAIMetrics( + success=False, + usage=TokenUsage(total=0, input=0, output=0), + ), + ) + + def get_chat_model(self) -> BaseChatModel: + """ + Get the underlying LangChain model instance. + + :return: The LangChain BaseChatModel instance + """ + return self._llm + + # ============================================================================= + # STATIC UTILITY METHODS + # ============================================================================= + + @staticmethod + def map_provider(ld_provider_name: str) -> str: + """ + Map LaunchDarkly provider names to LangChain provider names. + + This method enables seamless integration between LaunchDarkly's standardized + provider naming and LangChain's naming conventions. 
+ + :param ld_provider_name: LaunchDarkly provider name + :return: LangChain provider name + """ + lowercased_name = ld_provider_name.lower() + + mapping: Dict[str, str] = { + 'gemini': 'google-genai', + } + + return mapping.get(lowercased_name, lowercased_name) + + @staticmethod + def get_ai_metrics_from_response(response: AIMessage) -> LDAIMetrics: + """ + Get AI metrics from a LangChain provider response. + + This method extracts token usage information and success status from LangChain responses + and returns a LaunchDarkly LDAIMetrics object. + + :param response: The response from the LangChain model + :return: LDAIMetrics with success status and token usage + """ + # Extract token usage if available + usage: Optional[TokenUsage] = None + if hasattr(response, 'response_metadata') and response.response_metadata: + token_usage = response.response_metadata.get('token_usage') + if token_usage: + usage = TokenUsage( + total=token_usage.get('total_tokens', 0) or token_usage.get('totalTokens', 0) or 0, + input=token_usage.get('prompt_tokens', 0) or token_usage.get('promptTokens', 0) or 0, + output=token_usage.get('completion_tokens', 0) or token_usage.get('completionTokens', 0) or 0, + ) + + # LangChain responses that complete successfully are considered successful by default + return LDAIMetrics(success=True, usage=usage) + + @staticmethod + def convert_messages_to_langchain(messages: List[LDMessage]) -> List[BaseMessage]: + """ + Convert LaunchDarkly messages to LangChain messages. + + This helper method enables developers to work directly with LangChain message types + while maintaining compatibility with LaunchDarkly's standardized message format. + + :param messages: List of LDMessage objects + :return: List of LangChain message objects + """ + result: List[BaseMessage] = [] + for msg in messages: + if msg.role == 'system': + result.append(SystemMessage(content=msg.content)) + elif msg.role == 'user': + result.append(HumanMessage(content=msg.content)) + elif msg.role == 'assistant': + result.append(AIMessage(content=msg.content)) + else: + raise ValueError(f'Unsupported message role: {msg.role}') + return result + + @staticmethod + async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: + """ + Create a LangChain model from an AI configuration. + + This public helper method enables developers to initialize their own LangChain models + using LaunchDarkly AI configurations. 
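Because `create_langchain_model` is public, callers can bootstrap their own LangChain model from an evaluated config and wrap it themselves; provider names are normalized first (for example, `gemini` becomes `google-genai`). A sketch, assuming `ai_config` was evaluated by the LaunchDarkly AI client:

```python
from ldai.providers.langchain import LangChainProvider


async def build_provider(ai_config) -> LangChainProvider:
    # Model name, provider, and parameters all come from the evaluated config.
    llm = await LangChainProvider.create_langchain_model(ai_config)
    return LangChainProvider(llm)
```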
+ + :param ai_config: The LaunchDarkly AI configuration + :return: A configured LangChain BaseChatModel + """ + model_name = ai_config.model.name if ai_config.model else '' + provider = ai_config.provider.name if ai_config.provider else '' + parameters = ai_config.model.get_parameter('parameters') if ai_config.model else {} + if not isinstance(parameters, dict): + parameters = {} + + # Use LangChain's init_chat_model to support multiple providers + # Note: This requires langchain package to be installed + try: + # Try to import init_chat_model from langchain.chat_models + # This is available in langchain >= 0.1.0 + try: + from langchain.chat_models import init_chat_model + except ImportError: + # Fallback for older versions or different import path + from langchain.chat_models.universal import init_chat_model + + # Map provider name + langchain_provider = LangChainProvider.map_provider(provider) + + # Create model configuration + model_kwargs = {**parameters} + if langchain_provider: + model_kwargs['model_provider'] = langchain_provider + + # Initialize the chat model (init_chat_model may be async or sync) + result = init_chat_model(model_name, **model_kwargs) + # Handle both sync and async initialization + if hasattr(result, '__await__'): + return await result + return result + except ImportError as e: + raise ImportError( + 'langchain package is required for LangChainProvider. ' + 'Install it with: pip install langchain langchain-core' + ) from e + From 231ae2e226766cb4ec2d11d2d0e69f34a792718c Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 01:15:01 +0000 Subject: [PATCH 07/28] Add Judge and evaluation metric tracking --- ldai/__init__.py | 11 +- ldai/client.py | 78 +++++++- ldai/judge/__init__.py | 231 ++++++++++++++++++++++++ ldai/judge/evaluation_schema_builder.py | 73 ++++++++ ldai/models.py | 38 ++-- ldai/providers/types.py | 19 ++ ldai/tracker.py | 101 ++++++++++- 7 files changed, 523 insertions(+), 28 deletions(-) create mode 100644 ldai/judge/__init__.py create mode 100644 ldai/judge/evaluation_schema_builder.py diff --git a/ldai/__init__.py b/ldai/__init__.py index 222c007..bba0bb1 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -13,7 +13,6 @@ AICompletionConfigDefault, AIJudgeConfig, AIJudgeConfigDefault, - Judge, JudgeConfiguration, LDMessage, ModelConfig, @@ -25,6 +24,12 @@ LDAIAgentDefaults, ) +# Export judge +from ldai.judge import AIJudge + +# Export judge types +from ldai.providers.types import EvalScore, JudgeResponse + __all__ = [ 'LDAIClient', 'AIAgentConfig', @@ -35,8 +40,10 @@ 'AICompletionConfigDefault', 'AIJudgeConfig', 'AIJudgeConfigDefault', - 'Judge', + 'AIJudge', + 'EvalScore', 'JudgeConfiguration', + 'JudgeResponse', 'LDMessage', 'ModelConfig', 'ProviderConfig', diff --git a/ldai/client.py b/ldai/client.py index 4f3cc9e..248fcb6 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -4,6 +4,7 @@ from ldclient import Context from ldclient.client import LDClient +from ldai.judge import AIJudge from ldai.models import ( AIAgentConfig, AIAgentConfigDefault, @@ -13,12 +14,12 @@ AICompletionConfigDefault, AIJudgeConfig, AIJudgeConfigDefault, - Judge, JudgeConfiguration, LDMessage, ModelConfig, ProviderConfig, ) +from ldai.providers.ai_provider_factory import AIProviderFactory, SupportedAIProvider from ldai.tracker import LDAIConfigTracker @@ -118,6 +119,79 @@ def judge_config( return config + async def create_judge( + self, + key: str, + context: Context, + default_value: AIJudgeConfigDefault, + variables: Optional[Dict[str, Any]] = None, + 
default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Optional[AIJudge]: + """ + Creates and returns a new Judge instance for AI evaluation. + + :param key: The key identifying the AI judge configuration to use + :param context: Standard Context used when evaluating flags + :param default_value: A default value representing a standard AI config result + :param variables: Dictionary of values for instruction interpolation. + The variables `message_history` and `response_to_evaluate` are reserved for the judge and will be ignored. + :param default_ai_provider: Optional default AI provider to use. + :return: Judge instance or None if disabled/unsupported + + Example:: + + judge = client.create_judge( + "relevance-judge", + context, + AIJudgeConfigDefault( + enabled=True, + model=ModelConfig("gpt-4"), + provider=ProviderConfig("openai"), + evaluation_metric_keys=['$ld:ai:judge:relevance'], + messages=[LDMessage(role='system', content='You are a relevance judge.')] + ), + variables={'metric': "relevance"} + ) + + if judge: + result = await judge.evaluate("User question", "AI response") + if result and result.evals: + relevance_eval = result.evals.get('$ld:ai:judge:relevance') + if relevance_eval: + print('Relevance score:', relevance_eval.score) + """ + self._client.track('$ld:ai:judge:function:createJudge', context, key, 1) + + try: + # Warn if reserved variables are provided + if variables: + if 'message_history' in variables: + # Note: Python doesn't have a logger on the client, but we could add one + pass # Would log warning if logger available + if 'response_to_evaluate' in variables: + pass # Would log warning if logger available + + # Overwrite reserved variables to ensure they remain as placeholders for judge evaluation + extended_variables = dict(variables) if variables else {} + extended_variables['message_history'] = '{{message_history}}' + extended_variables['response_to_evaluate'] = '{{response_to_evaluate}}' + + judge_config = self.judge_config(key, context, default_value, extended_variables) + + if not judge_config.enabled or not judge_config.tracker: + # Would log info if logger available + return None + + # Create AI provider for the judge + provider = await AIProviderFactory.create(judge_config, None, default_ai_provider) + if not provider: + return None + + return AIJudge(judge_config, judge_config.tracker, provider, None) + except Exception as error: + # Would log error if logger available + return None + def agent_config( self, key: str, @@ -337,7 +411,7 @@ def __evaluate( judge_config = variation['judgeConfiguration'] if 'judges' in judge_config and isinstance(judge_config['judges'], list): judges = [ - Judge( + JudgeConfiguration.Judge( key=judge['key'], sampling_rate=judge['samplingRate'] ) diff --git a/ldai/judge/__init__.py b/ldai/judge/__init__.py new file mode 100644 index 0000000..323cd19 --- /dev/null +++ b/ldai/judge/__init__.py @@ -0,0 +1,231 @@ +"""Judge implementation for AI evaluation.""" + +import random +from typing import Any, Dict, Optional + +import chevron + +from ldai.models import AIJudgeConfig, LDMessage +from ldai.providers.ai_provider import AIProvider +from ldai.providers.types import ChatResponse, EvalScore, JudgeResponse, StructuredResponse +from ldai.tracker import LDAIConfigTracker +from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder + + +class AIJudge: + """ + Judge implementation that handles evaluation functionality and conversation management. 
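The reserved `message_history` and `response_to_evaluate` variables are the contract between a judge's messages and evaluation time: `create_judge` overwrites them so they survive config interpolation and are only filled in when `evaluate` runs. A sketch of a judge message authored with those placeholders; the wording is illustrative:

```python
from ldai.models import LDMessage

# These Mustache placeholders are deliberately preserved by create_judge
# and interpolated later, when the judge evaluates a response.
judge_message = LDMessage(
    role='system',
    content=(
        'Score the relevance of the response on a 0-1 scale.\n'
        'Conversation history:\n{{message_history}}\n'
        'Response to evaluate:\n{{response_to_evaluate}}'
    ),
)
```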
+ + According to the AIEval spec, judges are AI Configs with mode: "judge" that evaluate + other AI Configs using structured output. + """ + + def __init__( + self, + ai_config: AIJudgeConfig, + ai_config_tracker: LDAIConfigTracker, + ai_provider: AIProvider, + logger: Optional[Any] = None, + ): + """ + Initialize the Judge. + + :param ai_config: The judge AI configuration + :param ai_config_tracker: The tracker for the judge configuration + :param ai_provider: The AI provider to use for evaluation + :param logger: Optional logger for logging + """ + self._ai_config = ai_config + self._ai_config_tracker = ai_config_tracker + self._ai_provider = ai_provider + self._logger = logger + self._evaluation_response_structure = EvaluationSchemaBuilder.build( + ai_config.evaluation_metric_keys + ) + + async def evaluate( + self, + input_text: str, + output_text: str, + sampling_rate: float = 1.0, + ) -> Optional[JudgeResponse]: + """ + Evaluates an AI response using the judge's configuration. + + :param input_text: The input prompt or question that was provided to the AI + :param output_text: The AI-generated response to be evaluated + :param sampling_rate: Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1) + :return: Evaluation results or None if not sampled + """ + try: + if not self._ai_config.evaluation_metric_keys or len(self._ai_config.evaluation_metric_keys) == 0: + if self._logger: + self._logger.warn( + 'Judge configuration is missing required evaluationMetricKeys' + ) + return None + + if not self._ai_config.messages: + if self._logger: + self._logger.warn('Judge configuration must include messages') + return None + + if random.random() > sampling_rate: + if self._logger: + self._logger.debug(f'Judge evaluation skipped due to sampling rate: {sampling_rate}') + return None + + messages = self._construct_evaluation_messages(input_text, output_text) + + # Track metrics of the structured model invocation + response = await self._ai_config_tracker.track_metrics_of( + lambda result: result.metrics, + lambda: self._ai_provider.invoke_structured_model(messages, self._evaluation_response_structure) + ) + + success = response.metrics.success + + evals = self._parse_evaluation_response(response.data) + + if len(evals) != len(self._ai_config.evaluation_metric_keys): + if self._logger: + self._logger.warn('Judge evaluation did not return all evaluations') + success = False + + return JudgeResponse( + evals=evals, + success=success, + ) + except Exception as error: + if self._logger: + self._logger.error(f'Judge evaluation failed: {error}') + return JudgeResponse( + evals={}, + success=False, + error=str(error) if isinstance(error, Exception) else 'Unknown error', + ) + + async def evaluate_messages( + self, + messages: list[LDMessage], + response: ChatResponse, + sampling_ratio: float = 1.0, + ) -> Optional[JudgeResponse]: + """ + Evaluates an AI response from chat messages and response. + + :param messages: Array of messages representing the conversation history + :param response: The AI response to be evaluated + :param sampling_ratio: Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1) + :return: Evaluation results or None if not sampled + """ + input_text = '\r\n'.join([msg.content for msg in messages]) if messages else '' + output_text = response.message.content + + return await self.evaluate(input_text, output_text, sampling_ratio) + + def get_ai_config(self) -> AIJudgeConfig: + """ + Returns the AI Config used by this judge. 
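A sketch of driving an evaluation directly and reading the per-metric scores; `judge` is assumed to come from `create_judge`:

```python
async def score_answer(judge, question: str, answer: str) -> None:
    # sampling_rate=1.0 forces the evaluation to run every time.
    result = await judge.evaluate(question, answer, sampling_rate=1.0)
    if result and result.success:
        for metric_key, eval_score in result.evals.items():
            print(f'{metric_key}: {eval_score.score} ({eval_score.reasoning})')
    elif result:
        print(f'Evaluation failed: {result.error}')
```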
+ + :return: The judge AI configuration + """ + return self._ai_config + + def get_tracker(self) -> LDAIConfigTracker: + """ + Returns the tracker associated with this judge. + + :return: The tracker for the judge configuration + """ + return self._ai_config_tracker + + def get_provider(self) -> AIProvider: + """ + Returns the AI provider used by this judge. + + :return: The AI provider + """ + return self._ai_provider + + def _construct_evaluation_messages(self, input_text: str, output_text: str) -> list[LDMessage]: + """ + Constructs evaluation messages by combining judge's config messages with input/output. + + :param input_text: The input text + :param output_text: The output text to evaluate + :return: List of messages for evaluation + """ + if not self._ai_config.messages: + return [] + + messages: list[LDMessage] = [] + for msg in self._ai_config.messages: + # Interpolate message content with reserved variables + content = self._interpolate_message(msg.content, { + 'message_history': input_text, + 'response_to_evaluate': output_text, + }) + messages.append(LDMessage(role=msg.role, content=content)) + + return messages + + def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: + """ + Interpolates message content with variables using Mustache templating. + + :param content: The message content template + :param variables: Variables to interpolate + :return: Interpolated message content + """ + # Use chevron (Mustache) for templating, with no escaping + return chevron.render(content, variables) + + def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScore]: + """ + Parses the structured evaluation response from the AI provider. + + :param data: The structured response data + :return: Dictionary of evaluation scores keyed by metric key + """ + results: Dict[str, EvalScore] = {} + + if not data.get('evaluations') or not isinstance(data['evaluations'], dict): + if self._logger: + self._logger.warn('Invalid response: missing or invalid evaluations object') + return results + + evaluations = data['evaluations'] + + for metric_key in self._ai_config.evaluation_metric_keys: + evaluation = evaluations.get(metric_key) + + if not evaluation or not isinstance(evaluation, dict): + if self._logger: + self._logger.warn(f'Missing evaluation for metric key: {metric_key}') + continue + + score = evaluation.get('score') + reasoning = evaluation.get('reasoning') + + if not isinstance(score, (int, float)) or score < 0 or score > 1: + if self._logger: + self._logger.warn( + f'Invalid score evaluated for {metric_key}: {score}. ' + 'Score must be a number between 0 and 1 inclusive' + ) + continue + + if not isinstance(reasoning, str): + if self._logger: + self._logger.warn( + f'Invalid reasoning evaluated for {metric_key}: {reasoning}. ' + 'Reasoning must be a string' + ) + continue + + results[metric_key] = EvalScore(score=float(score), reasoning=reasoning) + + return results + + diff --git a/ldai/judge/evaluation_schema_builder.py b/ldai/judge/evaluation_schema_builder.py new file mode 100644 index 0000000..d8d8fa4 --- /dev/null +++ b/ldai/judge/evaluation_schema_builder.py @@ -0,0 +1,73 @@ +"""Internal class for building dynamic evaluation response schemas.""" + +from typing import Any, Dict + + +class EvaluationSchemaBuilder: + """ + Internal class for building dynamic evaluation response schemas. + Not exported - only used internally by Judge. 
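For reference, the `build` method below emits a standard JSON-schema document. An abridged sketch of the output for a single metric key, with the `description` and `additionalProperties` entries omitted:

```python
from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder

schema = EvaluationSchemaBuilder.build(['$ld:ai:judge:relevance'])
# Abridged shape of the returned dict:
# {
#     'type': 'object',
#     'properties': {
#         'evaluations': {
#             'type': 'object',
#             'properties': {
#                 '$ld:ai:judge:relevance': {
#                     'type': 'object',
#                     'properties': {
#                         'score': {'type': 'number', 'minimum': 0, 'maximum': 1},
#                         'reasoning': {'type': 'string'},
#                     },
#                     'required': ['score', 'reasoning'],
#                 },
#             },
#             'required': ['$ld:ai:judge:relevance'],
#         },
#     },
#     'required': ['evaluations'],
# }
```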
+ """ + + @staticmethod + def build(evaluation_metric_keys: list[str]) -> Dict[str, Any]: + """ + Build an evaluation response schema from evaluation metric keys. + + :param evaluation_metric_keys: List of evaluation metric keys + :return: Schema dictionary for structured output + """ + return { + 'type': 'object', + 'properties': { + 'evaluations': { + 'type': 'object', + 'description': f"Object containing evaluation results for {', '.join(evaluation_metric_keys)} metrics", + 'properties': EvaluationSchemaBuilder._build_key_properties(evaluation_metric_keys), + 'required': evaluation_metric_keys, + 'additionalProperties': False, + }, + }, + 'required': ['evaluations'], + 'additionalProperties': False, + } + + @staticmethod + def _build_key_properties(evaluation_metric_keys: list[str]) -> Dict[str, Any]: + """ + Build properties for each evaluation metric key. + + :param evaluation_metric_keys: List of evaluation metric keys + :return: Dictionary of properties for each key + """ + result: Dict[str, Any] = {} + for key in evaluation_metric_keys: + result[key] = EvaluationSchemaBuilder._build_key_schema(key) + return result + + @staticmethod + def _build_key_schema(key: str) -> Dict[str, Any]: + """ + Build schema for a single evaluation metric key. + + :param key: Evaluation metric key + :return: Schema dictionary for the key + """ + return { + 'type': 'object', + 'properties': { + 'score': { + 'type': 'number', + 'minimum': 0, + 'maximum': 1, + 'description': f'Score between 0.0 and 1.0 for {key}', + }, + 'reasoning': { + 'type': 'string', + 'description': f'Reasoning behind the score for {key}', + }, + }, + 'required': ['score', 'reasoning'], + 'additionalProperties': False, + } + diff --git a/ldai/models.py b/ldai/models.py index 0b961f7..4531f8f 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -105,30 +105,30 @@ def to_dict(self) -> dict: # Judge Types # ============================================================================ -@dataclass(frozen=True) -class Judge: - """ - Configuration for a single judge attachment. - """ - key: str - sampling_rate: float - - def to_dict(self) -> dict: - """ - Render the judge as a dictionary object. - """ - return { - 'key': self.key, - 'samplingRate': self.sampling_rate, - } - - @dataclass(frozen=True) class JudgeConfiguration: """ Configuration for judge attachment to AI Configs. """ - judges: List[Judge] + + @dataclass(frozen=True) + class Judge: + """ + Configuration for a single judge attachment. + """ + key: str + sampling_rate: float + + def to_dict(self) -> dict: + """ + Render the judge as a dictionary object. + """ + return { + 'key': self.key, + 'samplingRate': self.sampling_rate, + } + + judges: List['JudgeConfiguration.Judge'] def to_dict(self) -> dict: """ diff --git a/ldai/providers/types.py b/ldai/providers/types.py index 4bfd692..58ca3fc 100644 --- a/ldai/providers/types.py +++ b/ldai/providers/types.py @@ -35,3 +35,22 @@ class StructuredResponse: raw_response: str metrics: LDAIMetrics + +@dataclass +class EvalScore: + """ + Score and reasoning for a single evaluation metric. + """ + score: float # Score between 0.0 and 1.0 + reasoning: str # Reasoning behind the provided score + + +@dataclass +class JudgeResponse: + """ + Response from a judge evaluation containing scores and reasoning for multiple metrics. 
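After the nesting refactor above, a judge attachment is constructed through the nested class. A sketch; the rendered dictionary shape is an assumption based on the nested `Judge.to_dict` shown above:

```python
from ldai.models import JudgeConfiguration

judge_configuration = JudgeConfiguration(judges=[
    JudgeConfiguration.Judge(key='relevance-judge', sampling_rate=0.5),
])
# Expected shape, assuming the configuration renders its judges list:
# {'judges': [{'key': 'relevance-judge', 'samplingRate': 0.5}]}
```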
+ """ + evals: Dict[str, EvalScore] # Dictionary where keys are metric names and values contain score and reasoning + success: bool # Whether the evaluation completed successfully + error: Optional[str] = None # Error message if evaluation failed + diff --git a/ldai/tracker.py b/ldai/tracker.py index a049952..632f0f4 100644 --- a/ldai/tracker.py +++ b/ldai/tracker.py @@ -1,7 +1,7 @@ import time from dataclasses import dataclass from enum import Enum -from typing import Dict, Optional +from typing import Any, Dict, Optional from ldclient import Context, LDClient @@ -144,7 +144,7 @@ def track_duration_of(self, func): An exception occurring during the execution of the function will still track the duration. The exception will be re-thrown. - :param func: Function to track. + :param func: Function to track (synchronous only). :return: Result of the tracked function. """ start_time = time.time() @@ -157,6 +157,90 @@ def track_duration_of(self, func): return result + async def track_metrics_of(self, metrics_extractor, func): + """ + Track metrics for a generic AI operation. + + This function will track the duration of the operation, extract metrics using the provided + metrics extractor function, and track success or error status accordingly. + + If the provided function throws, then this method will also throw. + In the case the provided function throws, this function will record the duration and an error. + A failed operation will not have any token usage data. + + :param metrics_extractor: Function that extracts LDAIMetrics from the operation result + :param func: Async function which executes the operation + :return: The result of the operation + """ + start_time = time.time() + result = None + try: + result = await func() + except Exception as err: + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) + self.track_error() + raise err + + # Track duration after successful call + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) + + # Extract metrics after successful AI call + from ldai.providers.types import LDAIMetrics + metrics = metrics_extractor(result) + + # Track success/error based on metrics + if metrics.success: + self.track_success() + else: + self.track_error() + + # Track token usage if available + if metrics.usage: + self.track_tokens(metrics.usage) + + return result + + def track_eval_scores(self, scores: Dict[str, Any]) -> None: + """ + Track evaluation scores for multiple metrics. + + :param scores: Dictionary mapping metric keys to their evaluation scores (EvalScore objects) + """ + from ldai.providers.types import EvalScore + + # Track each evaluation score individually + for metric_key, eval_score in scores.items(): + if isinstance(eval_score, EvalScore): + self._ld_client.track( + metric_key, + self._context, + self.__get_track_data(), + eval_score.score + ) + + def track_judge_response(self, judge_response: Any) -> None: + """ + Track a judge response, including evaluation scores and success status. 
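`track_metrics_of` is the generic wrapper the judge and chat paths share: it times the call, then records success or error and any token usage from the extracted `LDAIMetrics`. A usage sketch, assuming a provider whose result carries a `metrics` attribute:

```python
async def tracked_invoke(tracker, provider, messages):
    # Duration is always recorded; success/error and token usage are
    # derived from the LDAIMetrics returned by the extractor.
    return await tracker.track_metrics_of(
        lambda result: result.metrics,
        lambda: provider.invoke_model(messages),
    )
```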
+ + :param judge_response: JudgeResponse object containing evals and success status + """ + from ldai.providers.types import JudgeResponse + + if isinstance(judge_response, JudgeResponse): + # Track evaluation scores + if judge_response.evals: + self.track_eval_scores(judge_response.evals) + + # Track success/error based on judge response + if judge_response.success: + self.track_success() + else: + self.track_error() + def track_feedback(self, feedback: Dict[str, FeedbackKind]) -> None: """ Track user feedback for an AI operation. @@ -197,7 +281,7 @@ def track_error(self) -> None: "$ld:ai:generation:error", self._context, self.__get_track_data(), 1 ) - def track_openai_metrics(self, func): + async def track_openai_metrics(self, func): """ Track OpenAI-specific operations. @@ -211,15 +295,22 @@ def track_openai_metrics(self, func): A failed operation will not have any token usage data. - :param func: Function to track. + :param func: Async function to track. :return: Result of the tracked function. """ + start_time = time.time() try: - result = self.track_duration_of(func) + result = await func() + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) self.track_success() if hasattr(result, "usage") and hasattr(result.usage, "to_dict"): self.track_tokens(_openai_to_token_usage(result.usage.to_dict())) except Exception: + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) self.track_error() raise From 445ab8c9c3488221ea39dfeec94cdd5235d8581e Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 03:43:01 +0000 Subject: [PATCH 08/28] Add Chat implementation --- ldai/__init__.py | 4 + ldai/chat/__init__.py | 191 ++++++++++++++++++++++++++++++++++++++++++ ldai/client.py | 109 ++++++++++++++++++++++++ 3 files changed, 304 insertions(+) create mode 100644 ldai/chat/__init__.py diff --git a/ldai/__init__.py b/ldai/__init__.py index bba0bb1..617ac3a 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -27,6 +27,9 @@ # Export judge from ldai.judge import AIJudge +# Export chat +from ldai.chat import TrackedChat + # Export judge types from ldai.providers.types import EvalScore, JudgeResponse @@ -41,6 +44,7 @@ 'AIJudgeConfig', 'AIJudgeConfigDefault', 'AIJudge', + 'TrackedChat', 'EvalScore', 'JudgeConfiguration', 'JudgeResponse', diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py new file mode 100644 index 0000000..5bdf37a --- /dev/null +++ b/ldai/chat/__init__.py @@ -0,0 +1,191 @@ +"""TrackedChat implementation for managing AI chat conversations.""" + +from typing import Any, Dict, List, Optional + +from ldai.models import AICompletionConfig, LDMessage +from ldai.providers.ai_provider import AIProvider +from ldai.providers.types import ChatResponse, JudgeResponse +from ldai.judge import AIJudge +from ldai.tracker import LDAIConfigTracker + + +class TrackedChat: + """ + Concrete implementation of TrackedChat that provides chat functionality + by delegating to an AIProvider implementation. + + This class handles conversation management and tracking, while delegating + the actual model invocation to the provider. + """ + + def __init__( + self, + ai_config: AICompletionConfig, + tracker: LDAIConfigTracker, + provider: AIProvider, + judges: Optional[Dict[str, AIJudge]] = None, + logger: Optional[Any] = None, + ): + """ + Initialize the TrackedChat. 
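Since `track_openai_metrics` is now a coroutine, callers pass an async callable rather than a synchronous one. A sketch assuming the `AsyncOpenAI` client from the `openai` v1 package; the model name is illustrative:

```python
from openai import AsyncOpenAI

openai_client = AsyncOpenAI()


async def tracked_completion(tracker, prompt: str):
    # The lambda returns an awaitable; the tracker awaits it and records
    # duration, success/error, and token usage from the response.
    return await tracker.track_openai_metrics(
        lambda: openai_client.chat.completions.create(
            model='gpt-4o-mini',
            messages=[{'role': 'user', 'content': prompt}],
        )
    )
```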
+ + :param ai_config: The completion AI configuration + :param tracker: The tracker for the completion configuration + :param provider: The AI provider to use for chat + :param judges: Optional dictionary of judge instances keyed by their configuration keys + :param logger: Optional logger for logging + """ + self._ai_config = ai_config + self._tracker = tracker + self._provider = provider + self._judges = judges or {} + self._logger = logger + self._messages: List[LDMessage] = [] + + async def invoke(self, prompt: str) -> ChatResponse: + """ + Invoke the chat model with a prompt string. + + This method handles conversation management and tracking, delegating to the provider's invoke_model method. + + :param prompt: The user prompt to send to the chat model + :return: ChatResponse containing the model's response and metrics + """ + # Convert prompt string to LDMessage with role 'user' and add to conversation history + user_message: LDMessage = LDMessage(role='user', content=prompt) + self._messages.append(user_message) + + # Prepend config messages to conversation history for model invocation + config_messages = self._ai_config.messages or [] + all_messages = config_messages + self._messages + + # Delegate to provider-specific implementation with tracking + response = await self._tracker.track_metrics_of( + lambda result: result.metrics, + lambda: self._provider.invoke_model(all_messages), + ) + + # Evaluate with judges if configured + if ( + self._ai_config.judge_configuration + and self._ai_config.judge_configuration.judges + and len(self._ai_config.judge_configuration.judges) > 0 + ): + evaluations = await self._evaluate_with_judges(self._messages, response) + response.evaluations = evaluations + + # Add the response message to conversation history + self._messages.append(response.message) + return response + + async def _evaluate_with_judges( + self, + messages: List[LDMessage], + response: ChatResponse, + ) -> List[Optional[JudgeResponse]]: + """ + Evaluates the response with all configured judges. + + Returns a list of evaluation results. + + :param messages: Array of messages representing the conversation history + :param response: The AI response to be evaluated + :return: List of judge evaluation results (may contain None for failed evaluations) + """ + if not self._ai_config.judge_configuration or not self._ai_config.judge_configuration.judges: + return [] + + judge_configs = self._ai_config.judge_configuration.judges + + # Start all judge evaluations in parallel + async def evaluate_judge(judge_config): + judge = self._judges.get(judge_config.key) + if not judge: + if self._logger: + self._logger.warn( + f"Judge configuration is not enabled: {judge_config.key}", + ) + return None + + eval_result = await judge.evaluate_messages( + messages, response, judge_config.sampling_rate + ) + + if eval_result and eval_result.success: + self._tracker.track_eval_scores(eval_result.evals) + + return eval_result + + # Ensure all evaluations complete even if some fail + import asyncio + evaluation_promises = [evaluate_judge(judge_config) for judge_config in judge_configs] + results = await asyncio.gather(*evaluation_promises, return_exceptions=True) + + # Map exceptions to None + return [ + None if isinstance(result, Exception) else result + for result in results + ] + + def get_config(self) -> AICompletionConfig: + """ + Get the underlying AI configuration used to initialize this TrackedChat. 
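A sketch of a multi-turn conversation, assuming `chat` came from `create_chat`; config messages are prepended on every call, while `get_messages` (shown below) returns only the running conversation by default:

```python
async def run_conversation(chat) -> int:
    first = await chat.invoke('I need help with my order')
    follow_up = await chat.invoke('It still has not shipped')
    print(first.message.content)
    print(follow_up.message.content)
    # Two user turns plus two assistant replies.
    return len(chat.get_messages())
```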
+ + :return: The AI completion configuration + """ + return self._ai_config + + def get_tracker(self) -> LDAIConfigTracker: + """ + Get the underlying AI configuration tracker used to initialize this TrackedChat. + + :return: The tracker instance + """ + return self._tracker + + def get_provider(self) -> AIProvider: + """ + Get the underlying AI provider instance. + + This provides direct access to the provider for advanced use cases. + + :return: The AI provider instance + """ + return self._provider + + def get_judges(self) -> Dict[str, AIJudge]: + """ + Get the judges associated with this TrackedChat. + + Returns a dictionary of judge instances keyed by their configuration keys. + + :return: Dictionary of judge instances + """ + return self._judges + + def append_messages(self, messages: List[LDMessage]) -> None: + """ + Append messages to the conversation history. + + Adds messages to the conversation history without invoking the model, + which is useful for managing multi-turn conversations or injecting context. + + :param messages: Array of messages to append to the conversation history + """ + self._messages.extend(messages) + + def get_messages(self, include_config_messages: bool = False) -> List[LDMessage]: + """ + Get all messages in the conversation history. + + :param include_config_messages: Whether to include the config messages from the AIConfig. + Defaults to False. + :return: Array of messages. When include_config_messages is True, returns both config + messages and conversation history with config messages prepended. When False, + returns only the conversation history messages. + """ + if include_config_messages: + config_messages = self._ai_config.messages or [] + return config_messages + self._messages + return list(self._messages) + diff --git a/ldai/client.py b/ldai/client.py index 248fcb6..91649d7 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -4,6 +4,7 @@ from ldclient import Context from ldclient.client import LDClient +from ldai.chat import TrackedChat from ldai.judge import AIJudge from ldai.models import ( AIAgentConfig, @@ -192,6 +193,114 @@ async def create_judge( # Would log error if logger available return None + async def _initialize_judges( + self, + judge_configs: List[JudgeConfiguration.Judge], + context: Context, + variables: Optional[Dict[str, Any]] = None, + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Dict[str, AIJudge]: + """ + Initialize judges from judge configurations. 
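`append_messages` makes it possible to seed prior turns or inject context without a model call. A sketch:

```python
from ldai.models import LDMessage


async def seed_and_ask(chat) -> str:
    # Seed earlier turns without invoking the model.
    chat.append_messages([
        LDMessage(role='user', content='My name is Alex.'),
        LDMessage(role='assistant', content='Nice to meet you, Alex!'),
    ])
    response = await chat.invoke('What is my name?')
    return response.message.content
```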
+ + :param judge_configs: List of judge configurations + :param context: Standard Context used when evaluating flags + :param variables: Dictionary of values for instruction interpolation + :param default_ai_provider: Optional default AI provider to use + :return: Dictionary of judge instances keyed by their configuration keys + """ + judges: Dict[str, AIJudge] = {} + + async def create_judge_for_config(judge_key: str): + judge = await self.create_judge( + judge_key, + context, + AIJudgeConfigDefault(enabled=False), + variables, + default_ai_provider, + ) + return judge_key, judge + + judge_promises = [ + create_judge_for_config(judge_config.key) + for judge_config in judge_configs + ] + + import asyncio + results = await asyncio.gather(*judge_promises, return_exceptions=True) + + for result in results: + if isinstance(result, Exception): + continue + judge_key, judge = result + if judge: + judges[judge_key] = judge + + return judges + + async def create_chat( + self, + key: str, + context: Context, + default_value: AICompletionConfigDefault, + variables: Optional[Dict[str, Any]] = None, + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Optional[TrackedChat]: + """ + Creates and returns a new TrackedChat instance for AI chat conversations. + + :param key: The key identifying the AI completion configuration to use + :param context: Standard Context used when evaluating flags + :param default_value: A default value representing a standard AI config result + :param variables: Dictionary of values for instruction interpolation + :param default_ai_provider: Optional default AI provider to use + :return: TrackedChat instance or None if disabled/unsupported + + Example:: + + chat = await client.create_chat( + "customer-support-chat", + context, + AICompletionConfigDefault( + enabled=True, + model=ModelConfig("gpt-4"), + provider=ProviderConfig("openai"), + messages=[LDMessage(role='system', content='You are a helpful assistant.')] + ), + variables={'customerName': 'John'} + ) + + if chat: + response = await chat.invoke("I need help with my order") + print(response.message.content) + + # Access conversation history + messages = chat.get_messages() + print(f"Conversation has {len(messages)} messages") + """ + self._client.track('$ld:ai:config:function:createChat', context, key, 1) + + config = self.completion_config(key, context, default_value, variables) + + if not config.enabled or not config.tracker: + # Would log info if logger available + return None + + provider = await AIProviderFactory.create(config, None, default_ai_provider) + if not provider: + return None + + judges = {} + if config.judge_configuration and config.judge_configuration.judges: + judges = await self._initialize_judges( + config.judge_configuration.judges, + context, + variables, + default_ai_provider, + ) + + return TrackedChat(config, config.tracker, provider, judges, None) + def agent_config( self, key: str, From 5446222a18f7f723cad0fad6b3a0386144dd23ad Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 03:53:43 +0000 Subject: [PATCH 09/28] Set a default for evaluation metircs --- ldai/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ldai/models.py b/ldai/models.py index 4531f8f..fa36f8c 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -1,5 +1,5 @@ import warnings -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, Dict, List, Literal, Optional, Union from ldai.tracker import LDAIConfigTracker @@ -302,7 +302,7 
@@ class AIJudgeConfig(AIConfig): """ Judge-specific AI Config with required evaluation metric key. """ - evaluation_metric_keys: List[str] + evaluation_metric_keys: List[str] = field(default_factory=list) messages: Optional[List[LDMessage]] = None def to_dict(self) -> dict: From bc46608ad4bb66125412ffb23e9e8963804e1aac Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 17:24:24 +0000 Subject: [PATCH 10/28] add the logger --- ldai/client.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/ldai/client.py b/ldai/client.py index 91649d7..4f4b6b9 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -1,3 +1,4 @@ +import logging from typing import Any, Dict, List, Optional, Tuple import chevron @@ -29,6 +30,7 @@ class LDAIClient: def __init__(self, client: LDClient): self._client = client + self._logger = logging.getLogger('ldclient.ai') def completion_config( self, @@ -184,11 +186,11 @@ async def create_judge( return None # Create AI provider for the judge - provider = await AIProviderFactory.create(judge_config, None, default_ai_provider) + provider = await AIProviderFactory.create(judge_config, self._logger, default_ai_provider) if not provider: return None - return AIJudge(judge_config, judge_config.tracker, provider, None) + return AIJudge(judge_config, judge_config.tracker, provider, self._logger) except Exception as error: # Would log error if logger available return None @@ -279,14 +281,15 @@ async def create_chat( print(f"Conversation has {len(messages)} messages") """ self._client.track('$ld:ai:config:function:createChat', context, key, 1) - + if self._logger: + self._logger.debug(f"Creating chat for key: {key}") config = self.completion_config(key, context, default_value, variables) if not config.enabled or not config.tracker: # Would log info if logger available return None - provider = await AIProviderFactory.create(config, None, default_ai_provider) + provider = await AIProviderFactory.create(config, self._logger, default_ai_provider) if not provider: return None @@ -299,7 +302,7 @@ async def create_chat( default_ai_provider, ) - return TrackedChat(config, config.tracker, provider, judges, None) + return TrackedChat(config, config.tracker, provider, judges, self._logger) def agent_config( self, From fd0aff476dbe6328906be71a734ac9c216d899b2 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 18:09:59 +0000 Subject: [PATCH 11/28] adjust langchain import --- ldai/providers/langchain/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ldai/providers/langchain/__init__.py b/ldai/providers/langchain/__init__.py index af84dc8..f2e2c35 100644 --- a/ldai/providers/langchain/__init__.py +++ b/ldai/providers/langchain/__init__.py @@ -2,7 +2,7 @@ from typing import Any, Dict, List, Optional -from langchain_core.chat_models import BaseChatModel +from langchain_core.language_models.chat_models import BaseChatModel from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage from ldai.models import AIConfigKind, LDMessage From c3c939f1b70654596d3b0cff02b28d4c98859515 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Sat, 8 Nov 2025 18:35:13 +0000 Subject: [PATCH 12/28] fix structure response --- ldai/judge/evaluation_schema_builder.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ldai/judge/evaluation_schema_builder.py b/ldai/judge/evaluation_schema_builder.py index d8d8fa4..1965e64 100644 --- a/ldai/judge/evaluation_schema_builder.py +++ b/ldai/judge/evaluation_schema_builder.py @@ 
-18,6 +18,8 @@ def build(evaluation_metric_keys: list[str]) -> Dict[str, Any]:
         :return: Schema dictionary for structured output
         """
         return {
+            'title': 'EvaluationResponse',
+            'description': f"Response containing evaluation results for {', '.join(evaluation_metric_keys)} metrics",
             'type': 'object',
             'properties': {
                 'evaluations': {

From 125bb66252083ca983a9b46a209425f77ae9923c Mon Sep 17 00:00:00 2001
From: jsonbailey
Date: Sat, 8 Nov 2025 18:59:13 +0000
Subject: [PATCH 13/28] judge response should be async

---
 ldai/chat/__init__.py   | 34 ++++++++++++++++------------------
 ldai/providers/types.py | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 18 deletions(-)

diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py
index 5bdf37a..0785c16 100644
--- a/ldai/chat/__init__.py
+++ b/ldai/chat/__init__.py
@@ -1,5 +1,6 @@
 """TrackedChat implementation for managing AI chat conversations."""
 
+import asyncio
 from typing import Any, Dict, List, Optional
 
 from ldai.models import AICompletionConfig, LDMessage
@@ -65,39 +66,39 @@ async def invoke(self, prompt: str) -> ChatResponse:
             lambda: self._provider.invoke_model(all_messages),
         )
 
-        # Evaluate with judges if configured
+        # Start judge evaluations as async tasks (don't await them)
         if (
             self._ai_config.judge_configuration
             and self._ai_config.judge_configuration.judges
             and len(self._ai_config.judge_configuration.judges) > 0
         ):
-            evaluations = await self._evaluate_with_judges(self._messages, response)
-            response.evaluations = evaluations
+            evaluation_tasks = self._start_judge_evaluations(self._messages, response)
+            response.evaluations = evaluation_tasks
 
         # Add the response message to conversation history
         self._messages.append(response.message)
         return response
 
-    async def _evaluate_with_judges(
+    def _start_judge_evaluations(
         self,
         messages: List[LDMessage],
         response: ChatResponse,
-    ) -> List[Optional[JudgeResponse]]:
+    ) -> List[asyncio.Task[Optional[JudgeResponse]]]:
         """
-        Evaluates the response with all configured judges.
+        Start judge evaluations as async tasks without awaiting them.
 
-        Returns a list of evaluation results.
+        Returns a list of async tasks that can be awaited later.
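With this change, `response.evaluations` holds pending `asyncio.Task` objects rather than finished results, so callers gather the tasks only when the scores are needed inline. A sketch:

```python
import asyncio


async def invoke_and_collect_scores(chat, prompt: str):
    response = await chat.invoke(prompt)
    # Judge evaluations run in the background; gather the tasks to
    # block until the scores are available.
    tasks = getattr(response, 'evaluations', None)
    if tasks:
        for judge_result in await asyncio.gather(*tasks):
            if judge_result and judge_result.success:
                print(judge_result.evals)
    return response
```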
:param messages: Array of messages representing the conversation history :param response: The AI response to be evaluated - :return: List of judge evaluation results (may contain None for failed evaluations) + :return: List of async tasks that will return judge evaluation results """ if not self._ai_config.judge_configuration or not self._ai_config.judge_configuration.judges: return [] judge_configs = self._ai_config.judge_configuration.judges - # Start all judge evaluations in parallel + # Start all judge evaluations as tasks async def evaluate_judge(judge_config): judge = self._judges.get(judge_config.key) if not judge: @@ -116,16 +117,13 @@ async def evaluate_judge(judge_config): return eval_result - # Ensure all evaluations complete even if some fail - import asyncio - evaluation_promises = [evaluate_judge(judge_config) for judge_config in judge_configs] - results = await asyncio.gather(*evaluation_promises, return_exceptions=True) - - # Map exceptions to None - return [ - None if isinstance(result, Exception) else result - for result in results + # Create tasks for each judge evaluation + tasks = [ + asyncio.create_task(evaluate_judge(judge_config)) + for judge_config in judge_configs ] + + return tasks def get_config(self) -> AICompletionConfig: """ diff --git a/ldai/providers/types.py b/ldai/providers/types.py index 58ca3fc..45df755 100644 --- a/ldai/providers/types.py +++ b/ldai/providers/types.py @@ -15,6 +15,21 @@ class LDAIMetrics: success: bool usage: Optional[TokenUsage] = None + def to_dict(self) -> Dict[str, Any]: + """ + Render the metrics as a dictionary object. + """ + result: Dict[str, Any] = { + 'success': self.success, + } + if self.usage is not None: + result['usage'] = { + 'total': self.usage.total, + 'input': self.usage.input, + 'output': self.usage.output, + } + return result + @dataclass class ChatResponse: @@ -44,6 +59,15 @@ class EvalScore: score: float # Score between 0.0 and 1.0 reasoning: str # Reasoning behind the provided score + def to_dict(self) -> Dict[str, Any]: + """ + Render the evaluation score as a dictionary object. + """ + return { + 'score': self.score, + 'reasoning': self.reasoning, + } + @dataclass class JudgeResponse: @@ -54,3 +78,15 @@ class JudgeResponse: success: bool # Whether the evaluation completed successfully error: Optional[str] = None # Error message if evaluation failed + def to_dict(self) -> Dict[str, Any]: + """ + Render the judge response as a dictionary object. 
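The new `to_dict` helpers make metrics and scores straightforward to log or serialize. A sketch of the expected shapes:

```python
from ldai.providers.types import EvalScore, LDAIMetrics
from ldai.tracker import TokenUsage

metrics = LDAIMetrics(success=True, usage=TokenUsage(total=330, input=220, output=110))
print(metrics.to_dict())
# {'success': True, 'usage': {'total': 330, 'input': 220, 'output': 110}}

score = EvalScore(score=0.9, reasoning='Directly answers the question.')
print(score.to_dict())
# {'score': 0.9, 'reasoning': 'Directly answers the question.'}
```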
+ """ + result: Dict[str, Any] = { + 'evals': {key: eval_score.to_dict() for key, eval_score in self.evals.items()}, + 'success': self.success, + } + if self.error is not None: + result['error'] = self.error + return result + From 63b1d9e29853bceb0e08305880593d499669b141 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Tue, 9 Dec 2025 18:46:00 +0100 Subject: [PATCH 14/28] fix test --- ldai/testing/test_model_config.py | 22 +++++++++++----------- ldai/testing/test_tracker.py | 15 ++++++++++----- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/ldai/testing/test_model_config.py b/ldai/testing/test_model_config.py index b35389d..13c0523 100644 --- a/ldai/testing/test_model_config.py +++ b/ldai/testing/test_model_config.py @@ -140,7 +140,7 @@ def test_uses_default_on_invalid_flag(ldai_client: LDAIClient): ) variables = {'name': 'World'} - config, _ = ldai_client.config('missing-flag', context, default_value, variables) + config = ldai_client.config('missing-flag', context, default_value, variables) assert config.messages is not None assert len(config.messages) > 0 @@ -162,7 +162,7 @@ def test_model_config_interpolation(ldai_client: LDAIClient): ) variables = {'name': 'World'} - config, _ = ldai_client.config('model-config', context, default_value, variables) + config = ldai_client.config('model-config', context, default_value, variables) assert config.messages is not None assert len(config.messages) > 0 @@ -179,7 +179,7 @@ def test_model_config_no_variables(ldai_client: LDAIClient): context = Context.create('user-key') default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) - config, _ = ldai_client.config('model-config', context, default_value, {}) + config = ldai_client.config('model-config', context, default_value, {}) assert config.messages is not None assert len(config.messages) > 0 @@ -197,7 +197,7 @@ def test_provider_config_handling(ldai_client: LDAIClient): default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} - config, _ = ldai_client.config('model-config', context, default_value, variables) + config = ldai_client.config('model-config', context, default_value, variables) assert config.provider is not None assert config.provider.name == 'fakeProvider' @@ -208,7 +208,7 @@ def test_context_interpolation(ldai_client: LDAIClient): default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} - config, _ = ldai_client.config( + config = ldai_client.config( 'ctx-interpolation', context, default_value, variables ) @@ -231,7 +231,7 @@ def test_multi_context_interpolation(ldai_client: LDAIClient): default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} - config, _ = ldai_client.config( + config = ldai_client.config( 'multi-ctx-interpolation', context, default_value, variables ) @@ -252,7 +252,7 @@ def test_model_config_multiple(ldai_client: LDAIClient): default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World', 'day': 'Monday'} - config, _ = ldai_client.config( + config = ldai_client.config( 'multiple-messages', context, default_value, variables ) @@ -272,7 +272,7 @@ def test_model_config_disabled(ldai_client: LDAIClient): context = Context.create('user-key') default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) - config, _ = ldai_client.config('off-config', context, default_value, {}) + 
config = ldai_client.config('off-config', context, default_value, {}) assert config.model is not None assert config.enabled is False @@ -285,7 +285,7 @@ def test_model_initial_config_disabled(ldai_client: LDAIClient): context = Context.create('user-key') default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) - config, _ = ldai_client.config('initial-config-disabled', context, default_value, {}) + config = ldai_client.config('initial-config-disabled', context, default_value, {}) assert config.enabled is False assert config.model is None @@ -297,7 +297,7 @@ def test_model_initial_config_enabled(ldai_client: LDAIClient): context = Context.create('user-key') default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) - config, _ = ldai_client.config('initial-config-enabled', context, default_value, {}) + config = ldai_client.config('initial-config-enabled', context, default_value, {}) assert config.enabled is True assert config.model is None @@ -320,7 +320,7 @@ def test_config_method_tracking(ldai_client: LDAIClient): context = Context.create('user-key') default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) - config, tracker = client.config('test-config-key', context, default_value) + config = client.config('test-config-key', context, default_value) mock_client.track.assert_called_once_with( '$ld:ai:config:function:single', diff --git a/ldai/testing/test_tracker.py b/ldai/testing/test_tracker.py index 19c8161..2e39d98 100644 --- a/ldai/testing/test_tracker.py +++ b/ldai/testing/test_tracker.py @@ -276,7 +276,8 @@ def test_tracks_bedrock_metrics_with_error(client: LDClient): assert tracker.get_summary().usage == TokenUsage(330, 220, 110) -def test_tracks_openai_metrics(client: LDClient): +@pytest.mark.asyncio +async def test_tracks_openai_metrics(client: LDClient): context = Context.create("user-key") tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) @@ -292,7 +293,10 @@ def to_dict(self): "completion_tokens": 110, } - tracker.track_openai_metrics(lambda: Result()) + async def get_result(): + return Result() + + await tracker.track_openai_metrics(get_result) calls = [ call( @@ -326,15 +330,16 @@ def to_dict(self): assert tracker.get_summary().usage == TokenUsage(330, 220, 110) -def test_tracks_openai_metrics_with_exception(client: LDClient): +@pytest.mark.asyncio +async def test_tracks_openai_metrics_with_exception(client: LDClient): context = Context.create("user-key") tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - def raise_exception(): + async def raise_exception(): raise ValueError("Something went wrong") try: - tracker.track_openai_metrics(raise_exception) + await tracker.track_openai_metrics(raise_exception) assert False, "Should have thrown an exception" except ValueError: pass From cae7952fe8d3aca10eed8560bed8b5e70dafe4f6 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Tue, 9 Dec 2025 19:11:29 +0100 Subject: [PATCH 15/28] fix lint --- ldai/__init__.py | 34 +++---------- ldai/chat/__init__.py | 35 +++++++------ ldai/client.py | 38 ++++++--------- ldai/judge/__init__.py | 27 +++++----- ldai/judge/evaluation_schema_builder.py | 7 ++- ldai/models.py | 18 +++---- ldai/providers/__init__.py | 4 +- ldai/providers/ai_provider.py | 31 ++++++------ ldai/providers/ai_provider_factory.py | 16 +++--- ldai/providers/langchain/__init__.py | 65 +++++++++++++++---------- ldai/providers/types.py 
| 1 - ldai/testing/test_model_config.py | 25 +++++----- ldai/tracker.py | 6 +-- setup.cfg | 2 +- 14 files changed, 143 insertions(+), 166 deletions(-) diff --git a/ldai/__init__.py b/ldai/__init__.py index 617ac3a..78125d7 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -1,35 +1,17 @@ __version__ = "0.10.1" # x-release-please-version # Export main client +# Export chat +from ldai.chat import TrackedChat from ldai.client import LDAIClient - -# Export models for convenience -from ldai.models import ( - AIAgentConfig, - AIAgentConfigDefault, - AIAgentConfigRequest, - AIAgents, - AICompletionConfig, - AICompletionConfigDefault, - AIJudgeConfig, - AIJudgeConfigDefault, - JudgeConfiguration, - LDMessage, - ModelConfig, - ProviderConfig, - # Deprecated aliases for backward compatibility - AIConfig, - LDAIAgent, - LDAIAgentConfig, - LDAIAgentDefaults, -) - # Export judge from ldai.judge import AIJudge - -# Export chat -from ldai.chat import TrackedChat - +# Export models for convenience +from ldai.models import ( # Deprecated aliases for backward compatibility + AIAgentConfig, AIAgentConfigDefault, AIAgentConfigRequest, AIAgents, + AICompletionConfig, AICompletionConfigDefault, AIConfig, AIJudgeConfig, + AIJudgeConfigDefault, JudgeConfiguration, LDAIAgent, LDAIAgentConfig, + LDAIAgentDefaults, LDMessage, ModelConfig, ProviderConfig) # Export judge types from ldai.providers.types import EvalScore, JudgeResponse diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py index 0785c16..bcb4284 100644 --- a/ldai/chat/__init__.py +++ b/ldai/chat/__init__.py @@ -3,10 +3,10 @@ import asyncio from typing import Any, Dict, List, Optional +from ldai.judge import AIJudge from ldai.models import AICompletionConfig, LDMessage from ldai.providers.ai_provider import AIProvider from ldai.providers.types import ChatResponse, JudgeResponse -from ldai.judge import AIJudge from ldai.tracker import LDAIConfigTracker @@ -14,7 +14,7 @@ class TrackedChat: """ Concrete implementation of TrackedChat that provides chat functionality by delegating to an AIProvider implementation. - + This class handles conversation management and tracking, while delegating the actual model invocation to the provider. """ @@ -29,7 +29,7 @@ def __init__( ): """ Initialize the TrackedChat. - + :param ai_config: The completion AI configuration :param tracker: The tracker for the completion configuration :param provider: The AI provider to use for chat @@ -46,9 +46,9 @@ def __init__( async def invoke(self, prompt: str) -> ChatResponse: """ Invoke the chat model with a prompt string. - + This method handles conversation management and tracking, delegating to the provider's invoke_model method. - + :param prompt: The user prompt to send to the chat model :return: ChatResponse containing the model's response and metrics """ @@ -86,9 +86,9 @@ def _start_judge_evaluations( ) -> List[asyncio.Task[Optional[JudgeResponse]]]: """ Start judge evaluations as async tasks without awaiting them. - + Returns a list of async tasks that can be awaited later. - + :param messages: Array of messages representing the conversation history :param response: The AI response to be evaluated :return: List of async tasks that will return judge evaluation results @@ -122,13 +122,13 @@ async def evaluate_judge(judge_config): asyncio.create_task(evaluate_judge(judge_config)) for judge_config in judge_configs ] - + return tasks def get_config(self) -> AICompletionConfig: """ Get the underlying AI configuration used to initialize this TrackedChat. 
- + :return: The AI completion configuration """ return self._ai_config @@ -136,7 +136,7 @@ def get_config(self) -> AICompletionConfig: def get_tracker(self) -> LDAIConfigTracker: """ Get the underlying AI configuration tracker used to initialize this TrackedChat. - + :return: The tracker instance """ return self._tracker @@ -144,9 +144,9 @@ def get_tracker(self) -> LDAIConfigTracker: def get_provider(self) -> AIProvider: """ Get the underlying AI provider instance. - + This provides direct access to the provider for advanced use cases. - + :return: The AI provider instance """ return self._provider @@ -154,9 +154,9 @@ def get_provider(self) -> AIProvider: def get_judges(self) -> Dict[str, AIJudge]: """ Get the judges associated with this TrackedChat. - + Returns a dictionary of judge instances keyed by their configuration keys. - + :return: Dictionary of judge instances """ return self._judges @@ -164,10 +164,10 @@ def get_judges(self) -> Dict[str, AIJudge]: def append_messages(self, messages: List[LDMessage]) -> None: """ Append messages to the conversation history. - + Adds messages to the conversation history without invoking the model, which is useful for managing multi-turn conversations or injecting context. - + :param messages: Array of messages to append to the conversation history """ self._messages.extend(messages) @@ -175,7 +175,7 @@ def append_messages(self, messages: List[LDMessage]) -> None: def get_messages(self, include_config_messages: bool = False) -> List[LDMessage]: """ Get all messages in the conversation history. - + :param include_config_messages: Whether to include the config messages from the AIConfig. Defaults to False. :return: Array of messages. When include_config_messages is True, returns both config @@ -186,4 +186,3 @@ def get_messages(self, include_config_messages: bool = False) -> List[LDMessage] config_messages = self._ai_config.messages or [] return config_messages + self._messages return list(self._messages) - diff --git a/ldai/client.py b/ldai/client.py index 4f4b6b9..086e99b 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -7,21 +7,13 @@ from ldai.chat import TrackedChat from ldai.judge import AIJudge -from ldai.models import ( - AIAgentConfig, - AIAgentConfigDefault, - AIAgentConfigRequest, - AIAgents, - AICompletionConfig, - AICompletionConfigDefault, - AIJudgeConfig, - AIJudgeConfigDefault, - JudgeConfiguration, - LDMessage, - ModelConfig, - ProviderConfig, -) -from ldai.providers.ai_provider_factory import AIProviderFactory, SupportedAIProvider +from ldai.models import (AIAgentConfig, AIAgentConfigDefault, + AIAgentConfigRequest, AIAgents, AICompletionConfig, + AICompletionConfigDefault, AIJudgeConfig, + AIJudgeConfigDefault, JudgeConfiguration, LDMessage, + ModelConfig, ProviderConfig) +from ldai.providers.ai_provider_factory import (AIProviderFactory, + SupportedAIProvider) from ldai.tracker import LDAIConfigTracker @@ -204,7 +196,7 @@ async def _initialize_judges( ) -> Dict[str, AIJudge]: """ Initialize judges from judge configurations. 
- + :param judge_configs: List of judge configurations :param context: Standard Context used when evaluating flags :param variables: Dictionary of values for instruction interpolation @@ -212,7 +204,7 @@ async def _initialize_judges( :return: Dictionary of judge instances keyed by their configuration keys """ judges: Dict[str, AIJudge] = {} - + async def create_judge_for_config(judge_key: str): judge = await self.create_judge( judge_key, @@ -222,22 +214,22 @@ async def create_judge_for_config(judge_key: str): default_ai_provider, ) return judge_key, judge - + judge_promises = [ create_judge_for_config(judge_config.key) for judge_config in judge_configs ] - + import asyncio results = await asyncio.gather(*judge_promises, return_exceptions=True) - + for result in results: if isinstance(result, Exception): continue - judge_key, judge = result + judge_key, judge = result # type: ignore[misc] if judge: judges[judge_key] = judge - + return judges async def create_chat( @@ -275,7 +267,7 @@ async def create_chat( if chat: response = await chat.invoke("I need help with my order") print(response.message.content) - + # Access conversation history messages = chat.get_messages() print(f"Conversation has {len(messages)} messages") diff --git a/ldai/judge/__init__.py b/ldai/judge/__init__.py index 323cd19..3caad65 100644 --- a/ldai/judge/__init__.py +++ b/ldai/judge/__init__.py @@ -5,17 +5,18 @@ import chevron +from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder from ldai.models import AIJudgeConfig, LDMessage from ldai.providers.ai_provider import AIProvider -from ldai.providers.types import ChatResponse, EvalScore, JudgeResponse, StructuredResponse +from ldai.providers.types import (ChatResponse, EvalScore, JudgeResponse, + StructuredResponse) from ldai.tracker import LDAIConfigTracker -from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder class AIJudge: """ Judge implementation that handles evaluation functionality and conversation management. - + According to the AIEval spec, judges are AI Configs with mode: "judge" that evaluate other AI Configs using structured output. """ @@ -29,7 +30,7 @@ def __init__( ): """ Initialize the Judge. - + :param ai_config: The judge AI configuration :param ai_config_tracker: The tracker for the judge configuration :param ai_provider: The AI provider to use for evaluation @@ -51,7 +52,7 @@ async def evaluate( ) -> Optional[JudgeResponse]: """ Evaluates an AI response using the judge's configuration. - + :param input_text: The input prompt or question that was provided to the AI :param output_text: The AI-generated response to be evaluated :param sampling_rate: Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1) @@ -113,7 +114,7 @@ async def evaluate_messages( ) -> Optional[JudgeResponse]: """ Evaluates an AI response from chat messages and response. - + :param messages: Array of messages representing the conversation history :param response: The AI response to be evaluated :param sampling_ratio: Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1) @@ -127,7 +128,7 @@ async def evaluate_messages( def get_ai_config(self) -> AIJudgeConfig: """ Returns the AI Config used by this judge. - + :return: The judge AI configuration """ return self._ai_config @@ -135,7 +136,7 @@ def get_ai_config(self) -> AIJudgeConfig: def get_tracker(self) -> LDAIConfigTracker: """ Returns the tracker associated with this judge. 
- + :return: The tracker for the judge configuration """ return self._ai_config_tracker @@ -143,7 +144,7 @@ def get_tracker(self) -> LDAIConfigTracker: def get_provider(self) -> AIProvider: """ Returns the AI provider used by this judge. - + :return: The AI provider """ return self._ai_provider @@ -151,7 +152,7 @@ def get_provider(self) -> AIProvider: def _construct_evaluation_messages(self, input_text: str, output_text: str) -> list[LDMessage]: """ Constructs evaluation messages by combining judge's config messages with input/output. - + :param input_text: The input text :param output_text: The output text to evaluate :return: List of messages for evaluation @@ -173,7 +174,7 @@ def _construct_evaluation_messages(self, input_text: str, output_text: str) -> l def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: """ Interpolates message content with variables using Mustache templating. - + :param content: The message content template :param variables: Variables to interpolate :return: Interpolated message content @@ -184,7 +185,7 @@ def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScore]: """ Parses the structured evaluation response from the AI provider. - + :param data: The structured response data :return: Dictionary of evaluation scores keyed by metric key """ @@ -227,5 +228,3 @@ def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScor results[metric_key] = EvalScore(score=float(score), reasoning=reasoning) return results - - diff --git a/ldai/judge/evaluation_schema_builder.py b/ldai/judge/evaluation_schema_builder.py index 1965e64..c996f08 100644 --- a/ldai/judge/evaluation_schema_builder.py +++ b/ldai/judge/evaluation_schema_builder.py @@ -13,7 +13,7 @@ class EvaluationSchemaBuilder: def build(evaluation_metric_keys: list[str]) -> Dict[str, Any]: """ Build an evaluation response schema from evaluation metric keys. - + :param evaluation_metric_keys: List of evaluation metric keys :return: Schema dictionary for structured output """ @@ -38,7 +38,7 @@ def build(evaluation_metric_keys: list[str]) -> Dict[str, Any]: def _build_key_properties(evaluation_metric_keys: list[str]) -> Dict[str, Any]: """ Build properties for each evaluation metric key. - + :param evaluation_metric_keys: List of evaluation metric keys :return: Dictionary of properties for each key """ @@ -51,7 +51,7 @@ def _build_key_properties(evaluation_metric_keys: list[str]) -> Dict[str, Any]: def _build_key_schema(key: str) -> Dict[str, Any]: """ Build schema for a single evaluation metric key. - + :param key: Evaluation metric key :return: Schema dictionary for the key """ @@ -72,4 +72,3 @@ def _build_key_schema(key: str) -> Dict[str, Any]: 'required': ['score', 'reasoning'], 'additionalProperties': False, } - diff --git a/ldai/models.py b/ldai/models.py index fa36f8c..c075dcf 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -110,7 +110,7 @@ class JudgeConfiguration: """ Configuration for judge attachment to AI Configs. 
""" - + @dataclass(frozen=True) class Judge: """ @@ -127,7 +127,7 @@ def to_dict(self) -> dict: 'key': self.key, 'samplingRate': self.sampling_rate, } - + judges: List['JudgeConfiguration.Judge'] def to_dict(self) -> dict: @@ -342,15 +342,10 @@ class AIAgentConfigRequest: # Deprecated Type Aliases for Backward Compatibility # ============================================================================ -# Note: These are type aliases that point to the new types. -# Since Python uses duck typing, these will work at runtime even if type checkers complain. -# The old AIConfig had optional enabled, so it maps to AICompletionConfigDefault -# The old AIConfig return type had required enabled, so it maps to AICompletionConfig - -# Deprecated: Use AICompletionConfigDefault instead -# This was the old AIConfig with optional enabled (used as input/default) -# Note: We map to AICompletionConfigDefault since the old AIConfig had optional enabled -AIConfig = AICompletionConfigDefault +# Note: AIConfig is now defined above as a base class (line 169). +# For backward compatibility, code should migrate to: +# - Use AICompletionConfigDefault for default/input values +# - Use AICompletionConfig for return values # Deprecated: Use AIAgentConfigDefault instead LDAIAgentDefaults = AIAgentConfigDefault @@ -360,4 +355,3 @@ class AIAgentConfigRequest: # Deprecated: Use AIAgentConfig instead (note: this was the old return type) LDAIAgent = AIAgentConfig - diff --git a/ldai/providers/__init__.py b/ldai/providers/__init__.py index 1beffb4..1b3452b 100644 --- a/ldai/providers/__init__.py +++ b/ldai/providers/__init__.py @@ -1,7 +1,8 @@ """AI Provider interfaces and factory for LaunchDarkly AI SDK.""" from ldai.providers.ai_provider import AIProvider -from ldai.providers.ai_provider_factory import AIProviderFactory, SupportedAIProvider +from ldai.providers.ai_provider_factory import (AIProviderFactory, + SupportedAIProvider) # Export LangChain provider if available try: @@ -18,4 +19,3 @@ 'AIProviderFactory', 'SupportedAIProvider', ] - diff --git a/ldai/providers/ai_provider.py b/ldai/providers/ai_provider.py index 5863a74..cc7b21e 100644 --- a/ldai/providers/ai_provider.py +++ b/ldai/providers/ai_provider.py @@ -10,10 +10,10 @@ class AIProvider(ABC): """ Abstract base class for AI providers that implement chat model functionality. - + This class provides the contract that all provider implementations must follow to integrate with LaunchDarkly's tracking and configuration capabilities. - + Following the AICHAT spec recommendation to use base classes with non-abstract methods for better extensibility and backwards compatibility. """ @@ -21,7 +21,7 @@ class AIProvider(ABC): def __init__(self, logger: Optional[Any] = None): """ Initialize the AI provider. - + :param logger: Optional logger for logging provider operations. """ self.logger = logger @@ -29,22 +29,22 @@ def __init__(self, logger: Optional[Any] = None): async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: """ Invoke the chat model with an array of messages. - + This method should convert messages to provider format, invoke the model, and return a ChatResponse with the result and metrics. - + Default implementation takes no action and returns a placeholder response. Provider implementations should override this method. 
- + :param messages: Array of LDMessage objects representing the conversation :return: ChatResponse containing the model's response """ if self.logger: self.logger.warn('invokeModel not implemented by this provider') - + from ldai.models import LDMessage from ldai.providers.types import LDAIMetrics - + return ChatResponse( message=LDMessage(role='assistant', content=''), metrics=LDAIMetrics(success=False, usage=None), @@ -57,22 +57,22 @@ async def invoke_structured_model( ) -> StructuredResponse: """ Invoke the chat model with structured output support. - + This method should convert messages to provider format, invoke the model with structured output configuration, and return a structured response. - + Default implementation takes no action and returns a placeholder response. Provider implementations should override this method. - + :param messages: Array of LDMessage objects representing the conversation :param response_structure: Dictionary of output configurations keyed by output name :return: StructuredResponse containing the structured data """ if self.logger: self.logger.warn('invokeStructuredModel not implemented by this provider') - + from ldai.providers.types import LDAIMetrics - + return StructuredResponse( data={}, raw_response='', @@ -84,13 +84,12 @@ async def invoke_structured_model( async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'AIProvider': """ Static method that constructs an instance of the provider. - + Each provider implementation must provide their own static create method that accepts an AIConfigKind and returns a configured instance. - + :param ai_config: The LaunchDarkly AI configuration :param logger: Optional logger for the provider :return: Configured provider instance """ raise NotImplementedError('Provider implementations must override the static create method') - diff --git a/ldai/providers/ai_provider_factory.py b/ldai/providers/ai_provider_factory.py index 41cc1c2..5dd441d 100644 --- a/ldai/providers/ai_provider_factory.py +++ b/ldai/providers/ai_provider_factory.py @@ -6,7 +6,6 @@ from ldai.models import AIConfigKind from ldai.providers.ai_provider import AIProvider - # List of supported AI providers SUPPORTED_AI_PROVIDERS = [ # Multi-provider packages should be last in the list @@ -30,10 +29,10 @@ async def create( ) -> Optional[AIProvider]: """ Create an AIProvider instance based on the AI configuration. - + This method attempts to load provider-specific implementations dynamically. Returns None if the provider is not supported. - + :param ai_config: The AI configuration :param logger: Optional logger for logging provider initialization :param default_ai_provider: Optional default AI provider to use @@ -63,7 +62,7 @@ def _get_providers_to_try( ) -> List[SupportedAIProvider]: """ Determine which providers to try based on default_ai_provider and provider_name. - + :param default_ai_provider: Optional default provider to use :param provider_name: Optional provider name from config :return: List of providers to try in order @@ -84,7 +83,9 @@ def _get_providers_to_try( for provider in multi_provider_packages: provider_set.add(provider) - return list(provider_set) + # Return list of providers, converting from set + # The set contains strings that should be valid SupportedAIProvider values + return list(provider_set) # type: ignore[arg-type] @staticmethod async def _try_create_provider( @@ -94,7 +95,7 @@ async def _try_create_provider( ) -> Optional[AIProvider]: """ Try to create a provider of the specified type. 
- + :param provider_type: Type of provider to create :param ai_config: AI configuration :param logger: Optional logger @@ -136,7 +137,7 @@ async def _create_provider( ) -> Optional[AIProvider]: """ Create a provider instance dynamically. - + :param package_name: Name of the package containing the provider :param provider_class_name: Name of the provider class :param ai_config: AI configuration @@ -164,4 +165,3 @@ async def _create_provider( f"with package {package_name}: {error}" ) return None - diff --git a/ldai/providers/langchain/__init__.py b/ldai/providers/langchain/__init__.py index f2e2c35..f1a5aa1 100644 --- a/ldai/providers/langchain/__init__.py +++ b/ldai/providers/langchain/__init__.py @@ -3,7 +3,8 @@ from typing import Any, Dict, List, Optional from langchain_core.language_models.chat_models import BaseChatModel -from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage +from langchain_core.messages import (AIMessage, BaseMessage, HumanMessage, + SystemMessage) from ldai.models import AIConfigKind, LDMessage from ldai.providers.ai_provider import AIProvider @@ -14,14 +15,14 @@ class LangChainProvider(AIProvider): """ LangChain implementation of AIProvider. - + This provider integrates LangChain models with LaunchDarkly's tracking capabilities. """ def __init__(self, llm: BaseChatModel, logger: Optional[Any] = None): """ Initialize the LangChain provider. - + :param llm: LangChain BaseChatModel instance :param logger: Optional logger for logging provider operations """ @@ -36,7 +37,7 @@ def __init__(self, llm: BaseChatModel, logger: Optional[Any] = None): async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'LangChainProvider': """ Static factory method to create a LangChain AIProvider from an AI configuration. - + :param ai_config: The LaunchDarkly AI configuration :param logger: Optional logger for the provider :return: Configured LangChainProvider instance @@ -51,7 +52,7 @@ async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'Lang async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: """ Invoke the LangChain model with an array of messages. - + :param messages: Array of LDMessage objects representing the conversation :return: ChatResponse containing the model's response """ @@ -60,10 +61,15 @@ async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) # Get the LangChain response - response: AIMessage = await self._llm.ainvoke(langchain_messages) + response: BaseMessage = await self._llm.ainvoke(langchain_messages) # Generate metrics early (assumes success by default) - metrics = LangChainProvider.get_ai_metrics_from_response(response) + # Most chat models return AIMessage, but we handle BaseMessage generically + if isinstance(response, AIMessage): + metrics = LangChainProvider.get_ai_metrics_from_response(response) + else: + # For non-AIMessage responses, create default metrics + metrics = LDAIMetrics(success=True, usage=TokenUsage(total=0, input=0, output=0)) # Extract text content from the response content: str = '' @@ -104,7 +110,7 @@ async def invoke_structured_model( ) -> StructuredResponse: """ Invoke the LangChain model with structured output support. 
- + :param messages: Array of LDMessage objects representing the conversation :param response_structure: Dictionary of output configurations keyed by output name :return: StructuredResponse containing the structured data @@ -124,7 +130,10 @@ async def invoke_structured_model( if isinstance(response_obj, AIMessage): import json try: - response = json.loads(response_obj.content) + if isinstance(response_obj.content, str): + response = json.loads(response_obj.content) + else: + response = {'content': response_obj.content} except json.JSONDecodeError: response = {'content': response_obj.content} else: @@ -158,7 +167,7 @@ async def invoke_structured_model( def get_chat_model(self) -> BaseChatModel: """ Get the underlying LangChain model instance. - + :return: The LangChain BaseChatModel instance """ return self._llm @@ -171,10 +180,10 @@ def get_chat_model(self) -> BaseChatModel: def map_provider(ld_provider_name: str) -> str: """ Map LaunchDarkly provider names to LangChain provider names. - + This method enables seamless integration between LaunchDarkly's standardized provider naming and LangChain's naming conventions. - + :param ld_provider_name: LaunchDarkly provider name :return: LangChain provider name """ @@ -190,10 +199,10 @@ def map_provider(ld_provider_name: str) -> str: def get_ai_metrics_from_response(response: AIMessage) -> LDAIMetrics: """ Get AI metrics from a LangChain provider response. - + This method extracts token usage information and success status from LangChain responses and returns a LaunchDarkly LDAIMetrics object. - + :param response: The response from the LangChain model :return: LDAIMetrics with success status and token usage """ @@ -215,10 +224,10 @@ def get_ai_metrics_from_response(response: AIMessage) -> LDAIMetrics: def convert_messages_to_langchain(messages: List[LDMessage]) -> List[BaseMessage]: """ Convert LaunchDarkly messages to LangChain messages. - + This helper method enables developers to work directly with LangChain message types while maintaining compatibility with LaunchDarkly's standardized message format. - + :param messages: List of LDMessage objects :return: List of LangChain message objects """ @@ -238,10 +247,10 @@ def convert_messages_to_langchain(messages: List[LDMessage]) -> List[BaseMessage async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: """ Create a LangChain model from an AI configuration. - + This public helper method enables developers to initialize their own LangChain models using LaunchDarkly AI configurations. 
- + :param ai_config: The LaunchDarkly AI configuration :return: A configured LangChain BaseChatModel """ @@ -256,22 +265,27 @@ async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: try: # Try to import init_chat_model from langchain.chat_models # This is available in langchain >= 0.1.0 + # Use importlib to avoid mypy no-redef error with fallback imports + import importlib + init_chat_model = None try: - from langchain.chat_models import init_chat_model - except ImportError: + module = importlib.import_module('langchain.chat_models') + init_chat_model = getattr(module, 'init_chat_model') + except (ImportError, AttributeError): # Fallback for older versions or different import path - from langchain.chat_models.universal import init_chat_model - + module = importlib.import_module('langchain.chat_models.universal') + init_chat_model = getattr(module, 'init_chat_model') + # Map provider name langchain_provider = LangChainProvider.map_provider(provider) - + # Create model configuration model_kwargs = {**parameters} if langchain_provider: model_kwargs['model_provider'] = langchain_provider - + # Initialize the chat model (init_chat_model may be async or sync) - result = init_chat_model(model_name, **model_kwargs) + result = init_chat_model(model_name, **model_kwargs) # type: ignore[misc] # Handle both sync and async initialization if hasattr(result, '__await__'): return await result @@ -281,4 +295,3 @@ async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: 'langchain package is required for LangChainProvider. ' 'Install it with: pip install langchain langchain-core' ) from e - diff --git a/ldai/providers/types.py b/ldai/providers/types.py index 45df755..de54698 100644 --- a/ldai/providers/types.py +++ b/ldai/providers/types.py @@ -89,4 +89,3 @@ def to_dict(self) -> Dict[str, Any]: if self.error is not None: result['error'] = self.error return result - diff --git a/ldai/testing/test_model_config.py b/ldai/testing/test_model_config.py index 13c0523..26a02c9 100644 --- a/ldai/testing/test_model_config.py +++ b/ldai/testing/test_model_config.py @@ -2,7 +2,8 @@ from ldclient import Config, Context, LDClient from ldclient.integrations.test_data import TestData -from ldai import AIConfig, LDAIClient, LDMessage, ModelConfig +from ldai import LDAIClient, LDMessage, ModelConfig +from ldai.models import AICompletionConfigDefault @pytest.fixture @@ -133,7 +134,7 @@ def test_model_config_handles_custom(): def test_uses_default_on_invalid_flag(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig( + default_value = AICompletionConfigDefault( enabled=True, model=ModelConfig('fakeModel', parameters={'temperature': 0.5, 'maxTokens': 4096}), messages=[LDMessage(role='system', content='Hello, {{name}}!')], @@ -155,7 +156,7 @@ def test_uses_default_on_invalid_flag(ldai_client: LDAIClient): def test_model_config_interpolation(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig( + default_value = AICompletionConfigDefault( enabled=True, model=ModelConfig('fakeModel'), messages=[LDMessage(role='system', content='Hello, {{name}}!')], @@ -177,7 +178,7 @@ def test_model_config_interpolation(ldai_client: LDAIClient): def test_model_config_no_variables(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) config 
= ldai_client.config('model-config', context, default_value, {}) @@ -194,7 +195,7 @@ def test_model_config_no_variables(ldai_client: LDAIClient): def test_provider_config_handling(ldai_client: LDAIClient): context = Context.builder('user-key').name("Sandy").build() - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} config = ldai_client.config('model-config', context, default_value, variables) @@ -205,7 +206,7 @@ def test_provider_config_handling(ldai_client: LDAIClient): def test_context_interpolation(ldai_client: LDAIClient): context = Context.builder('user-key').name("Sandy").set('last', 'Beaches').build() - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} config = ldai_client.config( @@ -228,7 +229,7 @@ def test_multi_context_interpolation(ldai_client: LDAIClient): user_context = Context.builder('user-key').name("Sandy").build() org_context = Context.builder('org-key').kind('org').name("LaunchDarkly").set('shortname', 'LD').build() context = Context.multi_builder().add(user_context).add(org_context).build() - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} config = ldai_client.config( @@ -249,7 +250,7 @@ def test_multi_context_interpolation(ldai_client: LDAIClient): def test_model_config_multiple(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World', 'day': 'Monday'} config = ldai_client.config( @@ -270,7 +271,7 @@ def test_model_config_multiple(ldai_client: LDAIClient): def test_model_config_disabled(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) config = ldai_client.config('off-config', context, default_value, {}) @@ -283,7 +284,7 @@ def test_model_config_disabled(ldai_client: LDAIClient): def test_model_initial_config_disabled(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) config = ldai_client.config('initial-config-disabled', context, default_value, {}) @@ -295,7 +296,7 @@ def test_model_initial_config_disabled(ldai_client: LDAIClient): def test_model_initial_config_enabled(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) config = ldai_client.config('initial-config-enabled', context, default_value, {}) @@ -318,7 +319,7 @@ def test_config_method_tracking(ldai_client: LDAIClient): client = LDAIClient(mock_client) context = 
Context.create('user-key') - default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) config = client.config('test-config-key', context, default_value) diff --git a/ldai/tracker.py b/ldai/tracker.py index 632f0f4..11b846a 100644 --- a/ldai/tracker.py +++ b/ldai/tracker.py @@ -211,7 +211,7 @@ def track_eval_scores(self, scores: Dict[str, Any]) -> None: :param scores: Dictionary mapping metric keys to their evaluation scores (EvalScore objects) """ from ldai.providers.types import EvalScore - + # Track each evaluation score individually for metric_key, eval_score in scores.items(): if isinstance(eval_score, EvalScore): @@ -229,12 +229,12 @@ def track_judge_response(self, judge_response: Any) -> None: :param judge_response: JudgeResponse object containing evals and success status """ from ldai.providers.types import JudgeResponse - + if isinstance(judge_response, JudgeResponse): # Track evaluation scores if judge_response.evals: self.track_eval_scores(judge_response.evals) - + # Track success/error based on judge response if judge_response.success: self.track_success() diff --git a/setup.cfg b/setup.cfg index c178190..1fb1827 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,2 @@ [pycodestyle] -ignore = E501 +ignore = E501,W503 From 3ffb55d1434a8f1ba4af6c75d0b0bc46e79fafd2 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Tue, 9 Dec 2025 19:15:06 +0100 Subject: [PATCH 16/28] fix deps --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 200215c..9c1f44a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ chevron = "=0.14.0" pytest = ">=2.8" pytest-cov = ">=2.4.0" pytest-mypy = "==1.0.1" +pytest-asyncio = ">=0.21.0" mypy = "==1.18.2" pycodestyle = "^2.12.1" isort = ">=5.13.2,<7.0.0" From 64bb5f75b5a62022c1fb5a37fa1dde2e2c632db4 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Tue, 9 Dec 2025 22:59:05 +0100 Subject: [PATCH 17/28] remove langchain and comment ref lines for now --- ldai/providers/__init__.py | 35 +-- ldai/providers/ai_provider_factory.py | 42 ++-- ldai/providers/langchain/__init__.py | 297 -------------------------- 3 files changed, 44 insertions(+), 330 deletions(-) delete mode 100644 ldai/providers/langchain/__init__.py diff --git a/ldai/providers/__init__.py b/ldai/providers/__init__.py index 1b3452b..48152cc 100644 --- a/ldai/providers/__init__.py +++ b/ldai/providers/__init__.py @@ -5,17 +5,24 @@ SupportedAIProvider) # Export LangChain provider if available -try: - from ldai.providers.langchain import LangChainProvider - __all__ = [ - 'AIProvider', - 'AIProviderFactory', - 'LangChainProvider', - 'SupportedAIProvider', - ] -except ImportError: - __all__ = [ - 'AIProvider', - 'AIProviderFactory', - 'SupportedAIProvider', - ] +# TODO: Uncomment when langchain provider package is introduced +# try: +# from ldai.providers.langchain import LangChainProvider +# __all__ = [ +# 'AIProvider', +# 'AIProviderFactory', +# 'LangChainProvider', +# 'SupportedAIProvider', +# ] +# except ImportError: +# __all__ = [ +# 'AIProvider', +# 'AIProviderFactory', +# 'SupportedAIProvider', +# ] + +__all__ = [ + 'AIProvider', + 'AIProviderFactory', + 'SupportedAIProvider', +] diff --git a/ldai/providers/ai_provider_factory.py b/ldai/providers/ai_provider_factory.py index 5dd441d..3fd0f50 100644 --- a/ldai/providers/ai_provider_factory.py +++ b/ldai/providers/ai_provider_factory.py @@ -1,19 +1,21 @@ 
"""Factory for creating AIProvider instances based on the provider configuration.""" import importlib -from typing import Any, List, Literal, Optional, Type +from typing import Any, Dict, List, Literal, Optional, Tuple, Type from ldai.models import AIConfigKind from ldai.providers.ai_provider import AIProvider # List of supported AI providers -SUPPORTED_AI_PROVIDERS = [ +SUPPORTED_AI_PROVIDERS: List[str] = [ # Multi-provider packages should be last in the list - 'langchain', + # 'langchain', # TODO: Uncomment when langchain provider package is introduced ] # Type representing the supported AI providers -SupportedAIProvider = Literal['langchain'] +# TODO: Update this type when provider packages are introduced +# SupportedAIProvider = Literal['langchain'] +SupportedAIProvider = Literal['none'] # Placeholder until providers are added class AIProviderFactory: @@ -79,9 +81,10 @@ def _get_providers_to_try( provider_set.add(provider_name) # type: ignore # Then try multi-provider packages, but avoid duplicates - multi_provider_packages: List[SupportedAIProvider] = ['langchain'] - for provider in multi_provider_packages: - provider_set.add(provider) + # TODO: Uncomment when langchain provider package is introduced + # multi_provider_packages: List[SupportedAIProvider] = ['langchain'] + # for provider in multi_provider_packages: + # provider_set.add(provider) # Return list of providers, converting from set # The set contains strings that should be valid SupportedAIProvider values @@ -102,20 +105,21 @@ async def _try_create_provider( :return: AIProvider instance or None if creation failed """ # Handle built-in providers (part of this package) - if provider_type == 'langchain': - try: - from ldai.providers.langchain import LangChainProvider - return await LangChainProvider.create(ai_config, logger) - except ImportError as error: - if logger: - logger.warn( - f"Error creating LangChainProvider: {error}. " - f"Make sure langchain and langchain-core packages are installed." - ) - return None + # TODO: Uncomment when langchain provider package is introduced + # if provider_type == 'langchain': + # try: + # from ldai.providers.langchain import LangChainProvider + # return await LangChainProvider.create(ai_config, logger) + # except ImportError as error: + # if logger: + # logger.warn( + # f"Error creating LangChainProvider: {error}. " + # f"Make sure langchain and langchain-core packages are installed." + # ) + # return None # For future external providers, use dynamic import - provider_mappings = { + provider_mappings: Dict[str, Tuple[str, str]] = { # 'openai': ('launchdarkly_server_sdk_ai_openai', 'OpenAIProvider'), # 'vercel': ('launchdarkly_server_sdk_ai_vercel', 'VercelProvider'), } diff --git a/ldai/providers/langchain/__init__.py b/ldai/providers/langchain/__init__.py deleted file mode 100644 index f1a5aa1..0000000 --- a/ldai/providers/langchain/__init__.py +++ /dev/null @@ -1,297 +0,0 @@ -"""LangChain implementation of AIProvider for LaunchDarkly AI SDK.""" - -from typing import Any, Dict, List, Optional - -from langchain_core.language_models.chat_models import BaseChatModel -from langchain_core.messages import (AIMessage, BaseMessage, HumanMessage, - SystemMessage) - -from ldai.models import AIConfigKind, LDMessage -from ldai.providers.ai_provider import AIProvider -from ldai.providers.types import ChatResponse, LDAIMetrics, StructuredResponse -from ldai.tracker import TokenUsage - - -class LangChainProvider(AIProvider): - """ - LangChain implementation of AIProvider. 
- - This provider integrates LangChain models with LaunchDarkly's tracking capabilities. - """ - - def __init__(self, llm: BaseChatModel, logger: Optional[Any] = None): - """ - Initialize the LangChain provider. - - :param llm: LangChain BaseChatModel instance - :param logger: Optional logger for logging provider operations - """ - super().__init__(logger) - self._llm = llm - - # ============================================================================= - # MAIN FACTORY METHOD - # ============================================================================= - - @staticmethod - async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'LangChainProvider': - """ - Static factory method to create a LangChain AIProvider from an AI configuration. - - :param ai_config: The LaunchDarkly AI configuration - :param logger: Optional logger for the provider - :return: Configured LangChainProvider instance - """ - llm = await LangChainProvider.create_langchain_model(ai_config) - return LangChainProvider(llm, logger) - - # ============================================================================= - # INSTANCE METHODS (AIProvider Implementation) - # ============================================================================= - - async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: - """ - Invoke the LangChain model with an array of messages. - - :param messages: Array of LDMessage objects representing the conversation - :return: ChatResponse containing the model's response - """ - try: - # Convert LDMessage[] to LangChain messages - langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) - - # Get the LangChain response - response: BaseMessage = await self._llm.ainvoke(langchain_messages) - - # Generate metrics early (assumes success by default) - # Most chat models return AIMessage, but we handle BaseMessage generically - if isinstance(response, AIMessage): - metrics = LangChainProvider.get_ai_metrics_from_response(response) - else: - # For non-AIMessage responses, create default metrics - metrics = LDAIMetrics(success=True, usage=TokenUsage(total=0, input=0, output=0)) - - # Extract text content from the response - content: str = '' - if isinstance(response.content, str): - content = response.content - else: - # Log warning for non-string content (likely multimodal) - if self.logger: - self.logger.warn( - f"Multimodal response not supported, expecting a string. " - f"Content type: {type(response.content)}, Content: {response.content}" - ) - # Update metrics to reflect content loss - metrics.success = False - - # Create the assistant message - from ldai.models import LDMessage - assistant_message = LDMessage(role='assistant', content=content) - - return ChatResponse( - message=assistant_message, - metrics=metrics, - ) - except Exception as error: - if self.logger: - self.logger.warn(f'LangChain model invocation failed: {error}') - - from ldai.models import LDMessage - return ChatResponse( - message=LDMessage(role='assistant', content=''), - metrics=LDAIMetrics(success=False, usage=None), - ) - - async def invoke_structured_model( - self, - messages: List[LDMessage], - response_structure: Dict[str, Any], - ) -> StructuredResponse: - """ - Invoke the LangChain model with structured output support. 
- - :param messages: Array of LDMessage objects representing the conversation - :param response_structure: Dictionary of output configurations keyed by output name - :return: StructuredResponse containing the structured data - """ - try: - # Convert LDMessage[] to LangChain messages - langchain_messages = LangChainProvider.convert_messages_to_langchain(messages) - - # Get the LangChain response with structured output - # Note: with_structured_output is available on BaseChatModel in newer LangChain versions - if hasattr(self._llm, 'with_structured_output'): - structured_llm = self._llm.with_structured_output(response_structure) - response = await structured_llm.ainvoke(langchain_messages) - else: - # Fallback: invoke normally and try to parse as JSON - response_obj = await self._llm.ainvoke(langchain_messages) - if isinstance(response_obj, AIMessage): - import json - try: - if isinstance(response_obj.content, str): - response = json.loads(response_obj.content) - else: - response = {'content': response_obj.content} - except json.JSONDecodeError: - response = {'content': response_obj.content} - else: - response = response_obj - - # Using structured output doesn't support metrics - metrics = LDAIMetrics( - success=True, - usage=TokenUsage(total=0, input=0, output=0), - ) - - import json - return StructuredResponse( - data=response if isinstance(response, dict) else {'result': response}, - raw_response=json.dumps(response) if not isinstance(response, str) else response, - metrics=metrics, - ) - except Exception as error: - if self.logger: - self.logger.warn(f'LangChain structured model invocation failed: {error}') - - return StructuredResponse( - data={}, - raw_response='', - metrics=LDAIMetrics( - success=False, - usage=TokenUsage(total=0, input=0, output=0), - ), - ) - - def get_chat_model(self) -> BaseChatModel: - """ - Get the underlying LangChain model instance. - - :return: The LangChain BaseChatModel instance - """ - return self._llm - - # ============================================================================= - # STATIC UTILITY METHODS - # ============================================================================= - - @staticmethod - def map_provider(ld_provider_name: str) -> str: - """ - Map LaunchDarkly provider names to LangChain provider names. - - This method enables seamless integration between LaunchDarkly's standardized - provider naming and LangChain's naming conventions. - - :param ld_provider_name: LaunchDarkly provider name - :return: LangChain provider name - """ - lowercased_name = ld_provider_name.lower() - - mapping: Dict[str, str] = { - 'gemini': 'google-genai', - } - - return mapping.get(lowercased_name, lowercased_name) - - @staticmethod - def get_ai_metrics_from_response(response: AIMessage) -> LDAIMetrics: - """ - Get AI metrics from a LangChain provider response. - - This method extracts token usage information and success status from LangChain responses - and returns a LaunchDarkly LDAIMetrics object. 
- - :param response: The response from the LangChain model - :return: LDAIMetrics with success status and token usage - """ - # Extract token usage if available - usage: Optional[TokenUsage] = None - if hasattr(response, 'response_metadata') and response.response_metadata: - token_usage = response.response_metadata.get('token_usage') - if token_usage: - usage = TokenUsage( - total=token_usage.get('total_tokens', 0) or token_usage.get('totalTokens', 0) or 0, - input=token_usage.get('prompt_tokens', 0) or token_usage.get('promptTokens', 0) or 0, - output=token_usage.get('completion_tokens', 0) or token_usage.get('completionTokens', 0) or 0, - ) - - # LangChain responses that complete successfully are considered successful by default - return LDAIMetrics(success=True, usage=usage) - - @staticmethod - def convert_messages_to_langchain(messages: List[LDMessage]) -> List[BaseMessage]: - """ - Convert LaunchDarkly messages to LangChain messages. - - This helper method enables developers to work directly with LangChain message types - while maintaining compatibility with LaunchDarkly's standardized message format. - - :param messages: List of LDMessage objects - :return: List of LangChain message objects - """ - result: List[BaseMessage] = [] - for msg in messages: - if msg.role == 'system': - result.append(SystemMessage(content=msg.content)) - elif msg.role == 'user': - result.append(HumanMessage(content=msg.content)) - elif msg.role == 'assistant': - result.append(AIMessage(content=msg.content)) - else: - raise ValueError(f'Unsupported message role: {msg.role}') - return result - - @staticmethod - async def create_langchain_model(ai_config: AIConfigKind) -> BaseChatModel: - """ - Create a LangChain model from an AI configuration. - - This public helper method enables developers to initialize their own LangChain models - using LaunchDarkly AI configurations. 
- - :param ai_config: The LaunchDarkly AI configuration - :return: A configured LangChain BaseChatModel - """ - model_name = ai_config.model.name if ai_config.model else '' - provider = ai_config.provider.name if ai_config.provider else '' - parameters = ai_config.model.get_parameter('parameters') if ai_config.model else {} - if not isinstance(parameters, dict): - parameters = {} - - # Use LangChain's init_chat_model to support multiple providers - # Note: This requires langchain package to be installed - try: - # Try to import init_chat_model from langchain.chat_models - # This is available in langchain >= 0.1.0 - # Use importlib to avoid mypy no-redef error with fallback imports - import importlib - init_chat_model = None - try: - module = importlib.import_module('langchain.chat_models') - init_chat_model = getattr(module, 'init_chat_model') - except (ImportError, AttributeError): - # Fallback for older versions or different import path - module = importlib.import_module('langchain.chat_models.universal') - init_chat_model = getattr(module, 'init_chat_model') - - # Map provider name - langchain_provider = LangChainProvider.map_provider(provider) - - # Create model configuration - model_kwargs = {**parameters} - if langchain_provider: - model_kwargs['model_provider'] = langchain_provider - - # Initialize the chat model (init_chat_model may be async or sync) - result = init_chat_model(model_name, **model_kwargs) # type: ignore[misc] - # Handle both sync and async initialization - if hasattr(result, '__await__'): - return await result - return result - except ImportError as e: - raise ImportError( - 'langchain package is required for LangChainProvider. ' - 'Install it with: pip install langchain langchain-core' - ) from e From 86acd6e8441ba31a7159de9696ffcf89308f8d70 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 10 Dec 2025 20:10:50 +0100 Subject: [PATCH 18/28] simplify --- ldai/chat/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py index bcb4284..931bb54 100644 --- a/ldai/chat/__init__.py +++ b/ldai/chat/__init__.py @@ -72,8 +72,7 @@ async def invoke(self, prompt: str) -> ChatResponse: and self._ai_config.judge_configuration.judges and len(self._ai_config.judge_configuration.judges) > 0 ): - evaluation_tasks = self._start_judge_evaluations(self._messages, response) - response.evaluations = evaluation_tasks + response.evaluations = self._start_judge_evaluations(self._messages, response) # Add the response message to conversation history self._messages.append(response.message) From 11f7602d844e8042c1e840d6aaf98f156973d6ac Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 10 Dec 2025 20:21:34 +0100 Subject: [PATCH 19/28] add judgeConfigKey --- ldai/chat/__init__.py | 3 ++- ldai/providers/types.py | 3 +++ ldai/tracker.py | 25 +++++++++++++++---------- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py index 931bb54..1418561 100644 --- a/ldai/chat/__init__.py +++ b/ldai/chat/__init__.py @@ -112,7 +112,8 @@ async def evaluate_judge(judge_config): ) if eval_result and eval_result.success: - self._tracker.track_eval_scores(eval_result.evals) + eval_result.judge_config_key = judge_config.key + self._tracker.track_judge_response(eval_result) return eval_result diff --git a/ldai/providers/types.py b/ldai/providers/types.py index de54698..436dd46 100644 --- a/ldai/providers/types.py +++ b/ldai/providers/types.py @@ -74,6 +74,7 @@ class JudgeResponse: 
""" Response from a judge evaluation containing scores and reasoning for multiple metrics. """ + judge_config_key: Optional[str] = None # The key of the judge configuration that was used to generate this response evals: Dict[str, EvalScore] # Dictionary where keys are metric names and values contain score and reasoning success: bool # Whether the evaluation completed successfully error: Optional[str] = None # Error message if evaluation failed @@ -86,6 +87,8 @@ def to_dict(self) -> Dict[str, Any]: 'evals': {key: eval_score.to_dict() for key, eval_score in self.evals.items()}, 'success': self.success, } + if self.judge_config_key is not None: + result['judgeConfigKey'] = self.judge_config_key if self.error is not None: result['error'] = self.error return result diff --git a/ldai/tracker.py b/ldai/tracker.py index 11b846a..8ed76f5 100644 --- a/ldai/tracker.py +++ b/ldai/tracker.py @@ -224,22 +224,27 @@ def track_eval_scores(self, scores: Dict[str, Any]) -> None: def track_judge_response(self, judge_response: Any) -> None: """ - Track a judge response, including evaluation scores and success status. + Track a judge response, including evaluation scores with judge config key. :param judge_response: JudgeResponse object containing evals and success status """ - from ldai.providers.types import JudgeResponse + from ldai.providers.types import JudgeResponse, EvalScore if isinstance(judge_response, JudgeResponse): - # Track evaluation scores + # Track evaluation scores with judge config key included in metadata if judge_response.evals: - self.track_eval_scores(judge_response.evals) - - # Track success/error based on judge response - if judge_response.success: - self.track_success() - else: - self.track_error() + track_data = self.__get_track_data() + if judge_response.judge_config_key: + track_data = {**track_data, 'judgeConfigKey': judge_response.judge_config_key} + + for metric_key, eval_score in judge_response.evals.items(): + if isinstance(eval_score, EvalScore): + self._ld_client.track( + metric_key, + self._context, + track_data, + eval_score.score + ) def track_feedback(self, feedback: Dict[str, FeedbackKind]) -> None: """ From 06acc2169110af4fccd384b8e44e9c45674d34c6 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 10 Dec 2025 20:25:25 +0100 Subject: [PATCH 20/28] strongly type JudgeResponse --- ldai/providers/types.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ldai/providers/types.py b/ldai/providers/types.py index 436dd46..5b208d4 100644 --- a/ldai/providers/types.py +++ b/ldai/providers/types.py @@ -1,5 +1,7 @@ """Types for AI provider responses.""" +from __future__ import annotations + from dataclasses import dataclass from typing import Any, Dict, List, Optional @@ -38,7 +40,7 @@ class ChatResponse: """ message: LDMessage metrics: LDAIMetrics - evaluations: Optional[List[Any]] = None # List of JudgeResponse, will be populated later + evaluations: Optional[List[JudgeResponse]] = None # List of JudgeResponse, will be populated later @dataclass From 84669d5622b2dcfe02f25e9161fb70a81562df1a Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Wed, 10 Dec 2025 20:32:19 +0100 Subject: [PATCH 21/28] AIJudge to Judge --- ldai/__init__.py | 4 ++-- ldai/chat/__init__.py | 6 +++--- ldai/client.py | 10 +++++----- ldai/judge/__init__.py | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/ldai/__init__.py b/ldai/__init__.py index 78125d7..5457f05 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -5,7 +5,7 @@ from ldai.chat import TrackedChat 
from ldai.client import LDAIClient # Export judge -from ldai.judge import AIJudge +from ldai.judge import Judge # Export models for convenience from ldai.models import ( # Deprecated aliases for backward compatibility AIAgentConfig, AIAgentConfigDefault, AIAgentConfigRequest, AIAgents, @@ -25,7 +25,7 @@ 'AICompletionConfigDefault', 'AIJudgeConfig', 'AIJudgeConfigDefault', - 'AIJudge', + 'Judge', 'TrackedChat', 'EvalScore', 'JudgeConfiguration', diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py index 1418561..13e8995 100644 --- a/ldai/chat/__init__.py +++ b/ldai/chat/__init__.py @@ -3,7 +3,7 @@ import asyncio from typing import Any, Dict, List, Optional -from ldai.judge import AIJudge +from ldai.judge import Judge from ldai.models import AICompletionConfig, LDMessage from ldai.providers.ai_provider import AIProvider from ldai.providers.types import ChatResponse, JudgeResponse @@ -24,7 +24,7 @@ def __init__( ai_config: AICompletionConfig, tracker: LDAIConfigTracker, provider: AIProvider, - judges: Optional[Dict[str, AIJudge]] = None, + judges: Optional[Dict[str, Judge]] = None, logger: Optional[Any] = None, ): """ @@ -151,7 +151,7 @@ def get_provider(self) -> AIProvider: """ return self._provider - def get_judges(self) -> Dict[str, AIJudge]: + def get_judges(self) -> Dict[str, Judge]: """ Get the judges associated with this TrackedChat. diff --git a/ldai/client.py b/ldai/client.py index 086e99b..fab8afe 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -6,7 +6,7 @@ from ldclient.client import LDClient from ldai.chat import TrackedChat -from ldai.judge import AIJudge +from ldai.judge import Judge from ldai.models import (AIAgentConfig, AIAgentConfigDefault, AIAgentConfigRequest, AIAgents, AICompletionConfig, AICompletionConfigDefault, AIJudgeConfig, @@ -121,7 +121,7 @@ async def create_judge( default_value: AIJudgeConfigDefault, variables: Optional[Dict[str, Any]] = None, default_ai_provider: Optional[SupportedAIProvider] = None, - ) -> Optional[AIJudge]: + ) -> Optional[Judge]: """ Creates and returns a new Judge instance for AI evaluation. @@ -182,7 +182,7 @@ async def create_judge( if not provider: return None - return AIJudge(judge_config, judge_config.tracker, provider, self._logger) + return Judge(judge_config, judge_config.tracker, provider, self._logger) except Exception as error: # Would log error if logger available return None @@ -193,7 +193,7 @@ async def _initialize_judges( context: Context, variables: Optional[Dict[str, Any]] = None, default_ai_provider: Optional[SupportedAIProvider] = None, - ) -> Dict[str, AIJudge]: + ) -> Dict[str, Judge]: """ Initialize judges from judge configurations. @@ -203,7 +203,7 @@ async def _initialize_judges( :param default_ai_provider: Optional default AI provider to use :return: Dictionary of judge instances keyed by their configuration keys """ - judges: Dict[str, AIJudge] = {} + judges: Dict[str, Judge] = {} async def create_judge_for_config(judge_key: str): judge = await self.create_judge( diff --git a/ldai/judge/__init__.py b/ldai/judge/__init__.py index 3caad65..3fbf2a1 100644 --- a/ldai/judge/__init__.py +++ b/ldai/judge/__init__.py @@ -13,7 +13,7 @@ from ldai.tracker import LDAIConfigTracker -class AIJudge: +class Judge: """ Judge implementation that handles evaluation functionality and conversation management. 
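A minimal end-to-end sketch of the Judge API as it stands after the rename above. The SDK key, the judge config key, and the AIJudgeConfigDefault field values below are illustrative assumptions rather than values taken from these patches, and create_judge resolves to None unless an AI provider package can be loaded (the bundled LangChain provider is commented out earlier in this series), so the happy path here assumes one is available:

    import asyncio

    from ldclient import Config, Context, LDClient

    from ldai import LDAIClient
    from ldai.models import AIJudgeConfigDefault


    async def main():
        ld_client = LDClient(Config('sdk-key'))  # hypothetical SDK key
        ai_client = LDAIClient(ld_client)

        # Key and defaults are placeholders; real values come from a
        # LaunchDarkly AI Config flag with mode "judge".
        judge = await ai_client.create_judge(
            'relevance-judge',
            Context.create('user-key'),
            AIJudgeConfigDefault(enabled=False, evaluation_metric_keys=[]),
        )

        if judge:
            result = await judge.evaluate('What is the capital of France?', 'Paris')
            if result and result.success:
                for metric_key, eval_score in result.evals.items():
                    print(metric_key, eval_score.score, eval_score.reasoning)

    asyncio.run(main())

Because evaluate returns Optional[JudgeResponse], and JudgeResponse carries a success flag plus per-metric EvalScore entries, callers can handle sampled-out and failed evaluations uniformly by checking for None and success before reading evals.
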
From d57c4f7244891f9fd1fc64488b4cd9115e162bd0 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Mon, 15 Dec 2025 19:00:40 +0100 Subject: [PATCH 22/28] add key to model --- ldai/client.py | 3 +++ ldai/models.py | 1 + 2 files changed, 4 insertions(+) diff --git a/ldai/client.py b/ldai/client.py index fab8afe..2881cf6 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -47,6 +47,7 @@ def completion_config( ) config = AICompletionConfig( + key=key, enabled=bool(enabled), model=model, messages=messages, @@ -104,6 +105,7 @@ def judge_config( evaluation_metric_keys = variation.get('evaluationMetricKeys', default_value.evaluation_metric_keys or []) config = AIJudgeConfig( + key=key, enabled=bool(enabled), evaluation_metric_keys=evaluation_metric_keys, model=model, @@ -551,6 +553,7 @@ def __evaluate_agent( final_instructions = instructions if instructions is not None else default_value.instructions return AIAgentConfig( + key=key, enabled=bool(enabled) if enabled is not None else (default_value.enabled or False), model=model or default_value.model, provider=provider or default_value.provider, diff --git a/ldai/models.py b/ldai/models.py index c075dcf..988d97d 100644 --- a/ldai/models.py +++ b/ldai/models.py @@ -170,6 +170,7 @@ class AIConfig: """ Base AI Config interface without mode-specific fields. """ + key: str enabled: bool model: Optional[ModelConfig] = None provider: Optional[ProviderConfig] = None From 351d4f1ac8b50931af6eb8ecba7f029a6f4d5029 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Tue, 16 Dec 2025 22:07:36 +0100 Subject: [PATCH 23/28] fixes --- ldai/providers/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ldai/providers/types.py b/ldai/providers/types.py index 5b208d4..e9160cc 100644 --- a/ldai/providers/types.py +++ b/ldai/providers/types.py @@ -76,9 +76,9 @@ class JudgeResponse: """ Response from a judge evaluation containing scores and reasoning for multiple metrics. 
""" - judge_config_key: Optional[str] = None # The key of the judge configuration that was used to generate this response evals: Dict[str, EvalScore] # Dictionary where keys are metric names and values contain score and reasoning success: bool # Whether the evaluation completed successfully + judge_config_key: Optional[str] = None # The key of the judge configuration that was used to generate this response error: Optional[str] = None # Error message if evaluation failed def to_dict(self) -> Dict[str, Any]: From 7a699ef47004fc6b98634546d23aaa00416320b3 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Tue, 16 Dec 2025 22:08:52 +0100 Subject: [PATCH 24/28] fix linting --- ldai/tracker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ldai/tracker.py b/ldai/tracker.py index 8ed76f5..b4d8b30 100644 --- a/ldai/tracker.py +++ b/ldai/tracker.py @@ -228,7 +228,7 @@ def track_judge_response(self, judge_response: Any) -> None: :param judge_response: JudgeResponse object containing evals and success status """ - from ldai.providers.types import JudgeResponse, EvalScore + from ldai.providers.types import EvalScore, JudgeResponse if isinstance(judge_response, JudgeResponse): # Track evaluation scores with judge config key included in metadata @@ -236,7 +236,7 @@ def track_judge_response(self, judge_response: Any) -> None: track_data = self.__get_track_data() if judge_response.judge_config_key: track_data = {**track_data, 'judgeConfigKey': judge_response.judge_config_key} - + for metric_key, eval_score in judge_response.evals.items(): if isinstance(eval_score, EvalScore): self._ld_client.track( From 8d3bfbbe544af5aad7d23392ca0c2a023b2c1de4 Mon Sep 17 00:00:00 2001 From: Edwin Okonkwo Date: Tue, 16 Dec 2025 22:18:00 +0100 Subject: [PATCH 25/28] revert to sync --- ldai/testing/test_tracker.py | 14 ++++++-------- ldai/tracker.py | 6 +++--- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/ldai/testing/test_tracker.py b/ldai/testing/test_tracker.py index 2e39d98..57f13fd 100644 --- a/ldai/testing/test_tracker.py +++ b/ldai/testing/test_tracker.py @@ -276,8 +276,7 @@ def test_tracks_bedrock_metrics_with_error(client: LDClient): assert tracker.get_summary().usage == TokenUsage(330, 220, 110) -@pytest.mark.asyncio -async def test_tracks_openai_metrics(client: LDClient): +def test_tracks_openai_metrics(client: LDClient): context = Context.create("user-key") tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) @@ -293,10 +292,10 @@ def to_dict(self): "completion_tokens": 110, } - async def get_result(): + def get_result(): return Result() - await tracker.track_openai_metrics(get_result) + tracker.track_openai_metrics(get_result) calls = [ call( @@ -330,16 +329,15 @@ async def get_result(): assert tracker.get_summary().usage == TokenUsage(330, 220, 110) -@pytest.mark.asyncio -async def test_tracks_openai_metrics_with_exception(client: LDClient): +def test_tracks_openai_metrics_with_exception(client: LDClient): context = Context.create("user-key") tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - async def raise_exception(): + def raise_exception(): raise ValueError("Something went wrong") try: - await tracker.track_openai_metrics(raise_exception) + tracker.track_openai_metrics(raise_exception) assert False, "Should have thrown an exception" except ValueError: pass diff --git a/ldai/tracker.py b/ldai/tracker.py index b4d8b30..63aa67c 100644 --- a/ldai/tracker.py 
+++ b/ldai/tracker.py @@ -286,7 +286,7 @@ def track_error(self) -> None: "$ld:ai:generation:error", self._context, self.__get_track_data(), 1 ) - async def track_openai_metrics(self, func): + def track_openai_metrics(self, func): """ Track OpenAI-specific operations. @@ -300,12 +300,12 @@ async def track_openai_metrics(self, func): A failed operation will not have any token usage data. - :param func: Async function to track. + :param func: Function to track. :return: Result of the tracked function. """ start_time = time.time() try: - result = await func() + result = func() end_time = time.time() duration = int((end_time - start_time) * 1000) self.track_duration(duration) From 5de380b08bcb5baa07ce8e574b9414e9956c3f2e Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Tue, 16 Dec 2025 23:29:42 +0000 Subject: [PATCH 26/28] judge should set key for responses --- ldai/chat/__init__.py | 1 - ldai/judge/__init__.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py index 13e8995..1283281 100644 --- a/ldai/chat/__init__.py +++ b/ldai/chat/__init__.py @@ -112,7 +112,6 @@ async def evaluate_judge(judge_config): ) if eval_result and eval_result.success: - eval_result.judge_config_key = judge_config.key self._tracker.track_judge_response(eval_result) return eval_result diff --git a/ldai/judge/__init__.py b/ldai/judge/__init__.py index 3fbf2a1..058eb13 100644 --- a/ldai/judge/__init__.py +++ b/ldai/judge/__init__.py @@ -94,6 +94,7 @@ async def evaluate( success = False return JudgeResponse( + judge_config_key=self._ai_config.key, evals=evals, success=success, ) From 07c54548daef7e00cf93f1c005e2941aa03bad9b Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Wed, 17 Dec 2025 04:08:36 +0000 Subject: [PATCH 27/28] use simplified Chat name --- ldai/__init__.py | 4 ++-- ldai/chat/__init__.py | 14 +++++++------- ldai/client.py | 10 +++++----- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/ldai/__init__.py b/ldai/__init__.py index 5457f05..d017cad 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -2,7 +2,7 @@ # Export main client # Export chat -from ldai.chat import TrackedChat +from ldai.chat import Chat from ldai.client import LDAIClient # Export judge from ldai.judge import Judge @@ -26,7 +26,7 @@ 'AIJudgeConfig', 'AIJudgeConfigDefault', 'Judge', - 'TrackedChat', + 'Chat', 'EvalScore', 'JudgeConfiguration', 'JudgeResponse', diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py index 1283281..ff9b0c7 100644 --- a/ldai/chat/__init__.py +++ b/ldai/chat/__init__.py @@ -1,4 +1,4 @@ -"""TrackedChat implementation for managing AI chat conversations.""" +"""Chat implementation for managing AI chat conversations.""" import asyncio from typing import Any, Dict, List, Optional @@ -10,9 +10,9 @@ from ldai.tracker import LDAIConfigTracker -class TrackedChat: +class Chat: """ - Concrete implementation of TrackedChat that provides chat functionality + Concrete implementation of Chat that provides chat functionality by delegating to an AIProvider implementation. This class handles conversation management and tracking, while delegating @@ -28,7 +28,7 @@ def __init__( logger: Optional[Any] = None, ): """ - Initialize the TrackedChat. + Initialize the Chat. 
:param ai_config: The completion AI configuration :param tracker: The tracker for the completion configuration @@ -126,7 +126,7 @@ async def evaluate_judge(judge_config): def get_config(self) -> AICompletionConfig: """ - Get the underlying AI configuration used to initialize this TrackedChat. + Get the underlying AI configuration used to initialize this Chat. :return: The AI completion configuration """ @@ -134,7 +134,7 @@ def get_config(self) -> AICompletionConfig: def get_tracker(self) -> LDAIConfigTracker: """ - Get the underlying AI configuration tracker used to initialize this TrackedChat. + Get the underlying AI configuration tracker used to initialize this Chat. :return: The tracker instance """ @@ -152,7 +152,7 @@ def get_provider(self) -> AIProvider: def get_judges(self) -> Dict[str, Judge]: """ - Get the judges associated with this TrackedChat. + Get the judges associated with this Chat. Returns a dictionary of judge instances keyed by their configuration keys. diff --git a/ldai/client.py b/ldai/client.py index 2881cf6..ea07915 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -5,7 +5,7 @@ from ldclient import Context from ldclient.client import LDClient -from ldai.chat import TrackedChat +from ldai.chat import Chat from ldai.judge import Judge from ldai.models import (AIAgentConfig, AIAgentConfigDefault, AIAgentConfigRequest, AIAgents, AICompletionConfig, @@ -241,16 +241,16 @@ async def create_chat( default_value: AICompletionConfigDefault, variables: Optional[Dict[str, Any]] = None, default_ai_provider: Optional[SupportedAIProvider] = None, - ) -> Optional[TrackedChat]: + ) -> Optional[Chat]: """ - Creates and returns a new TrackedChat instance for AI chat conversations. + Creates and returns a new Chat instance for AI conversations. :param key: The key identifying the AI completion configuration to use :param context: Standard Context used when evaluating flags :param default_value: A default value representing a standard AI config result :param variables: Dictionary of values for instruction interpolation :param default_ai_provider: Optional default AI provider to use - :return: TrackedChat instance or None if disabled/unsupported + :return: Chat instance or None if disabled/unsupported Example:: @@ -296,7 +296,7 @@ async def create_chat( default_ai_provider, ) - return TrackedChat(config, config.tracker, provider, judges, self._logger) + return Chat(config, config.tracker, provider, judges, self._logger) def agent_config( self, From 3c77d764805b2aaa75795dd7eeb1e11027d09c54 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Wed, 17 Dec 2025 04:12:28 +0000 Subject: [PATCH 28/28] re-order track_metrics_of params to be more intuitive --- ldai/chat/__init__.py | 2 +- ldai/judge/__init__.py | 2 +- ldai/tracker.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py index ff9b0c7..3d7f40f 100644 --- a/ldai/chat/__init__.py +++ b/ldai/chat/__init__.py @@ -62,8 +62,8 @@ async def invoke(self, prompt: str) -> ChatResponse: # Delegate to provider-specific implementation with tracking response = await self._tracker.track_metrics_of( - lambda result: result.metrics, lambda: self._provider.invoke_model(all_messages), + lambda result: result.metrics, ) # Start judge evaluations as async tasks (don't await them) diff --git a/ldai/judge/__init__.py b/ldai/judge/__init__.py index 058eb13..7158797 100644 --- a/ldai/judge/__init__.py +++ b/ldai/judge/__init__.py @@ -80,8 +80,8 @@ async def evaluate( # Track metrics of the structured 
model invocation response = await self._ai_config_tracker.track_metrics_of( + lambda: self._ai_provider.invoke_structured_model(messages, self._evaluation_response_structure), lambda result: result.metrics, - lambda: self._ai_provider.invoke_structured_model(messages, self._evaluation_response_structure) ) success = response.metrics.success diff --git a/ldai/tracker.py b/ldai/tracker.py index 63aa67c..e5d7ed2 100644 --- a/ldai/tracker.py +++ b/ldai/tracker.py @@ -157,7 +157,7 @@ def track_duration_of(self, func): return result - async def track_metrics_of(self, metrics_extractor, func): + async def track_metrics_of(self, func, metrics_extractor): """ Track metrics for a generic AI operation. @@ -168,8 +168,8 @@ async def track_metrics_of(self, metrics_extractor, func): In the case the provided function throws, this function will record the duration and an error. A failed operation will not have any token usage data. - :param metrics_extractor: Function that extracts LDAIMetrics from the operation result :param func: Async function which executes the operation + :param metrics_extractor: Function that extracts LDAIMetrics from the operation result :return: The result of the operation """ start_time = time.time()
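
To tie the series together, below is a minimal end-to-end sketch of the public API as it stands after PATCH 28/28: Chat replaces TrackedChat, Judge replaces AIJudge, every resolved config now carries its key (which Judge stamps onto JudgeResponse.judge_config_key), track_openai_metrics is synchronous again, and track_metrics_of takes the operation before the metrics extractor. The client bootstrapping and the AICompletionConfigDefault constructor arguments are assumptions for illustration only; they are not taken verbatim from these diffs.

import asyncio

from ldclient import Context

from ldai import LDAIClient
from ldai.models import AICompletionConfigDefault


async def main(ai_client: LDAIClient) -> None:
    context = Context.create("user-key")

    # create_chat resolves the completion config and returns a Chat
    # (formerly TrackedChat), or None when the config is disabled or the
    # provider is unsupported. The AICompletionConfigDefault arguments
    # here are illustrative; the real constructor may differ.
    chat = await ai_client.create_chat(
        "my-chat-config",
        context,
        AICompletionConfigDefault(enabled=False),
        variables={"name": "Sandy"},
    )
    if chat is None:
        return

    # invoke() appends the prompt to the conversation, delegates to the
    # provider, and records metrics via
    # tracker.track_metrics_of(func, metrics_extractor) -- the func-first
    # argument order introduced in PATCH 28.
    response = await chat.invoke("What is the capital of France?")
    print(response)

    # Judges attached to the chat are keyed by their configuration keys;
    # as of PATCH 26, each JudgeResponse carries judge_config_key set by
    # the Judge itself rather than by the caller.
    for judge_key, judge in chat.get_judges().items():
        print(judge_key, judge)

    # track_openai_metrics is synchronous again (PATCH 25): pass a plain
    # callable that performs the OpenAI call and returns its response.
    # `openai_call` is a hypothetical stand-in, not a real API:
    # chat.get_tracker().track_openai_metrics(lambda: openai_call())


# Hypothetical bootstrapping: `ld_client` would be an already-initialized
# ldclient LDClient instance.
# ai_client = LDAIClient(ld_client)
# asyncio.run(main(ai_client))

Note on the reorder: putting func first also matches track_duration_of(func), so the operation being measured is consistently the leading argument across the tracker helpers, with the extractor as the secondary callback.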