diff --git a/langfuse/_client/client.py b/langfuse/_client/client.py
index d2faaba83..2b3d6671c 100644
--- a/langfuse/_client/client.py
+++ b/langfuse/_client/client.py
@@ -2864,50 +2864,50 @@ async def _process_experiment_item(
                 )
                 raise e

-            # Run evaluators
-            evaluations = []
+        # Run evaluators
+        evaluations = []

-            for evaluator in evaluators:
-                try:
-                    eval_metadata: Optional[Dict[str, Any]] = None
+        for evaluator in evaluators:
+            try:
+                eval_metadata: Optional[Dict[str, Any]] = None

-                    if isinstance(item, dict):
-                        eval_metadata = item.get("metadata")
-                    elif hasattr(item, "metadata"):
-                        eval_metadata = item.metadata
+                if isinstance(item, dict):
+                    eval_metadata = item.get("metadata")
+                elif hasattr(item, "metadata"):
+                    eval_metadata = item.metadata

-                    eval_results = await _run_evaluator(
-                        evaluator,
-                        input=input_data,
-                        output=output,
-                        expected_output=expected_output,
-                        metadata=eval_metadata,
+                eval_results = await _run_evaluator(
+                    evaluator,
+                    input=input_data,
+                    output=output,
+                    expected_output=expected_output,
+                    metadata=eval_metadata,
+                )
+                evaluations.extend(eval_results)
+
+                # Store evaluations as scores
+                for evaluation in eval_results:
+                    self.create_score(
+                        trace_id=trace_id,
+                        observation_id=span.id,
+                        name=evaluation.name,
+                        value=evaluation.value,  # type: ignore
+                        comment=evaluation.comment,
+                        metadata=evaluation.metadata,
+                        config_id=evaluation.config_id,
+                        data_type=evaluation.data_type,  # type: ignore
                     )
-                    evaluations.extend(eval_results)
-
-                    # Store evaluations as scores
-                    for evaluation in eval_results:
-                        self.create_score(
-                            trace_id=trace_id,
-                            observation_id=span.id,
-                            name=evaluation.name,
-                            value=evaluation.value,  # type: ignore
-                            comment=evaluation.comment,
-                            metadata=evaluation.metadata,
-                            config_id=evaluation.config_id,
-                            data_type=evaluation.data_type,  # type: ignore
-                        )
-                except Exception as e:
-                    langfuse_logger.error(f"Evaluator failed: {e}")
+            except Exception as e:
+                langfuse_logger.error(f"Evaluator failed: {e}")

-            return ExperimentItemResult(
-                item=item,
-                output=output,
-                evaluations=evaluations,
-                trace_id=trace_id,
-                dataset_run_id=dataset_run_id,
-            )
+        return ExperimentItemResult(
+            item=item,
+            output=output,
+            evaluations=evaluations,
+            trace_id=trace_id,
+            dataset_run_id=dataset_run_id,
+        )

     def _create_experiment_run_name(
         self, *, name: Optional[str] = None, run_name: Optional[str] = None
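
Note on the hunk above: the removed and added statements are textually identical apart from leading whitespace, so the change only dedents the evaluator loop and the final ExperimentItemResult construction by one nesting level. For reference, the call site implies that an evaluator is invoked with input, output, expected_output, and metadata keyword arguments, and that each returned evaluation exposes name, value, comment, metadata, config_id, and data_type, which are forwarded to create_score. The sketch below is a minimal, illustrative evaluator under those assumptions; the Evaluation dataclass and exact_match_evaluator name are stand-ins for this example, not types defined in this patch.

# Illustrative sketch only: a hypothetical evaluator compatible with the
# keyword arguments _run_evaluator passes in the hunk above. The Evaluation
# dataclass is a stand-in carrying the fields read by create_score.
from dataclasses import dataclass
from typing import Any, Dict, Optional


@dataclass
class Evaluation:
    name: str
    value: float
    comment: Optional[str] = None
    metadata: Optional[Dict[str, Any]] = None
    config_id: Optional[str] = None
    data_type: Optional[str] = None


def exact_match_evaluator(
    *,
    input: Any,
    output: Any,
    expected_output: Any,
    metadata: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> Evaluation:
    # Score 1.0 when the task output equals the dataset item's expected output.
    return Evaluation(
        name="exact_match",
        value=1.0 if output == expected_output else 0.0,
        comment=f"expected={expected_output!r}, got={output!r}",
    )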