
Commit 171a24b

Chibi Vikram and claude committed
feat: support 'same-as-agent' model option for legacy evaluators
Add support for the 'same-as-agent' model configuration in legacy LLM-based evaluators. When an evaluator specifies 'same-as-agent' as its model, it now resolves to the actual model from agent.json settings instead of throwing an error.

Changes:
- Updated EvaluatorFactory to accept and pass agent_model parameter
- Added _get_agent_model() method to runtime to load model from agent.json
- Added logging for model resolution and evaluator creation
- Fixed error message in trajectory evaluator (was incorrectly saying "LLM evaluator")

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent a2bf1cf commit 171a24b
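
For context, a rough sketch of the two configuration pieces this commit connects. The evaluator file below is hypothetical (its exact field names are an assumption, not taken from this repository); the agent.json shape matches the settings.model lookup added in _runtime.py, and the model value is only illustrative.

evaluators/llm-judge.json (hypothetical legacy evaluator):

    {
      "id": "llm-judge-1",
      "name": "LLM Judge",
      "model": "same-as-agent",
      "prompt": "Score the agent output from 0 to 100."
    }

agent.json (only settings.model is read):

    {
      "settings": {
        "model": "gpt-4o-2024-11-20"
      }
    }

Before this commit, the "same-as-agent" value above made the factory raise immediately; with this change it resolves to the model found in agent.json.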

File tree

2 files changed (+70, -10 lines)


src/uipath/_cli/_evals/_evaluator_factory.py

Lines changed: 48 additions & 9 deletions
@@ -1,10 +1,13 @@
 import importlib.util
+import logging
 import sys
 from pathlib import Path
 from typing import Any
 
 from pydantic import TypeAdapter
 
+logger = logging.getLogger(__name__)
+
 from uipath._cli._evals._helpers import (  # type: ignore # Remove after gnarly fix
     try_extract_file_and_class_name,
 )
@@ -106,12 +109,15 @@ def _prepare_evaluator_config(data: dict[str, Any]) -> dict[str, Any]:
 
     @classmethod
     def create_evaluator(
-        cls, data: dict[str, Any], evaluators_dir: Path | None = None
+        cls,
+        data: dict[str, Any],
+        evaluators_dir: Path | None = None,
+        agent_model: str | None = None,
     ) -> BaseEvaluator[Any, Any, Any]:
         if data.get("version", None) == "1.0":
             return cls._create_evaluator_internal(data, evaluators_dir)
         else:
-            return cls._create_legacy_evaluator_internal(data)
+            return cls._create_legacy_evaluator_internal(data, agent_model)
 
     @staticmethod
     def _create_evaluator_internal(
@@ -371,11 +377,14 @@ def _create_llm_judge_simulation_trajectory_evaluator(
     @staticmethod
     def _create_legacy_evaluator_internal(
         data: dict[str, Any],
+        agent_model: str | None = None,
     ) -> LegacyBaseEvaluator[Any]:
         """Create an evaluator instance from configuration data.
 
         Args:
             data: Dictionary containing evaluator configuration from JSON file
+            agent_model: Optional model name from agent settings for resolving
+                'same-as-agent' model configuration
 
         Returns:
             Appropriate evaluator instance based on category
@@ -391,9 +400,13 @@ def _create_legacy_evaluator_internal(
             case JsonSimilarityEvaluatorParams():
                 return EvaluatorFactory._create_legacy_json_similarity_evaluator(params)
             case LLMEvaluatorParams():
-                return EvaluatorFactory._create_legacy_llm_as_judge_evaluator(params)
+                return EvaluatorFactory._create_legacy_llm_as_judge_evaluator(
+                    params, agent_model
+                )
             case TrajectoryEvaluatorParams():
-                return EvaluatorFactory._create_legacy_trajectory_evaluator(params)
+                return EvaluatorFactory._create_legacy_trajectory_evaluator(
+                    params, agent_model
+                )
             case _:
                 raise ValueError(f"Unknown evaluator category: {params}")
 
@@ -414,33 +427,59 @@ def _create_legacy_json_similarity_evaluator(
     @staticmethod
     def _create_legacy_llm_as_judge_evaluator(
         params: LLMEvaluatorParams,
+        agent_model: str | None = None,
     ) -> LegacyLlmAsAJudgeEvaluator:
         """Create an LLM-as-a-judge evaluator."""
         if not params.prompt:
             raise ValueError("LLM evaluator must include 'prompt' field")
 
         if not params.model:
             raise ValueError("LLM evaluator must include 'model' field")
+
+        # Resolve 'same-as-agent' to actual agent model
         if params.model == "same-as-agent":
-            raise ValueError(
-                "'same-as-agent' model option is not supported by coded agents evaluations. Please select a specific model for the evaluator."
+            if not agent_model:
+                raise ValueError(
+                    "'same-as-agent' model option requires agent settings. "
+                    "Ensure agent.json contains valid model settings."
+                )
+            logger.info(
+                f"Resolving 'same-as-agent' to agent model: {agent_model} "
+                f"for evaluator '{params.name}'"
             )
+            params = params.model_copy(update={"model": agent_model})
 
+        logger.info(
+            f"Creating LLM-as-judge evaluator '{params.name}' with model: {params.model}"
+        )
         return LegacyLlmAsAJudgeEvaluator(**params.model_dump(), config={})
 
     @staticmethod
     def _create_legacy_trajectory_evaluator(
         params: TrajectoryEvaluatorParams,
+        agent_model: str | None = None,
    ) -> LegacyTrajectoryEvaluator:
         """Create a trajectory evaluator."""
         if not params.prompt:
             raise ValueError("Trajectory evaluator must include 'prompt' field")
 
         if not params.model:
-            raise ValueError("LLM evaluator must include 'model' field")
+            raise ValueError("Trajectory evaluator must include 'model' field")
+
+        # Resolve 'same-as-agent' to actual agent model
         if params.model == "same-as-agent":
-            raise ValueError(
-                "'same-as-agent' model option is not supported by coded agents evaluations. Please select a specific model for the evaluator."
+            if not agent_model:
+                raise ValueError(
+                    "'same-as-agent' model option requires agent settings. "
+                    "Ensure agent.json contains valid model settings."
+                )
+            logger.info(
+                f"Resolving 'same-as-agent' to agent model: {agent_model} "
+                f"for evaluator '{params.name}'"
            )
+            params = params.model_copy(update={"model": agent_model})
 
+        logger.info(
+            f"Creating trajectory evaluator '{params.name}' with model: {params.model}"
+        )
         return LegacyTrajectoryEvaluator(**params.model_dump(), config={})
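
Taken together, the factory changes let the runtime thread the agent model through to legacy evaluator construction. A minimal sketch of the new call, assuming the hypothetical llm-judge.json from above (file locations and the model value are illustrative, not part of this diff):

    import json
    from pathlib import Path

    from uipath._cli._evals._evaluator_factory import EvaluatorFactory

    # Load a legacy evaluator definition (no "version": "1.0" field, so the
    # legacy branch _create_legacy_evaluator_internal is taken).
    evaluators_dir = Path("evaluations/evaluators")  # illustrative location
    data = json.loads((evaluators_dir / "llm-judge.json").read_text(encoding="utf-8"))

    # New keyword argument: the model the runtime read from agent.json.
    evaluator = EvaluatorFactory.create_evaluator(
        data,
        evaluators_dir,
        agent_model="gpt-4o-2024-11-20",  # illustrative value
    )

With agent_model provided, "same-as-agent" is swapped out via params.model_copy(update={"model": agent_model}) and the two logger.info calls record the resolution and the final model; if agent_model is None, the new ValueError about missing agent settings is raised instead.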

src/uipath/_cli/_evals/_runtime.py

Lines changed: 22 additions & 1 deletion
@@ -601,6 +601,22 @@ async def run_evaluator(
 
         return result
 
+    def _get_agent_model(self) -> str | None:
+        """Load agent model from agent.json.
+
+        Returns:
+            The model name from agent settings, or None if not found.
+        """
+        agent_json = Path.cwd() / "agent.json"
+        if agent_json.exists():
+            try:
+                with open(agent_json, "r", encoding="utf-8") as f:
+                    data = json.load(f)
+                return data.get("settings", {}).get("model")
+            except (json.JSONDecodeError, OSError):
+                return None
+        return None
+
     def _load_evaluators(
         self, evaluation_set: EvaluationSet
     ) -> list[BaseEvaluator[Any, Any, Any]]:
@@ -611,6 +627,9 @@ def _load_evaluators(
             raise ValueError("eval_set cannot be None")
         evaluators_dir = Path(eval_set).parent.parent / "evaluators"
 
+        # Load agent model for 'same-as-agent' resolution in legacy evaluators
+        agent_model = self._get_agent_model()
+
         # If evaluatorConfigs is specified, use that (new field with weights)
         # Otherwise, fall back to evaluatorRefs (old field without weights)
         if (
@@ -638,7 +657,9 @@ def _load_evaluators(
             try:
                 evaluator_id = data.get("id")
                 if evaluator_id in evaluator_ref_ids:
-                    evaluator = EvaluatorFactory.create_evaluator(data, evaluators_dir)
+                    evaluator = EvaluatorFactory.create_evaluator(
+                        data, evaluators_dir, agent_model=agent_model
+                    )
                     evaluators.append(evaluator)
                     found_evaluator_ids.add(evaluator_id)
             except Exception as e:
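
The new _get_agent_model helper quietly returns None when agent.json is missing or unreadable, which then surfaces as the factory's ValueError if an evaluator still asks for 'same-as-agent'. A standalone sketch of the same lookup, with the path passed in rather than taken from Path.cwd() (the function name and sample file here are illustrative, not part of the runtime):

    import json
    from pathlib import Path


    def read_agent_model(agent_json: Path) -> str | None:
        """Mirror the runtime lookup: settings.model from agent.json, else None."""
        if not agent_json.exists():
            return None
        try:
            data = json.loads(agent_json.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError):
            return None
        return data.get("settings", {}).get("model")


    print(read_agent_model(Path("agent.json")))  # e.g. "gpt-4o-2024-11-20" or None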
