From 8696a836ddbf2c324db0bda9558aff7438fb8de5 Mon Sep 17 00:00:00 2001
From: A Vertex SDK engineer
Date: Fri, 20 Feb 2026 09:46:15 -0800
Subject: [PATCH] chore: Improve readability of create_evaluation_run

PiperOrigin-RevId: 872946786
---
 vertexai/_genai/_evals_common.py |  77 +++++++++++++++++
 vertexai/_genai/evals.py         | 138 +++++++++++----------
 2 files changed, 117 insertions(+), 98 deletions(-)

diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py
index f33320324a..0bc28994ed 100644
--- a/vertexai/_genai/_evals_common.py
+++ b/vertexai/_genai/_evals_common.py
@@ -258,6 +258,80 @@ def _extract_contents_for_inference(
     return request_dict_or_raw_text
 
 
+def _resolve_dataset(
+    api_client: BaseApiClient,
+    dataset: Union[types.EvaluationRunDataSource, types.EvaluationDataset],
+    dest: str,
+    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
+) -> types.EvaluationRunDataSource:
+    """Resolves dataset for the evaluation run."""
+    if isinstance(dataset, types.EvaluationDataset):
+        candidate_name = _get_candidate_name(dataset, agent_info_pydantic)
+        eval_set = _create_evaluation_set_from_dataframe(
+            api_client,
+            dest,
+            dataset.eval_dataset_df,
+            candidate_name,
+        )
+        dataset = types.EvaluationRunDataSource(evaluation_set=eval_set.name)
+    return dataset
+
+
+def _resolve_inference_configs(
+    inference_configs: Optional[
+        dict[str, types.EvaluationRunInferenceConfigOrDict]
+    ] = None,
+    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
+) -> Optional[dict[str, types.EvaluationRunInferenceConfigOrDict]]:
+    """Resolves inference configs for the evaluation run."""
+    if agent_info_pydantic and agent_info_pydantic.name:
+        inference_configs = {}
+        inference_configs[agent_info_pydantic.name] = (
+            types.EvaluationRunInferenceConfig(
+                agent_config=types.EvaluationRunAgentConfig(
+                    developer_instruction=genai_types.Content(
+                        parts=[genai_types.Part(text=agent_info_pydantic.instruction)]
+                    ),
+                    tools=agent_info_pydantic.tool_declarations,
+                )
+            )
+        )
+    return inference_configs
+
+
+def _add_evaluation_run_labels(
+    labels: Optional[dict[str, str]] = None,
+    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
+) -> Optional[dict[str, str]]:
+    """Adds labels to the evaluation run."""
+    if agent_info_pydantic and agent_info_pydantic.agent_resource_name:
+        labels = labels or {}
+        labels["vertex-ai-evaluation-agent-engine-id"] = (
+            agent_info_pydantic.agent_resource_name.split("reasoningEngines/")[-1]
+        )
+    return labels
+
+
+def _get_candidate_name(
+    dataset: types.EvaluationDataset,
+    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
+) -> Optional[str]:
+    """Internal helper to get candidate name."""
+    if agent_info_pydantic is not None and (
+        dataset.candidate_name
+        and agent_info_pydantic
+        and agent_info_pydantic.name
+        and dataset.candidate_name != agent_info_pydantic.name
+    ):
+        logger.warning(
+            "Evaluation dataset candidate_name and agent_info.name are different."
+            " Please make sure this is intended."
+        )
+    elif dataset.candidate_name is None and agent_info_pydantic:
+        return agent_info_pydantic.name
+    return dataset.candidate_name or None
+
+
 def _execute_inference_concurrently(
     api_client: BaseApiClient,
     prompt_dataset: pd.DataFrame,
@@ -1858,6 +1932,9 @@ def _object_to_dict(obj: Any) -> Union[dict[str, Any], Any]:
                 result[key] = value
             elif isinstance(value, (list, tuple)):
                 result[key] = [_object_to_dict(item) for item in value]
+            # Add recursive handling for dictionaries
+            elif isinstance(value, dict):
+                result[key] = {k: _object_to_dict(v) for k, v in value.items()}
             elif isinstance(value, bytes):
                 result[key] = base64.b64encode(value).decode("utf-8")
             elif hasattr(value, "__dict__"):  # Nested object
diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py
index 3632628b87..dba63496fb 100644
--- a/vertexai/_genai/evals.py
+++ b/vertexai/_genai/evals.py
@@ -1625,32 +1625,14 @@ def create_evaluation_run(
             raise ValueError(
                 "At most one of agent_info or inference_configs can be provided."
             )
-        agent_info_pydantic: types.evals.AgentInfo = types.evals.AgentInfo()
-        if agent_info:
-            if isinstance(agent_info, dict):
-                agent_info_pydantic = types.evals.AgentInfo.model_validate(agent_info)
-            else:
-                agent_info_pydantic = agent_info
-        if isinstance(dataset, types.EvaluationDataset):
-            if dataset.eval_dataset_df is None:
-                raise ValueError(
-                    "EvaluationDataset must have eval_dataset_df populated."
-                )
-            if agent_info_pydantic is not None and (
-                dataset.candidate_name
-                and agent_info_pydantic
-                and agent_info_pydantic.name
-                and dataset.candidate_name != agent_info_pydantic.name
-            ):
-                logger.warning(
-                    "Evaluation dataset candidate_name and agent_info.name are different. Please make sure this is intended."
-                )
-            elif dataset.candidate_name is None and agent_info_pydantic:
-                dataset.candidate_name = agent_info_pydantic.name
-            eval_set = _evals_common._create_evaluation_set_from_dataframe(
-                self._api_client, dest, dataset.eval_dataset_df, dataset.candidate_name
-            )
-            dataset = types.EvaluationRunDataSource(evaluation_set=eval_set.name)
+        agent_info_pydantic = (
+            types.evals.AgentInfo.model_validate(agent_info)
+            if isinstance(agent_info, dict)
+            else (agent_info or types.evals.AgentInfo())
+        )
+        resolved_dataset = _evals_common._resolve_dataset(
+            self._api_client, dataset, dest, agent_info_pydantic
+        )
         output_config = genai_types.OutputConfig(
             gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
         )
@@ -1660,37 +1642,20 @@ def create_evaluation_run(
         evaluation_config = types.EvaluationRunConfig(
             output_config=output_config, metrics=resolved_metrics
         )
-        if agent_info_pydantic and agent_info_pydantic.name is not None:
-            inference_configs = {}
-            inference_configs[agent_info_pydantic.name] = (
-                types.EvaluationRunInferenceConfig(
-                    agent_config=types.EvaluationRunAgentConfig(
-                        developer_instruction=genai_types.Content(
-                            parts=[
-                                genai_types.Part(text=agent_info_pydantic.instruction)
-                            ]
-                        ),
-                        tools=agent_info_pydantic.tool_declarations,
-                    )
-                )
-            )
-            if agent_info_pydantic.agent_resource_name:
-                labels = labels or {}
-                labels["vertex-ai-evaluation-agent-engine-id"] = (
-                    agent_info_pydantic.agent_resource_name.split("reasoningEngines/")[
-                        -1
-                    ]
-                )
-        if not name:
-            name = f"evaluation_run_{uuid.uuid4()}"
-
+        resolved_inference_configs = _evals_common._resolve_inference_configs(
+            inference_configs, agent_info_pydantic
+        )
+        resolved_labels = _evals_common._add_evaluation_run_labels(
+            labels, agent_info_pydantic
+        )
+        resolved_name = name or f"evaluation_run_{uuid.uuid4()}"
         return self._create_evaluation_run(
-            name=name,
-            display_name=display_name or name,
-            data_source=dataset,
+            name=resolved_name,
+            display_name=display_name or resolved_name,
+            data_source=resolved_dataset,
             evaluation_config=evaluation_config,
-            inference_configs=inference_configs,
-            labels=labels,
+            inference_configs=resolved_inference_configs,
+            labels=resolved_labels,
             config=config,
         )
 
@@ -2495,27 +2460,14 @@ async def create_evaluation_run(
             raise ValueError(
                 "At most one of agent_info or inference_configs can be provided."
             )
-        if agent_info and isinstance(agent_info, dict):
-            agent_info = types.evals.AgentInfo.model_validate(agent_info)
-        if isinstance(dataset, types.EvaluationDataset):
-            if dataset.eval_dataset_df is None:
-                raise ValueError(
-                    "EvaluationDataset must have eval_dataset_df populated."
-                )
-            if agent_info is not None and (
-                dataset.candidate_name
-                and agent_info.name
-                and dataset.candidate_name != agent_info.name
-            ):
-                logger.warning(
-                    "Evaluation dataset candidate_name and agent_info.name are different. Please make sure this is intended."
-                )
-            elif dataset.candidate_name is None and agent_info:
-                dataset.candidate_name = agent_info.name
-            eval_set = _evals_common._create_evaluation_set_from_dataframe(
-                self._api_client, dest, dataset.eval_dataset_df, dataset.candidate_name
-            )
-            dataset = types.EvaluationRunDataSource(evaluation_set=eval_set.name)
+        agent_info_pydantic = (
+            types.evals.AgentInfo.model_validate(agent_info)
+            if isinstance(agent_info, dict)
+            else (agent_info or types.evals.AgentInfo())
+        )
+        resolved_dataset = _evals_common._resolve_dataset(
+            self._api_client, dataset, dest, agent_info_pydantic
+        )
         output_config = genai_types.OutputConfig(
             gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
         )
@@ -2525,31 +2477,21 @@ async def create_evaluation_run(
         evaluation_config = types.EvaluationRunConfig(
             output_config=output_config, metrics=resolved_metrics
         )
-        if agent_info and agent_info.name is not None:
-            inference_configs = {}
-            inference_configs[agent_info.name] = types.EvaluationRunInferenceConfig(
-                agent_config=types.EvaluationRunAgentConfig(
-                    developer_instruction=genai_types.Content(
-                        parts=[genai_types.Part(text=agent_info.instruction)]
-                    ),
-                    tools=agent_info.tool_declarations,
-                )
-            )
-            if agent_info.agent_resource_name:
-                labels = labels or {}
-                labels["vertex-ai-evaluation-agent-engine-id"] = (
-                    agent_info.agent_resource_name.split("reasoningEngines/")[-1]
-                )
-        if not name:
-            name = f"evaluation_run_{uuid.uuid4()}"
+        resolved_inference_configs = _evals_common._resolve_inference_configs(
+            inference_configs, agent_info_pydantic
+        )
+        resolved_labels = _evals_common._add_evaluation_run_labels(
+            labels, agent_info_pydantic
+        )
+        resolved_name = name or f"evaluation_run_{uuid.uuid4()}"
         result = await self._create_evaluation_run(
-            name=name,
-            display_name=display_name or name,
-            data_source=dataset,
+            name=resolved_name,
+            display_name=display_name or resolved_name,
+            data_source=resolved_dataset,
             evaluation_config=evaluation_config,
-            inference_configs=inference_configs,
-            labels=labels,
+            inference_configs=resolved_inference_configs,
+            labels=resolved_labels,
             config=config,
         )