Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions vertexai/_genai/_evals_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,80 @@ def _extract_contents_for_inference(
return request_dict_or_raw_text


def _resolve_dataset(
    api_client: BaseApiClient,
    dataset: Union[types.EvaluationRunDataSource, types.EvaluationDataset],
    dest: str,
    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
) -> types.EvaluationRunDataSource:
    """Resolves the dataset for an evaluation run.

    If ``dataset`` is an in-memory ``EvaluationDataset``, its dataframe is
    uploaded as an evaluation set under ``dest`` and wrapped in an
    ``EvaluationRunDataSource``. Any other data source is returned unchanged.

    Args:
        api_client: Client used to create the evaluation set.
        dataset: Either a ready data source or an in-memory dataset.
        dest: Destination prefix for the uploaded evaluation set.
        agent_info_pydantic: Optional agent info used to derive the
            candidate name.

    Returns:
        A data source referencing the (possibly newly created) evaluation set.

    Raises:
        ValueError: If an ``EvaluationDataset`` has no ``eval_dataset_df``.
    """
    if isinstance(dataset, types.EvaluationDataset):
        # Guard restored from the pre-refactor inline logic: an in-memory
        # dataset without a dataframe cannot be uploaded.
        if dataset.eval_dataset_df is None:
            raise ValueError(
                "EvaluationDataset must have eval_dataset_df populated."
            )
        candidate_name = _get_candidate_name(dataset, agent_info_pydantic)
        eval_set = _create_evaluation_set_from_dataframe(
            api_client,
            dest,
            dataset.eval_dataset_df,
            candidate_name,
        )
        dataset = types.EvaluationRunDataSource(evaluation_set=eval_set.name)
    return dataset


def _resolve_inference_configs(
    inference_configs: Optional[
        dict[str, types.EvaluationRunInferenceConfigOrDict]
    ] = None,
    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
) -> Optional[dict[str, types.EvaluationRunInferenceConfigOrDict]]:
    """Resolves inference configs for the evaluation run.

    When ``agent_info_pydantic`` carries a name, a single-entry config map
    derived from the agent's instruction and tool declarations replaces any
    provided ``inference_configs``; otherwise the input is returned as-is.
    """
    if not (agent_info_pydantic and agent_info_pydantic.name):
        return inference_configs
    developer_instruction = genai_types.Content(
        parts=[genai_types.Part(text=agent_info_pydantic.instruction)]
    )
    agent_config = types.EvaluationRunAgentConfig(
        developer_instruction=developer_instruction,
        tools=agent_info_pydantic.tool_declarations,
    )
    return {
        agent_info_pydantic.name: types.EvaluationRunInferenceConfig(
            agent_config=agent_config
        )
    }


def _add_evaluation_run_labels(
    labels: Optional[dict[str, str]] = None,
    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
) -> Optional[dict[str, str]]:
    """Adds the agent-engine id label to the evaluation run labels.

    NOTE: when a non-empty ``labels`` dict is supplied it is mutated in
    place (matching the caller-visible behavior of the inline original).
    """
    if not (agent_info_pydantic and agent_info_pydantic.agent_resource_name):
        return labels
    if not labels:
        labels = {}
    # The engine id is the trailing segment after "reasoningEngines/".
    engine_id = agent_info_pydantic.agent_resource_name.split(
        "reasoningEngines/"
    )[-1]
    labels["vertex-ai-evaluation-agent-engine-id"] = engine_id
    return labels


def _get_candidate_name(
    dataset: types.EvaluationDataset,
    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
) -> Optional[str]:
    """Internal helper to get the candidate name for an evaluation dataset.

    Precedence: a truthy ``dataset.candidate_name`` wins; the agent's name
    is used only when ``candidate_name`` is ``None``. A warning is logged
    when both names are set and disagree.

    Args:
        dataset: The dataset whose candidate name is being resolved.
        agent_info_pydantic: Optional agent info providing a fallback name.

    Returns:
        The resolved candidate name, or ``None`` if neither source has one.
    """
    # Simplified from the original, which re-tested agent_info_pydantic for
    # None/truthiness three times inside one condition.
    agent_name = agent_info_pydantic.name if agent_info_pydantic else None
    if dataset.candidate_name and agent_name and (
        dataset.candidate_name != agent_name
    ):
        logger.warning(
            "Evaluation dataset candidate_name and agent_info.name are different."
            " Please make sure this is intended."
        )
    elif dataset.candidate_name is None and agent_info_pydantic:
        # Fall back to the agent's name (which may itself be None).
        return agent_name
    return dataset.candidate_name or None


def _execute_inference_concurrently(
api_client: BaseApiClient,
prompt_dataset: pd.DataFrame,
Expand Down Expand Up @@ -1858,6 +1932,9 @@ def _object_to_dict(obj: Any) -> Union[dict[str, Any], Any]:
result[key] = value
elif isinstance(value, (list, tuple)):
result[key] = [_object_to_dict(item) for item in value]
# Add recursive handling for dictionaries
elif isinstance(value, dict):
result[key] = {k: _object_to_dict(v) for k, v in value.items()}
elif isinstance(value, bytes):
result[key] = base64.b64encode(value).decode("utf-8")
elif hasattr(value, "__dict__"): # Nested object
Expand Down
138 changes: 40 additions & 98 deletions vertexai/_genai/evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1625,32 +1625,14 @@ def create_evaluation_run(
raise ValueError(
"At most one of agent_info or inference_configs can be provided."
)
agent_info_pydantic: types.evals.AgentInfo = types.evals.AgentInfo()
if agent_info:
if isinstance(agent_info, dict):
agent_info_pydantic = types.evals.AgentInfo.model_validate(agent_info)
else:
agent_info_pydantic = agent_info
if isinstance(dataset, types.EvaluationDataset):
if dataset.eval_dataset_df is None:
raise ValueError(
"EvaluationDataset must have eval_dataset_df populated."
)
if agent_info_pydantic is not None and (
dataset.candidate_name
and agent_info_pydantic
and agent_info_pydantic.name
and dataset.candidate_name != agent_info_pydantic.name
):
logger.warning(
"Evaluation dataset candidate_name and agent_info.name are different. Please make sure this is intended."
)
elif dataset.candidate_name is None and agent_info_pydantic:
dataset.candidate_name = agent_info_pydantic.name
eval_set = _evals_common._create_evaluation_set_from_dataframe(
self._api_client, dest, dataset.eval_dataset_df, dataset.candidate_name
)
dataset = types.EvaluationRunDataSource(evaluation_set=eval_set.name)
agent_info_pydantic = (
types.evals.AgentInfo.model_validate(agent_info)
if isinstance(agent_info, dict)
else (agent_info or types.evals.AgentInfo())
)
resolved_dataset = _evals_common._resolve_dataset(
self._api_client, dataset, dest, agent_info_pydantic
)
output_config = genai_types.OutputConfig(
gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
)
Expand All @@ -1660,37 +1642,20 @@ def create_evaluation_run(
evaluation_config = types.EvaluationRunConfig(
output_config=output_config, metrics=resolved_metrics
)
if agent_info_pydantic and agent_info_pydantic.name is not None:
inference_configs = {}
inference_configs[agent_info_pydantic.name] = (
types.EvaluationRunInferenceConfig(
agent_config=types.EvaluationRunAgentConfig(
developer_instruction=genai_types.Content(
parts=[
genai_types.Part(text=agent_info_pydantic.instruction)
]
),
tools=agent_info_pydantic.tool_declarations,
)
)
)
if agent_info_pydantic.agent_resource_name:
labels = labels or {}
labels["vertex-ai-evaluation-agent-engine-id"] = (
agent_info_pydantic.agent_resource_name.split("reasoningEngines/")[
-1
]
)
if not name:
name = f"evaluation_run_{uuid.uuid4()}"

resolved_inference_configs = _evals_common._resolve_inference_configs(
inference_configs, agent_info_pydantic
)
resolved_labels = _evals_common._add_evaluation_run_labels(
labels, agent_info_pydantic
)
resolved_name = name or f"evaluation_run_{uuid.uuid4()}"
return self._create_evaluation_run(
name=name,
display_name=display_name or name,
data_source=dataset,
name=resolved_name,
display_name=display_name or resolved_name,
data_source=resolved_dataset,
evaluation_config=evaluation_config,
inference_configs=inference_configs,
labels=labels,
inference_configs=resolved_inference_configs,
labels=resolved_labels,
config=config,
)

Expand Down Expand Up @@ -2495,27 +2460,14 @@ async def create_evaluation_run(
raise ValueError(
"At most one of agent_info or inference_configs can be provided."
)
if agent_info and isinstance(agent_info, dict):
agent_info = types.evals.AgentInfo.model_validate(agent_info)
if isinstance(dataset, types.EvaluationDataset):
if dataset.eval_dataset_df is None:
raise ValueError(
"EvaluationDataset must have eval_dataset_df populated."
)
if agent_info is not None and (
dataset.candidate_name
and agent_info.name
and dataset.candidate_name != agent_info.name
):
logger.warning(
"Evaluation dataset candidate_name and agent_info.name are different. Please make sure this is intended."
)
elif dataset.candidate_name is None and agent_info:
dataset.candidate_name = agent_info.name
eval_set = _evals_common._create_evaluation_set_from_dataframe(
self._api_client, dest, dataset.eval_dataset_df, dataset.candidate_name
)
dataset = types.EvaluationRunDataSource(evaluation_set=eval_set.name)
agent_info_pydantic = (
types.evals.AgentInfo.model_validate(agent_info)
if isinstance(agent_info, dict)
else (agent_info or types.evals.AgentInfo())
)
resolved_dataset = _evals_common._resolve_dataset(
self._api_client, dataset, dest, agent_info_pydantic
)
output_config = genai_types.OutputConfig(
gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
)
Expand All @@ -2525,31 +2477,21 @@ async def create_evaluation_run(
evaluation_config = types.EvaluationRunConfig(
output_config=output_config, metrics=resolved_metrics
)
if agent_info and agent_info.name is not None:
inference_configs = {}
inference_configs[agent_info.name] = types.EvaluationRunInferenceConfig(
agent_config=types.EvaluationRunAgentConfig(
developer_instruction=genai_types.Content(
parts=[genai_types.Part(text=agent_info.instruction)]
),
tools=agent_info.tool_declarations,
)
)
if agent_info.agent_resource_name:
labels = labels or {}
labels["vertex-ai-evaluation-agent-engine-id"] = (
agent_info.agent_resource_name.split("reasoningEngines/")[-1]
)
if not name:
name = f"evaluation_run_{uuid.uuid4()}"
resolved_inference_configs = _evals_common._resolve_inference_configs(
inference_configs, agent_info_pydantic
)
resolved_labels = _evals_common._add_evaluation_run_labels(
labels, agent_info_pydantic
)
resolved_name = name or f"evaluation_run_{uuid.uuid4()}"

result = await self._create_evaluation_run(
name=name,
display_name=display_name or name,
data_source=dataset,
name=resolved_name,
display_name=display_name or resolved_name,
data_source=resolved_dataset,
evaluation_config=evaluation_config,
inference_configs=inference_configs,
labels=labels,
inference_configs=resolved_inference_configs,
labels=resolved_labels,
config=config,
)

Expand Down
Loading