diff --git a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
index e33b48b69b..9b6cc31735 100644
--- a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
+++ b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
@@ -19,7 +19,7 @@
 from google.genai import types as genai_types
 import pytest
 
-GCS_DEST = "gs://lakeyk-test-limited/eval_run_output"
+GCS_DEST = "gs://lakeyk-limited-bucket/eval_run_output"
 UNIVERSAL_AR_METRIC = types.EvaluationRunMetric(
     metric="universal_ar_v1",
     metric_config=types.UnifiedMetric(
@@ -51,9 +51,6 @@
 # TODO(b/431231205): Re-enable once Unified Metrics are in prod.
 # def test_create_eval_run_data_source_evaluation_set(client):
 #     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun."""
-#     client._api_client._http_options.base_url = (
-#         "https://us-central1-autopush-aiplatform.sandbox.googleapis.com/"
-#     )
 #     client._api_client._http_options.api_version = "v1beta1"
 #     tool = genai_types.Tool(
 #         function_declarations=[
@@ -80,10 +77,12 @@
 #             LLM_METRIC
 #         ],
 #         agent_info=types.AgentInfo(
+#             agent="project/123/locations/us-central1/reasoningEngines/456",
 #             name="agent-1",
 #             instruction="agent-1 instruction",
 #             tool_declarations=[tool],
 #         ),
+#         labels={"label1": "value1"},
 #     )
 #     assert isinstance(evaluation_run, types.EvaluationRun)
 #     assert evaluation_run.display_name == "test4"
@@ -108,6 +107,10 @@
 #             tools=[tool],
 #         )
 #     )
+#     assert evaluation_run.labels == {
+#         "vertex-ai-evaluation-agent-engine-id": "456",
+#         "label1": "value1",
+#     }
 #     assert evaluation_run.error is None
 
 
@@ -127,6 +130,7 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
                 },
             )
         ),
+        labels={"label1": "value1"},
         dest=GCS_DEST,
     )
     assert isinstance(evaluation_run, types.EvaluationRun)
@@ -150,6 +154,9 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
         ),
     )
     assert evaluation_run.inference_configs is None
+    assert evaluation_run.labels == {
+        "label1": "value1",
+    }
     assert evaluation_run.error is None
 
 
@@ -289,6 +296,8 @@ async def test_create_eval_run_async(client):
     assert evaluation_run.error is None
     assert evaluation_run.inference_configs is None
     assert evaluation_run.error is None
+    assert evaluation_run.labels is None
+    assert evaluation_run.error is None
 
 
 pytestmark = pytest_helper.setup(
diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py
index 74a681c152..f5aa8d6b47 100644
--- a/vertexai/_genai/evals.py
+++ b/vertexai/_genai/evals.py
@@ -77,6 +77,9 @@ def _CreateEvaluationRunParameters_to_vertex(
     if getv(from_object, ["evaluation_config"]) is not None:
         setv(to_object, ["evaluationConfig"], getv(from_object, ["evaluation_config"]))
 
+    if getv(from_object, ["labels"]) is not None:
+        setv(to_object, ["labels"], getv(from_object, ["labels"]))
+
     if getv(from_object, ["config"]) is not None:
         setv(to_object, ["config"], getv(from_object, ["config"]))
 
@@ -236,6 +239,9 @@ def _EvaluationRun_from_vertex(
     if getv(from_object, ["inferenceConfigs"]) is not None:
         setv(to_object, ["inference_configs"], getv(from_object, ["inferenceConfigs"]))
 
+    if getv(from_object, ["labels"]) is not None:
+        setv(to_object, ["labels"], getv(from_object, ["labels"]))
+
     return to_object
 
 
@@ -464,6 +470,7 @@ def _create_evaluation_run(
         display_name: Optional[str] = None,
         data_source: types.EvaluationRunDataSourceOrDict,
         evaluation_config: types.EvaluationRunConfigOrDict,
+        labels: Optional[dict[str, str]] = None,
         config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
         inference_configs: Optional[
            dict[str, types.EvaluationRunInferenceConfigOrDict]
@@ -478,6 +485,7 @@ def _create_evaluation_run(
             display_name=display_name,
             data_source=data_source,
             evaluation_config=evaluation_config,
+            labels=labels,
             config=config,
             inference_configs=inference_configs,
         )
@@ -1316,6 +1324,7 @@ def create_evaluation_run(
             list[types.EvaluationRunMetricOrDict]
         ] = None,  # TODO: Make required unified metrics available in prod.
         agent_info: Optional[types.AgentInfo] = None,
+        labels: Optional[dict[str, str]] = None,
         config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
     ) -> types.EvaluationRun:
         """Creates an EvaluationRun."""
@@ -1353,6 +1362,17 @@
                     tools=agent_info.tool_declarations,
                 )
             )
+            if (
+                not agent_info.agent
+                or len(agent_info.agent.split("reasoningEngines/")) != 2
+            ):
+                raise ValueError(
+                    "agent_info.agent cannot be empty. Please provide a valid reasoning engine resource name in the format of projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine}."
+                )
+            labels = labels or {}
+            labels["vertex-ai-evaluation-agent-engine-id"] = agent_info.agent.split(
+                "reasoningEngines/"
+            )[-1]
 
         return self._create_evaluation_run(  # type: ignore[no-any-return]
             name=name,
@@ -1360,6 +1380,7 @@
             data_source=dataset,
             evaluation_config=evaluation_config,
             inference_configs=inference_configs,
+            labels=labels,
             config=config,
         )
 
@@ -1566,6 +1587,7 @@ async def _create_evaluation_run(
         display_name: Optional[str] = None,
         data_source: types.EvaluationRunDataSourceOrDict,
         evaluation_config: types.EvaluationRunConfigOrDict,
+        labels: Optional[dict[str, str]] = None,
         config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
         inference_configs: Optional[
            dict[str, types.EvaluationRunInferenceConfigOrDict]
@@ -1580,6 +1602,7 @@ async def _create_evaluation_run(
             display_name=display_name,
             data_source=data_source,
             evaluation_config=evaluation_config,
+            labels=labels,
             config=config,
             inference_configs=inference_configs,
         )
@@ -2121,6 +2144,7 @@ async def create_evaluation_run(
             list[types.EvaluationRunMetricOrDict]
         ] = None,  # TODO: Make required unified metrics available in prod.
         agent_info: Optional[types.AgentInfo] = None,
+        labels: Optional[dict[str, str]] = None,
         config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
     ) -> types.EvaluationRun:
         """Creates an EvaluationRun."""
@@ -2158,6 +2182,17 @@
                     tools=agent_info.tool_declarations,
                 )
             )
+            if (
+                not agent_info.agent
+                or len(agent_info.agent.split("reasoningEngines/")) != 2
+            ):
+                raise ValueError(
+                    "agent_info.agent cannot be empty. Please provide a valid reasoning engine resource name in the format of projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine}."
+                )
+            labels = labels or {}
+            labels["vertex-ai-evaluation-agent-engine-id"] = agent_info.agent.split(
+                "reasoningEngines/"
+            )[-1]
 
         result = await self._create_evaluation_run(  # type: ignore[no-any-return]
             name=name,
@@ -2165,6 +2200,7 @@
             data_source=dataset,
             evaluation_config=evaluation_config,
             inference_configs=inference_configs,
+            labels=labels,
             config=config,
         )
 
diff --git a/vertexai/_genai/types.py b/vertexai/_genai/types.py
index 15e232c586..17d0221b6d 100644
--- a/vertexai/_genai/types.py
+++ b/vertexai/_genai/types.py
@@ -1220,6 +1220,7 @@ class _CreateEvaluationRunParameters(_common.BaseModel):
     evaluation_config: Optional[EvaluationRunConfig] = Field(
         default=None, description=""""""
     )
+    labels: Optional[dict[str, str]] = Field(default=None, description="""""")
     config: Optional[CreateEvaluationRunConfig] = Field(
         default=None, description=""""""
     )
@@ -1243,6 +1244,9 @@ class _CreateEvaluationRunParametersDict(TypedDict, total=False):
     evaluation_config: Optional[EvaluationRunConfigDict]
     """"""
 
+    labels: Optional[dict[str, str]]
+    """"""
+
     config: Optional[CreateEvaluationRunConfigDict]
     """"""
 
@@ -1482,6 +1486,11 @@ class EventDict(TypedDict, total=False):
 class AgentInfo(_common.BaseModel):
     """The agent info of an agent, used for agent eval."""
 
+    agent: Optional[str] = Field(
+        default=None,
+        description="""The agent engine used to run agent. Agent engine resource name in str type, with format
+      `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.""",
+    )
     name: Optional[str] = Field(
         default=None, description="""Agent name, used as an identifier."""
     )
@@ -1499,6 +1508,10 @@
 class AgentInfoDict(TypedDict, total=False):
     """The agent info of an agent, used for agent eval."""
 
+    agent: Optional[str]
+    """The agent engine used to run agent. Agent engine resource name in str type, with format
+      `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`."""
+
     name: Optional[str]
     """Agent name, used as an identifier."""
 
@@ -1919,6 +1932,7 @@ class EvaluationRun(_common.BaseModel):
         default=None,
         description="""This field is experimental and may change in future versions. The inference configs for the evaluation run.""",
     )
+    labels: Optional[dict[str, str]] = Field(default=None, description="""""")
 
     # TODO(b/448806531): Remove all the overridden _from_response methods once the
     # ticket is resolved and published.
@@ -2003,6 +2017,9 @@ class EvaluationRunDict(TypedDict, total=False):
     inference_configs: Optional[dict[str, "EvaluationRunInferenceConfigDict"]]
     """This field is experimental and may change in future versions. The inference configs for the evaluation run."""
 
+    labels: Optional[dict[str, str]]
+    """"""
+
 
 EvaluationRunOrDict = Union[EvaluationRun, EvaluationRunDict]
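
For readers skimming the hunks above, the snippet below restates, outside the SDK, the validation and label-merging rule that both create_evaluation_run overloads now apply when agent_info.agent is set. It is a standalone sketch, not part of the diff; the resource name and the "label1" entry are placeholder values mirroring the tests in this change.

# Standalone sketch (not part of this diff) of the new agent_info.agent
# validation and label merging; values are placeholders from the tests above.
agent = "projects/123/locations/us-central1/reasoningEngines/456"
labels = {"label1": "value1"}

# create_evaluation_run() rejects agent values that are not a full
# reasoning-engine resource name.
if not agent or len(agent.split("reasoningEngines/")) != 2:
    raise ValueError("agent must look like projects/.../reasoningEngines/{id}")

# The engine ID is merged into the user-supplied labels under a reserved key.
labels["vertex-ai-evaluation-agent-engine-id"] = agent.split("reasoningEngines/")[-1]

assert labels == {
    "label1": "value1",
    "vertex-ai-evaluation-agent-engine-id": "456",
}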