
Commit 5b5e6bd

vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client(evals) - Add inference_configs to create_evaluation_run method in Vertex AI GenAI SDK evals
PiperOrigin-RevId: 821689846
1 parent 83553a9 commit 5b5e6bd

4 files changed, +184 -0 lines changed
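For orientation, here is a minimal sketch of how the new agent_info argument (and the resulting inference_configs on the returned EvaluationRun) might be exercised from the public SDK surface. The client construction is assumed, and the run name, project number, evaluation set, and bucket below are placeholders modeled on the tests in this commit:

import vertexai
from vertexai import types
from google.genai import types as genai_types

# Assumed client setup; project and location are placeholders.
client = vertexai.Client(project="my-project", location="us-central1")

# A tool the agent is allowed to call during inference.
tool = genai_types.Tool(
    function_declarations=[
        genai_types.FunctionDeclaration(
            name="get_weather",
            description="Get weather in a location",
            parameters={
                "type": "object",
                "properties": {"location": {"type": "string"}},
            },
        )
    ]
)

# Passing agent_info makes create_evaluation_run populate
# inference_configs[agent_info.name] with an EvaluationRunInferenceConfig
# wrapping an EvaluationRunAgentConfig (developer instruction + tools).
evaluation_run = client.evals.create_evaluation_run(
    name="my-eval-run",
    display_name="my-eval-run",
    data_source=types.EvaluationRunDataSource(
        evaluation_set="projects/PROJECT_NUMBER/locations/us-central1/evaluationSets/EVAL_SET_ID"
    ),
    agent_info=types.AgentInfo(
        name="agent-1",
        instruction="agent-1 instruction",
        tool_declarations=[tool],
    ),
    dest="gs://my-bucket/eval_run_output",
)
print(evaluation_run.inference_configs)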

tests/unit/vertexai/genai/replays/test_create_evaluation_run.py

Lines changed: 32 additions & 0 deletions
@@ -16,17 +16,36 @@
 
 from tests.unit.vertexai.genai.replays import pytest_helper
 from vertexai import types
+from google.genai import types as genai_types
 import pytest
 
 
 def test_create_eval_run_data_source_evaluation_set(client):
     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun."""
+    client._api_client._http_options.api_version = "v1beta1"
+    tool = genai_types.Tool(
+        function_declarations=[
+            genai_types.FunctionDeclaration(
+                name="get_weather",
+                description="Get weather in a location",
+                parameters={
+                    "type": "object",
+                    "properties": {"location": {"type": "string"}},
+                },
+            )
+        ]
+    )
     evaluation_run = client.evals.create_evaluation_run(
         name="test4",
         display_name="test4",
         data_source=types.EvaluationRunDataSource(
             evaluation_set="projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
         ),
+        agent_info=types.AgentInfo(
+            name="agent-1",
+            instruction="agent-1 instruction",
+            tool_declarations=[tool],
+        ),
         dest="gs://lakeyk-test-limited/eval_run_output",
     )
     assert isinstance(evaluation_run, types.EvaluationRun)
@@ -36,6 +55,16 @@ def test_create_eval_run_data_source_evaluation_set(client):
     assert evaluation_run.data_source.evaluation_set == (
         "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
     )
+    assert evaluation_run.inference_configs[
+        "agent-1"
+    ] == types.EvaluationRunInferenceConfig(
+        agent_config=types.EvaluationRunAgentConfig(
+            developer_instruction=genai_types.Content(
+                parts=[genai_types.Part(text="agent-1 instruction")]
+            ),
+            tools=[tool],
+        )
+    )
     assert evaluation_run.error is None
 
 
@@ -72,6 +101,7 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
             },
         )
     )
+    assert evaluation_run.inference_configs is None
     assert evaluation_run.error is None
 
 
@@ -108,6 +138,8 @@ async def test_create_eval_run_async(client):
             "checkpoint_2": "checkpoint_2",
         },
     )
+    assert evaluation_run.inference_configs is None
+    assert evaluation_run.error is None
 
 
 pytestmark = pytest_helper.setup(

tests/unit/vertexai/genai/replays/test_get_evaluation_run.py

Lines changed: 8 additions & 0 deletions
@@ -137,6 +137,14 @@ def check_run_1957799200510967808(
     assert evaluation_run.evaluation_run_results.evaluation_set == (
         "projects/503583131166/locations/us-central1/evaluationSets/102386522778501120"
     )
+    assert evaluation_run.inference_configs == {
+        "checkpoint_1": types.EvaluationRunInferenceConfig(
+            model="projects/503583131166/locations/us-central1/endpoints/9030177948249882624"
+        ),
+        "checkpoint_2": types.EvaluationRunInferenceConfig(
+            model="projects/503583131166/locations/us-central1/endpoints/7751155654076661760"
+        ),
+    }
     assert evaluation_run.evaluation_run_results.summary_metrics == (
         types.SummaryMetric(
             metrics={

vertexai/_genai/evals.py

Lines changed: 44 additions & 0 deletions
@@ -80,6 +80,9 @@ def _CreateEvaluationRunParameters_to_vertex(
     if getv(from_object, ["config"]) is not None:
         setv(to_object, ["config"], getv(from_object, ["config"]))
 
+    if getv(from_object, ["inference_configs"]) is not None:
+        setv(to_object, ["inferenceConfigs"], getv(from_object, ["inference_configs"]))
+
     return to_object
 
 
@@ -227,6 +230,9 @@ def _EvaluationRun_from_vertex(
             getv(from_object, ["evaluationResults"]),
         )
 
+    if getv(from_object, ["inferenceConfigs"]) is not None:
+        setv(to_object, ["inference_configs"], getv(from_object, ["inferenceConfigs"]))
+
     return to_object
 
 
@@ -456,6 +462,9 @@ def _create_evaluation_run(
         data_source: types.EvaluationRunDataSourceOrDict,
         evaluation_config: genai_types.EvaluationConfigOrDict,
         config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
+        inference_configs: Optional[
+            dict[str, types.EvaluationRunInferenceConfigOrDict]
+        ] = None,
     ) -> types.EvaluationRun:
         """
         Creates an EvaluationRun.
@@ -467,6 +476,7 @@ def _create_evaluation_run(
             data_source=data_source,
             evaluation_config=evaluation_config,
             config=config,
+            inference_configs=inference_configs,
         )
 
         request_url_dict: Optional[dict[str, str]]
@@ -1289,19 +1299,34 @@ def create_evaluation_run(
         display_name: Optional[str] = None,
         data_source: types.EvaluationRunDataSource,
         dest: str,
+        agent_info: Optional[types.AgentInfo] = None,
        config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
     ) -> types.EvaluationRun:
         """Creates an EvaluationRun."""
         output_config = genai_types.OutputConfig(
             gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
         )
         evaluation_config = genai_types.EvaluationConfig(output_config=output_config)
+        inference_configs = {}
+        if agent_info:
+            logger.warning(
+                "The agent_info field is experimental and may change in future versions."
+            )
+            inference_configs[agent_info.name] = types.EvaluationRunInferenceConfig(
+                agent_config=types.EvaluationRunAgentConfig(
+                    developer_instruction=genai_types.Content(
+                        parts=[genai_types.Part(text=agent_info.instruction)]
+                    ),
+                    tools=agent_info.tool_declarations,
+                )
+            )
 
         return self._create_evaluation_run(  # type: ignore[no-any-return]
             name=name,
             display_name=display_name,
             data_source=data_source,
             evaluation_config=evaluation_config,
+            inference_configs=inference_configs,
             config=config,
         )
 
@@ -1509,6 +1534,9 @@ async def _create_evaluation_run(
         data_source: types.EvaluationRunDataSourceOrDict,
         evaluation_config: genai_types.EvaluationConfigOrDict,
         config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
+        inference_configs: Optional[
+            dict[str, types.EvaluationRunInferenceConfigOrDict]
+        ] = None,
     ) -> types.EvaluationRun:
         """
         Creates an EvaluationRun.
@@ -1520,6 +1548,7 @@ async def _create_evaluation_run(
             data_source=data_source,
             evaluation_config=evaluation_config,
             config=config,
+            inference_configs=inference_configs,
         )
 
         request_url_dict: Optional[dict[str, str]]
@@ -2055,19 +2084,34 @@ async def create_evaluation_run(
         display_name: Optional[str] = None,
         data_source: types.EvaluationRunDataSource,
         dest: str,
+        agent_info: Optional[types.AgentInfo] = None,
         config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
     ) -> types.EvaluationRun:
         """Creates an EvaluationRun."""
         output_config = genai_types.OutputConfig(
             gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
         )
         evaluation_config = genai_types.EvaluationConfig(output_config=output_config)
+        inference_configs = {}
+        if agent_info:
+            logger.warning(
+                "The agent_info field is experimental and may change in future versions."
+            )
+            inference_configs[agent_info.name] = types.EvaluationRunInferenceConfig(
+                agent_config=types.EvaluationRunAgentConfig(
+                    developer_instruction=genai_types.Content(
+                        parts=[genai_types.Part(text=agent_info.instruction)]
+                    ),
+                    tools=agent_info.tool_declarations,
+                )
+            )
 
         result = await self._create_evaluation_run(  # type: ignore[no-any-return]
             name=name,
             display_name=display_name,
             data_source=data_source,
             evaluation_config=evaluation_config,
+            inference_configs=inference_configs,
             config=config,
         )
 

vertexai/_genai/types.py

Lines changed: 100 additions & 0 deletions
@@ -995,6 +995,9 @@ class _CreateEvaluationRunParameters(_common.BaseModel):
     config: Optional[CreateEvaluationRunConfig] = Field(
         default=None, description=""""""
     )
+    inference_configs: Optional[dict[str, "EvaluationRunInferenceConfig"]] = Field(
+        default=None, description=""""""
+    )
 
 
 class _CreateEvaluationRunParametersDict(TypedDict, total=False):
@@ -1015,6 +1018,9 @@ class _CreateEvaluationRunParametersDict(TypedDict, total=False):
     config: Optional[CreateEvaluationRunConfigDict]
     """"""
 
+    inference_configs: Optional[dict[str, "EvaluationRunInferenceConfigDict"]]
+    """"""
+
 
 _CreateEvaluationRunParametersOrDict = Union[
     _CreateEvaluationRunParameters, _CreateEvaluationRunParametersDict
@@ -1678,6 +1684,32 @@ class EvaluationRun(_common.BaseModel):
         default=None,
         description="""The parsed EvaluationItem results for the evaluation run. This is only populated when include_evaluation_items is set to True.""",
     )
+    inference_configs: Optional[dict[str, "EvaluationRunInferenceConfig"]] = Field(
+        default=None,
+        description="""This field is experimental and may change in future versions. The inference configs for the evaluation run.""",
+    )
+
+    # TODO(b/448806531): Remove all the overridden _from_response methods once the
+    # ticket is resolved and published.
+    @classmethod
+    def _from_response(
+        cls: typing.Type["EvaluationRun"],
+        *,
+        response: dict[str, object],
+        kwargs: dict[str, object],
+    ) -> "EvaluationRun":
+        """Converts a dictionary response into a EvaluationRun object."""
+
+        snaked_response = _camel_key_to_snake(response)
+        if (
+            "evaluation_run_results" in response
+            and "summaryMetrics" in response["evaluation_run_results"]
+        ):
+            snaked_response["evaluation_run_results"]["summary_metrics"] = response[
+                "evaluation_run_results"
+            ]["summaryMetrics"]
+        result = super()._from_response(response=snaked_response, kwargs=kwargs)
+        return result
 
     def show(self) -> None:
         """Shows the evaluation result."""
@@ -1734,6 +1766,9 @@ class EvaluationRunDict(TypedDict, total=False):
     evaluation_item_results: Optional[EvaluationResultDict]
     """The parsed EvaluationItem results for the evaluation run. This is only populated when include_evaluation_items is set to True."""
 
+    inference_configs: Optional[dict[str, "EvaluationRunInferenceConfigDict"]]
+    """This field is experimental and may change in future versions. The inference configs for the evaluation run."""
+
 
 EvaluationRunOrDict = Union[EvaluationRun, EvaluationRunDict]
 
@@ -11867,6 +11902,71 @@ class EvalCaseMetricResultDict(TypedDict, total=False):
 EvalCaseMetricResultOrDict = Union[EvalCaseMetricResult, EvalCaseMetricResultDict]
 
 
+class EvaluationRunAgentConfig(_common.BaseModel):
+    """This field is experimental and may change in future versions.
+
+    Agent config for an evaluation run.
+    """
+
+    developer_instruction: Optional[genai_types.Content] = Field(
+        default=None, description="""The developer instruction for the agent."""
+    )
+    tools: Optional[list[genai_types.Tool]] = Field(
+        default=None, description="""The tools available to the agent."""
+    )
+
+
+class EvaluationRunAgentConfigDict(TypedDict, total=False):
+    """This field is experimental and may change in future versions.
+
+    Agent config for an evaluation run.
+    """
+
+    developer_instruction: Optional[genai_types.ContentDict]
+    """The developer instruction for the agent."""
+
+    tools: Optional[list[genai_types.ToolDict]]
+    """The tools available to the agent."""
+
+
+EvaluationRunAgentConfigOrDict = Union[
+    EvaluationRunAgentConfig, EvaluationRunAgentConfigDict
+]
+
+
+class EvaluationRunInferenceConfig(_common.BaseModel):
+    """This field is experimental and may change in future versions.
+
+    Configuration that describes an agent.
+    """
+
+    agent_config: Optional[EvaluationRunAgentConfig] = Field(
+        default=None, description="""The agent config."""
+    )
+    model: Optional[str] = Field(
+        default=None,
+        description="""The fully qualified name of the publisher model or endpoint to use for inference.""",
+    )
+
+
+class EvaluationRunInferenceConfigDict(TypedDict, total=False):
+    """This field is experimental and may change in future versions.
+
+    Configuration that describes an agent.
+    """
+
+    agent_config: Optional[EvaluationRunAgentConfigDict]
+    """The agent config."""
+
+    model: Optional[str]
+    """The fully qualified name of the publisher model or endpoint to use for inference."""
+
+
+EvaluationRunInferenceConfigOrDict = Union[
+    EvaluationRunInferenceConfig, EvaluationRunInferenceConfigDict
+]
+
+
 class SessionInput(_common.BaseModel):
     """This field is experimental and may change in future versions.
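As the test_get_evaluation_run change above illustrates, an inference_configs entry can also reference a model endpoint directly rather than an agent config, for example when an evaluation run compares tuning checkpoints. A sketch of what such a mapping can look like when read back from an EvaluationRun (the checkpoint keys and endpoint resource names are placeholders):

from vertexai import types

# Each key identifies an inference target (e.g. a tuned checkpoint); in the
# tests in this commit, an entry sets either model or agent_config, not both.
inference_configs = {
    "checkpoint_1": types.EvaluationRunInferenceConfig(
        model="projects/PROJECT_NUMBER/locations/us-central1/endpoints/ENDPOINT_ID_1"
    ),
    "checkpoint_2": types.EvaluationRunInferenceConfig(
        model="projects/PROJECT_NUMBER/locations/us-central1/endpoints/ENDPOINT_ID_2"
    ),
}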
