@@ -1253,6 +1253,143 @@ def test_run_inference_with_agent_engine_with_response_column_raises_error(
12531253 "'intermediate_events' or 'response' columns"
12541254 ) in str (excinfo .value )
12551255
+    @mock.patch.object(_evals_utils, "EvalDatasetLoader")
+    @mock.patch(
+        "vertexai._genai._evals_common.InMemorySessionService"
+    )
+    @mock.patch("vertexai._genai._evals_common.Runner")
+    @mock.patch("vertexai._genai._evals_common.LlmAgent")
+    def test_run_inference_with_local_agent(
+        self,
+        mock_llm_agent,
+        mock_runner,
+        mock_session_service,
+        mock_eval_dataset_loader,
+    ):
+        mock_df = pd.DataFrame(
+            {
+                "prompt": ["agent prompt", "agent prompt 2"],
+                "session_inputs": [
+                    {
+                        "user_id": "123",
+                        "state": {"a": "1"},
+                    },
+                    {
+                        "user_id": "456",
+                        "state": {"b": "2"},
+                    },
+                ],
+            }
+        )
+        mock_eval_dataset_loader.return_value.load.return_value = mock_df.to_dict(
+            orient="records"
+        )
+
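+        # Stub the local agent, its session service, and the Runner that drives it.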
+        mock_agent_instance = mock.Mock()
+        mock_llm_agent.return_value = mock_agent_instance
+        mock_session_service.return_value.create_session = mock.AsyncMock()
+        mock_runner_instance = mock_runner.return_value
+        stream_run_return_value_1 = [
+            mock.Mock(
+                model_dump=lambda: {
+                    "id": "1",
+                    "content": {"parts": [{"text": "intermediate1"}]},
+                    "timestamp": 123,
+                    "author": "model",
+                }
+            ),
+            mock.Mock(
+                model_dump=lambda: {
+                    "id": "2",
+                    "content": {"parts": [{"text": "agent response"}]},
+                    "timestamp": 124,
+                    "author": "model",
+                }
+            ),
+        ]
+        stream_run_return_value_2 = [
+            mock.Mock(
+                model_dump=lambda: {
+                    "id": "3",
+                    "content": {"parts": [{"text": "intermediate2"}]},
+                    "timestamp": 125,
+                    "author": "model",
+                }
+            ),
+            mock.Mock(
+                model_dump=lambda: {
+                    "id": "4",
+                    "content": {"parts": [{"text": "agent response 2"}]},
+                    "timestamp": 126,
+                    "author": "model",
+                }
+            ),
+        ]
+
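+        # Async generator helper mimicking the event stream that run_async yields.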
+        async def async_iterator(items):
+            for item in items:
+                yield item
+
+        mock_runner_instance.run_async.side_effect = [
+            async_iterator(stream_run_return_value_1),
+            async_iterator(stream_run_return_value_2),
+        ]
+
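+        # Run inference; one session and one runner invocation are expected per prompt row.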
+        inference_result = self.client.evals.run_inference(
+            agent=mock_agent_instance,
+            src=mock_df,
+        )
+
+        mock_eval_dataset_loader.return_value.load.assert_called_once_with(mock_df)
+        assert mock_session_service.call_count == 2
+        mock_runner.assert_called_with(
+            agent=mock_agent_instance,
+            app_name="local agent run",
+            session_service=mock_session_service.return_value,
+        )
+        assert mock_runner.call_count == 2
+        assert mock_runner_instance.run_async.call_count == 2
+
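+        # The result keeps the input columns and adds intermediate_events and response.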
+        pd.testing.assert_frame_equal(
+            inference_result.eval_dataset_df,
+            pd.DataFrame(
+                {
+                    "prompt": ["agent prompt", "agent prompt 2"],
+                    "session_inputs": [
+                        {
+                            "user_id": "123",
+                            "state": {"a": "1"},
+                        },
+                        {
+                            "user_id": "456",
+                            "state": {"b": "2"},
+                        },
+                    ],
+                    "intermediate_events": [
+                        [
+                            {
+                                "event_id": "1",
+                                "content": {"parts": [{"text": "intermediate1"}]},
+                                "creation_timestamp": 123,
+                                "author": "model",
+                            }
+                        ],
+                        [
+                            {
+                                "event_id": "3",
+                                "content": {"parts": [{"text": "intermediate2"}]},
+                                "creation_timestamp": 125,
+                                "author": "model",
+                            }
+                        ],
+                    ],
+                    "response": ["agent response", "agent response 2"],
+                }
+            ),
+        )
+        assert inference_result.candidate_name is None
+        assert inference_result.gcs_source is None
+
     def test_run_inference_with_litellm_string_prompt_format(
         self,
         mock_api_client_fixture,
@@ -1605,6 +1742,7 @@ def test_run_agent_internal_success(self, mock_run_agent):
         result_df = _evals_common._run_agent_internal(
             api_client=mock_api_client,
             agent_engine=mock_agent_engine,
+            agent=None,
             prompt_dataset=prompt_dataset,
         )

@@ -1635,6 +1773,7 @@ def test_run_agent_internal_error_response(self, mock_run_agent):
         result_df = _evals_common._run_agent_internal(
             api_client=mock_api_client,
             agent_engine=mock_agent_engine,
+            agent=None,
             prompt_dataset=prompt_dataset,
         )

@@ -1661,6 +1800,7 @@ def test_run_agent_internal_malformed_event(self, mock_run_agent):
         result_df = _evals_common._run_agent_internal(
             api_client=mock_api_client,
             agent_engine=mock_agent_engine,
+            agent=None,
             prompt_dataset=prompt_dataset,
         )
         assert "response" in result_df.columns
@@ -4990,7 +5130,9 @@ def test_execute_evaluation_adds_creation_timestamp(
         frozenset(["summarization_quality"]),
     )
     @mock.patch("time.sleep", return_value=None)
-    @mock.patch("vertexai._genai.evals.Evals._evaluate_instances")
+    @mock.patch(
+        "vertexai._genai.evals.Evals._evaluate_instances"
+    )
     def test_predefined_metric_retry_on_resource_exhausted(
         self,
         mock_private_evaluate_instances,
@@ -5043,7 +5185,9 @@ def test_predefined_metric_retry_on_resource_exhausted(
         frozenset(["summarization_quality"]),
     )
     @mock.patch("time.sleep", return_value=None)
-    @mock.patch("vertexai._genai.evals.Evals._evaluate_instances")
+    @mock.patch(
+        "vertexai._genai.evals.Evals._evaluate_instances"
+    )
     def test_predefined_metric_retry_fail_on_resource_exhausted(
         self,
         mock_private_evaluate_instances,