Skip to content

Commit 531d223

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client(evals): Add warning message when tool usage is empty for tool_use_quality
PiperOrigin-RevId: 828583199
1 parent acb6cab commit 531d223

File tree

2 files changed

+111
-0
lines changed

2 files changed

+111
-0
lines changed

tests/unit/vertexai/genai/test_evals.py

Lines changed: 91 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1515,6 +1515,64 @@ def test_run_inference_with_litellm_parsing(
15151515
pd.testing.assert_frame_equal(call_kwargs["prompt_dataset"], mock_df)
15161516

15171517

1518+
@pytest.mark.usefixtures("google_auth_mock")
class TestEvalsMetricHandlers:
    """Exercises the module-level helpers of _evals_metric_handlers.

    Every test here targets `_has_tool_call`, feeding it event lists that
    contain a function call, plain text only, nothing at all, or a mix.
    """

    @staticmethod
    def _text_event(event_id):
        """Returns an Event whose content is a single text part ("hello")."""
        text_part = genai_types.Part(text="hello")
        return vertexai_genai_types.evals.Event(
            event_id=event_id,
            content=genai_types.Content(parts=[text_part]),
        )

    @staticmethod
    def _search_call_event(event_id):
        """Returns an Event whose content is a single `search` function call."""
        call = genai_types.FunctionCall(name="search", args={})
        call_part = genai_types.Part(function_call=call)
        return vertexai_genai_types.evals.Event(
            event_id=event_id,
            content=genai_types.Content(parts=[call_part]),
        )

    def test_has_tool_call_with_tool_call(self):
        # A lone function-call event must be detected.
        only_call = [self._search_call_event("1")]
        assert _evals_metric_handlers._has_tool_call(only_call)

    def test_has_tool_call_no_tool_call(self):
        # Text-only content carries no tool usage.
        only_text = [self._text_event("1")]
        assert not _evals_metric_handlers._has_tool_call(only_text)

    def test_has_tool_call_empty_events(self):
        no_events = []
        assert not _evals_metric_handlers._has_tool_call(no_events)

    def test_has_tool_call_none_events(self):
        missing_events = None
        assert not _evals_metric_handlers._has_tool_call(missing_events)

    def test_has_tool_call_mixed_events(self):
        # The call sits in the second event, after a text-only one.
        text_then_call = [
            self._text_event("1"),
            self._search_call_event("2"),
        ]
        assert _evals_metric_handlers._has_tool_call(text_then_call)
1574+
1575+
15181576
@pytest.mark.usefixtures("google_auth_mock")
15191577
class TestRunAgentInternal:
15201578
"""Unit tests for the _run_agent_internal function."""
@@ -3890,6 +3948,39 @@ def test_eval_case_to_agent_data_agent_info_empty(self):
38903948

38913949
assert agent_data.agent_config is None
38923950

3951+
@mock.patch.object(_evals_metric_handlers.logger, "warning")
def test_tool_use_quality_metric_no_tool_call_logs_warning(
    self, mock_warning, mock_api_client_fixture
):
    """Checks the tool_use_quality_v1 warning when no event has a tool call.

    Builds a request payload for an eval case whose single intermediate
    event is text-only and asserts that the handler module's logger emitted
    exactly one warning naming that case.
    """
    case_id = "case-no-tool-call"

    tool_quality_metric = vertexai_genai_types.Metric(name="tool_use_quality_v1")
    evals_module = evals.Evals(api_client_=mock_api_client_fixture)
    handler = _evals_metric_handlers.PredefinedMetricHandler(
        module=evals_module, metric=tool_quality_metric
    )

    # The only intermediate event is plain text, i.e. no function call.
    text_only_event = vertexai_genai_types.evals.Event(
        event_id="event1",
        content=genai_types.Content(
            parts=[genai_types.Part(text="intermediate event")]
        ),
    )
    candidate = vertexai_genai_types.ResponseCandidate(
        response=genai_types.Content(parts=[genai_types.Part(text="Hi")])
    )
    eval_case = vertexai_genai_types.EvalCase(
        eval_case_id=case_id,
        prompt=genai_types.Content(parts=[genai_types.Part(text="Hello")]),
        responses=[candidate],
        intermediate_events=[text_only_event],
    )

    handler._build_request_payload(eval_case, response_index=0)

    # The logger receives the lazy %-style template plus the case id.
    expected_template = (
        "Metric 'tool_use_quality_v1' requires tool usage in "
        "'intermediate_events', but no tool usage was found for case %s."
    )
    mock_warning.assert_called_once_with(expected_template, case_id)
3983+
38933984

38943985
@pytest.mark.usefixtures("google_auth_mock")
38953986
class TestLLMMetricHandlerPayload:

vertexai/_genai/_evals_metric_handlers.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,18 @@
3939
_MAX_RETRIES = 3
4040

4141

42+
def _has_tool_call(intermediate_events: Optional[list[types.evals.Event]]) -> bool:
    """Checks if any event in intermediate_events has a function call.

    This helper only feeds an advisory warning, so it never raises on
    incomplete data: a missing/None event, content, parts list, or part is
    treated as "no function call here" rather than an error.

    Args:
        intermediate_events: The events recorded for an eval case. May be
            None or empty.

    Returns:
        True if at least one part of at least one event's content has a
        truthy `function_call`; False otherwise (including None/empty input).
    """
    if not intermediate_events:
        return False
    for event in intermediate_events:
        # getattr with a default tolerates None events and objects that lack
        # the attribute, mirroring the leniency already applied to parts.
        content = getattr(event, "content", None)
        parts = getattr(content, "parts", None) or ()
        if any(getattr(part, "function_call", None) for part in parts):
            return True
    return False
52+
53+
4254
def _extract_text_from_content(
4355
content: Optional[genai_types.Content], warn_property: str = "text"
4456
) -> Optional[str]:
@@ -903,6 +915,14 @@ def _build_request_payload(
903915
f"Response content missing for candidate {response_index}."
904916
)
905917

918+
if self.metric.name == "tool_use_quality_v1":
919+
if not _has_tool_call(eval_case.intermediate_events):
920+
logger.warning(
921+
"Metric 'tool_use_quality_v1' requires tool usage in "
922+
"'intermediate_events', but no tool usage was found for case %s.",
923+
eval_case.eval_case_id,
924+
)
925+
906926
reference_instance_data = None
907927
if eval_case.reference:
908928
reference_instance_data = PredefinedMetricHandler._content_to_instance_data(

0 commit comments

Comments
 (0)