Skip to content

Commit a39e386

Browse files
vertex-sdk-bot and copybara-github
authored and committed
feat: Add PromptTemplateData to support context and history columns when creating Evaluation run from dataframe
PiperOrigin-RevId: 871483777
1 parent 4a8b9a1 commit a39e386

File tree

3 files changed

+100
-6
lines changed

3 files changed

+100
-6
lines changed

tests/unit/vertexai/genai/replays/test_create_evaluation_run.py

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,9 @@ def test_create_eval_run_with_inference_configs(client):
223223
assert evaluation_run.error is None
224224

225225

226-
# Test fails in replay mode because of UUID generation mismatch.
226+
# # Test fails in replay mode because of UUID generation mismatch.
227+
# import pandas as pd
228+
227229
# def test_create_eval_run_data_source_evaluation_dataset(client):
228230
# """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with EvaluationDataset."""
229231
# input_df = pd.DataFrame(
@@ -319,6 +321,75 @@ def test_create_eval_run_with_inference_configs(client):
319321
# assert evaluation_run.error is None
320322

321323

324+
# def test_create_eval_run_data_source_evaluation_dataset_with_prompt_template_data(
325+
# client,
326+
# ):
327+
# """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with EvaluationDataset and inference_configs."""
328+
# input_df = pd.DataFrame(
329+
# {
330+
# "prompt": ["prompt1", "prompt2"],
331+
# "reference": ["reference1", "reference2"],
332+
# "response": ["response1", "response2"],
333+
# "context": ["context1", "context2"],
334+
# "conversation_history": ["history1", "history2"],
335+
# }
336+
# )
337+
# evaluation_run = client.evals.create_evaluation_run(
338+
# name="test9",
339+
# display_name="test9",
340+
# dataset=types.EvaluationDataset(
341+
# candidate_name="candidate_1",
342+
# eval_dataset_df=input_df,
343+
# ),
344+
# dest=GCS_DEST,
345+
# metrics=[GENERAL_QUALITY_METRIC],
346+
# )
347+
# assert isinstance(evaluation_run, types.EvaluationRun)
348+
# assert evaluation_run.display_name == "test9"
349+
# assert evaluation_run.state == types.EvaluationRunState.PENDING
350+
# assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
351+
# # Check evaluation set
352+
# assert evaluation_run.data_source.evaluation_set
353+
# eval_set = client.evals.get_evaluation_set(
354+
# name=evaluation_run.data_source.evaluation_set
355+
# )
356+
# assert len(eval_set.evaluation_items) == 2
357+
# # Check evaluation items
358+
# for i, eval_item_name in enumerate(eval_set.evaluation_items):
359+
# eval_item = client.evals.get_evaluation_item(name=eval_item_name)
360+
# assert eval_item.evaluation_item_type == types.EvaluationItemType.REQUEST
361+
# assert (
362+
# eval_item.evaluation_request.prompt.prompt_template_data.values[
363+
# "prompt"
364+
# ]
365+
# == genai_types.Content(
366+
# parts=[genai_types.Part(text=input_df.iloc[i]["prompt"])],
367+
# role="user",
368+
# )
369+
# )
370+
# assert (
371+
# eval_item.evaluation_request.prompt.prompt_template_data.values[
372+
# "context"
373+
# ]
374+
# == genai_types.Content(
375+
# parts=[genai_types.Part(text=input_df.iloc[i]["context"])],
376+
# role="user",
377+
# )
378+
# )
379+
# assert (
380+
# eval_item.evaluation_request.prompt.prompt_template_data.values[
381+
# "conversation_history"
382+
# ]
383+
# == genai_types.Content(
384+
# parts=[genai_types.Part(text=input_df.iloc[i]["conversation_history"])],
385+
# role="user",
386+
# )
387+
# )
388+
# assert (
389+
# eval_item.evaluation_request.candidate_responses[0].text
390+
# == input_df.iloc[i]["response"]
391+
# )
392+
# assert evaluation_run.error is None
322393
pytest_plugins = ("pytest_asyncio",)
323394

324395

vertexai/_genai/_evals_common.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1858,6 +1858,9 @@ def _object_to_dict(obj: Any) -> Union[dict[str, Any], Any]:
18581858
result[key] = value
18591859
elif isinstance(value, (list, tuple)):
18601860
result[key] = [_object_to_dict(item) for item in value]
1861+
# Add recursive handling for dictionaries
1862+
elif isinstance(value, dict):
1863+
result[key] = {k: _object_to_dict(v) for k, v in value.items()}
18611864
elif isinstance(value, bytes):
18621865
result[key] = base64.b64encode(value).decode("utf-8")
18631866
elif hasattr(value, "__dict__"): # Nested object
@@ -1885,13 +1888,31 @@ def _create_evaluation_set_from_dataframe(
18851888
for event in row[_evals_constant.INTERMEDIATE_EVENTS]:
18861889
if CONTENT in event:
18871890
intermediate_events.append(event[CONTENT])
1891+
if _evals_constant.CONTEXT in row or _evals_constant.HISTORY in row:
1892+
values = {}
1893+
if _evals_constant.CONTEXT in row:
1894+
values[_evals_constant.CONTEXT] = genai_types.Content(
1895+
parts=[genai_types.Part(text=row[_evals_constant.CONTEXT])],
1896+
role=_evals_constant.USER_AUTHOR,
1897+
)
1898+
if _evals_constant.HISTORY in row:
1899+
values[_evals_constant.HISTORY] = genai_types.Content(
1900+
parts=[genai_types.Part(text=row[_evals_constant.HISTORY])],
1901+
role=_evals_constant.USER_AUTHOR,
1902+
)
1903+
if _evals_constant.PROMPT in row:
1904+
values[_evals_constant.PROMPT] = genai_types.Content(
1905+
parts=[genai_types.Part(text=row[_evals_constant.PROMPT])],
1906+
role=_evals_constant.USER_AUTHOR,
1907+
)
1908+
prompt = types.EvaluationPrompt(
1909+
prompt_template_data=types.PromptTemplateData(values=values)
1910+
)
1911+
elif _evals_constant.PROMPT in row:
1912+
prompt = types.EvaluationPrompt(text=row[_evals_constant.PROMPT])
18881913
eval_item_requests.append(
18891914
types.EvaluationItemRequest(
1890-
prompt=(
1891-
types.EvaluationPrompt(text=row[_evals_constant.PROMPT])
1892-
if _evals_constant.PROMPT in row
1893-
else None
1894-
),
1915+
prompt=prompt if prompt else None,
18951916
golden_response=(
18961917
types.CandidateResponse(text=row[_evals_constant.REFERENCE])
18971918
if _evals_constant.REFERENCE in row

vertexai/_genai/_evals_constant.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
CONTENT = "content"
5454
PARTS = "parts"
5555
USER_AUTHOR = "user"
56+
HISTORY = "conversation_history"
5657

5758
COMMON_DATASET_COLUMNS = frozenset(
5859
{
@@ -61,5 +62,6 @@
6162
REFERENCE,
6263
SESSION_INPUT,
6364
CONTEXT,
65+
HISTORY,
6466
}
6567
)

0 commit comments

Comments (0)