diff --git a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
index cd97ab042c..d10523f563 100644
--- a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
+++ b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
@@ -18,6 +18,7 @@
 from vertexai import types
 from google.genai import types as genai_types
 import pytest
+import pandas as pd
 
 GCS_DEST = "gs://lakeyk-limited-bucket/eval_run_output"
 GENERAL_QUALITY_METRIC = types.EvaluationRunMetric(
@@ -63,28 +64,46 @@
         )
     ),
 )
-
+TOOL = genai_types.Tool(
+    function_declarations=[
+        genai_types.FunctionDeclaration(
+            name="get_weather",
+            description="Get weather in a location",
+            parameters={
+                "type": "object",
+                "properties": {"location": {"type": "string"}},
+            },
+        )
+    ]
+)
+AGENT_INFO = types.evals.AgentInfo(
+    agent_resource_name="projects/123/locations/us-central1/reasoningEngines/456",
+    name="agent-1",
+    instruction="agent-1 instruction",
+    tool_declarations=[TOOL],
+)
+DEFAULT_PROMPT_TEMPLATE = "{prompt}"
+INPUT_DF_WITH_CONTEXT_AND_HISTORY = pd.DataFrame(
+    {
+        "prompt": ["prompt1", "prompt2"],
+        "reference": ["reference1", "reference2"],
+        "response": ["response1", "response2"],
+        "context": ["context1", "context2"],
+        "conversation_history": ["history1", "history2"],
+    }
+)
+CANDIDATE_NAME = "candidate_1"
+MODEL_NAME = "projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
+EVAL_SET_NAME = "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
 
 def test_create_eval_run_data_source_evaluation_set(client):
     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun."""
     client._api_client._http_options.api_version = "v1beta1"
-    tool = genai_types.Tool(
-        function_declarations=[
-            genai_types.FunctionDeclaration(
-                name="get_weather",
-                description="Get weather in a location",
-                parameters={
-                    "type": "object",
-                    "properties": {"location": {"type": "string"}},
-                },
-            )
-        ]
-    )
     evaluation_run = client.evals.create_evaluation_run(
         name="test4",
         display_name="test4",
         dataset=types.EvaluationRunDataSource(
-            evaluation_set="projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
+            evaluation_set=EVAL_SET_NAME
         ),
         dest=GCS_DEST,
         metrics=[
@@ -94,21 +113,14 @@ def test_create_eval_run_data_source_evaluation_set(client):
             EXACT_MATCH_COMPUTATION_BASED_METRIC,
             BLEU_COMPUTATION_BASED_METRIC,
         ],
-        agent_info=types.evals.AgentInfo(
-            agent_resource_name="project/123/locations/us-central1/reasoningEngines/456",
-            name="agent-1",
-            instruction="agent-1 instruction",
-            tool_declarations=[tool],
-        ),
+        agent_info=AGENT_INFO,
         labels={"label1": "value1"},
     )
     assert isinstance(evaluation_run, types.EvaluationRun)
     assert evaluation_run.display_name == "test4"
     assert evaluation_run.state == types.EvaluationRunState.PENDING
     assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
-    assert evaluation_run.data_source.evaluation_set == (
-        "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
-    )
+    assert evaluation_run.data_source.evaluation_set == EVAL_SET_NAME
     assert evaluation_run.evaluation_config == types.EvaluationRunConfig(
         output_config=genai_types.OutputConfig(
             gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
@@ -122,13 +134,13 @@ def test_create_eval_run_data_source_evaluation_set(client):
         ],
     )
     assert evaluation_run.inference_configs[
-        "agent-1"
+        AGENT_INFO.name
     ] == types.EvaluationRunInferenceConfig(
         agent_config=types.EvaluationRunAgentConfig(
             developer_instruction=genai_types.Content(
                 parts=[genai_types.Part(text="agent-1 instruction")]
             ),
-            tools=[tool],
+            tools=[TOOL],
         )
     )
     assert evaluation_run.labels == {
@@ -190,13 +202,16 @@ def test_create_eval_run_with_inference_configs(client):
     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs."""
     client._api_client._http_options.api_version = "v1beta1"
     inference_config = types.EvaluationRunInferenceConfig(
-        model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
+        model=MODEL_NAME,
+        prompt_template=types.EvaluationRunPromptTemplate(
+            prompt_template="test prompt template"
+        ),
     )
     evaluation_run = client.evals.create_evaluation_run(
         name="test_inference_config",
         display_name="test_inference_config",
         dataset=types.EvaluationRunDataSource(
-            evaluation_set="projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
+            evaluation_set=EVAL_SET_NAME
         ),
         dest=GCS_DEST,
         metrics=[GENERAL_QUALITY_METRIC],
@@ -207,9 +222,7 @@ def test_create_eval_run_with_inference_configs(client):
     assert evaluation_run.display_name == "test_inference_config"
     assert evaluation_run.state == types.EvaluationRunState.PENDING
     assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
-    assert evaluation_run.data_source.evaluation_set == (
-        "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
-    )
+    assert evaluation_run.data_source.evaluation_set == EVAL_SET_NAME
     assert evaluation_run.evaluation_config == types.EvaluationRunConfig(
         output_config=genai_types.OutputConfig(
             gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
@@ -223,9 +236,11 @@ def test_create_eval_run_with_inference_configs(client):
     assert evaluation_run.error is None
 
 
-# Test fails in replay mode because of UUID generation mismatch.
+# Dataframe tests fail in replay mode because of UUID generation mismatch.
 # def test_create_eval_run_data_source_evaluation_dataset(client):
-#     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with EvaluationDataset."""
+#     """Tests that create_evaluation_run() creates a correctly structured
+#     EvaluationRun with EvaluationDataset.
+#     """
 #     input_df = pd.DataFrame(
 #         {
 #             "prompt": ["prompt1", "prompt2"],
@@ -275,7 +290,7 @@ def test_create_eval_run_with_inference_configs(client):
 #         name="test6",
 #         display_name="test6",
 #         dataset=types.EvaluationDataset(
-#             candidate_name="candidate_1",
+#             candidate_name=CANDIDATE_NAME,
 #             eval_dataset_df=input_df,
 #         ),
 #         dest=GCS_DEST,
@@ -319,6 +334,196 @@ def test_create_eval_run_with_inference_configs(client):
 #     assert evaluation_run.error is None
 
 
+# def test_create_eval_run_data_source_evaluation_dataset_with_inference_configs_and_prompt_template_data(
+#     client,
+# ):
+#     """Tests that create_evaluation_run() creates a correctly structured
+#     EvaluationRun with EvaluationDataset and inference_configs.
+#     Prompt template data is inferred from the dataset and a default prompt
+#     template should be used.
+#     """
+#     evaluation_run = client.evals.create_evaluation_run(
+#         name="test9",
+#         display_name="test9",
+#         dataset=types.EvaluationDataset(
+#             candidate_name=CANDIDATE_NAME,
+#             eval_dataset_df=INPUT_DF_WITH_CONTEXT_AND_HISTORY,
+#         ),
+#         dest=GCS_DEST,
+#         metrics=[GENERAL_QUALITY_METRIC],
+#         inference_configs={
+#             CANDIDATE_NAME: types.EvaluationRunInferenceConfig(
+#                 model=MODEL_NAME,
+#             )
+#         },
+#     )
+#     assert isinstance(evaluation_run, types.EvaluationRun)
+#     assert evaluation_run.display_name == "test9"
+#     assert evaluation_run.state == types.EvaluationRunState.PENDING
+#     assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
+#     assert evaluation_run.inference_configs[
+#         CANDIDATE_NAME
+#     ] == types.EvaluationRunInferenceConfig(
+#         model=MODEL_NAME,
+#         prompt_template=types.EvaluationRunPromptTemplate(
+#             prompt_template=DEFAULT_PROMPT_TEMPLATE
+#         ),
+#     )
+#     # Check evaluation set
+#     assert evaluation_run.data_source.evaluation_set
+#     eval_set = client.evals.get_evaluation_set(
+#         name=evaluation_run.data_source.evaluation_set
+#     )
+#     assert len(eval_set.evaluation_items) == 2
+#     # Check evaluation items
+#     for i, eval_item_name in enumerate(eval_set.evaluation_items):
+#         eval_item = client.evals.get_evaluation_item(name=eval_item_name)
+#         assert eval_item.evaluation_item_type == types.EvaluationItemType.REQUEST
+#         assert (
+#             eval_item.evaluation_request.prompt.prompt_template_data.values[
+#                 "prompt"
+#             ]
+#             == genai_types.Content(
+#                 parts=[
+#                     genai_types.Part(
+#                         text=INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["prompt"]
+#                     )
+#                 ],
+#                 role="user",
+#             )
+#         )
+#         assert (
+#             eval_item.evaluation_request.prompt.prompt_template_data.values[
+#                 "context"
+#             ]
+#             == genai_types.Content(
+#                 parts=[
+#                     genai_types.Part(
+#                         text=INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["context"]
+#                     )
+#                 ],
+#                 role="user",
+#             )
+#         )
+#         assert (
+#             eval_item.evaluation_request.prompt.prompt_template_data.values[
+#                 "conversation_history"
+#             ]
+#             == genai_types.Content(
+#                 parts=[
+#                     genai_types.Part(
+#                         text=(
+#                             INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i][
+#                                 "conversation_history"
+#                             ]
+#                         )
+#                     )
+#                 ],
+#                 role="user",
+#             )
+#         )
+#         assert (
+#             eval_item.evaluation_request.candidate_responses[0].text
+#             == INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["response"]
+#         )
+#     assert evaluation_run.error is None
+
+
+# def test_create_eval_run_data_source_evaluation_dataset_with_agent_info_and_prompt_template_data(
+#     client,
+# ):
+#     """Tests that create_evaluation_run() creates a correctly structured
+#     EvaluationRun with EvaluationDataset and agent_info.
+#     Prompt template data is inferred from the dataset and a default prompt
+#     template should be used.
+#     """
+#     evaluation_run = client.evals.create_evaluation_run(
+#         name="test9",
+#         display_name="test9",
+#         dataset=types.EvaluationDataset(
+#             candidate_name=CANDIDATE_NAME,
+#             eval_dataset_df=INPUT_DF_WITH_CONTEXT_AND_HISTORY,
+#         ),
+#         dest=GCS_DEST,
+#         metrics=[GENERAL_QUALITY_METRIC],
+#         agent_info=AGENT_INFO,
+#     )
+#     assert isinstance(evaluation_run, types.EvaluationRun)
+#     assert evaluation_run.display_name == "test9"
+#     assert evaluation_run.state == types.EvaluationRunState.PENDING
+#     assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
+#     assert evaluation_run.inference_configs[
+#         AGENT_INFO.name
+#     ] == types.EvaluationRunInferenceConfig(
+#         agent_config=types.EvaluationRunAgentConfig(
+#             developer_instruction=genai_types.Content(
+#                 parts=[genai_types.Part(text=AGENT_INFO.instruction)]
+#             ),
+#             tools=[TOOL],
+#         ),
+#         prompt_template=types.EvaluationRunPromptTemplate(
+#             prompt_template=DEFAULT_PROMPT_TEMPLATE
+#         ),
+#     )
+#     # Check evaluation set
+#     assert evaluation_run.data_source.evaluation_set
+#     eval_set = client.evals.get_evaluation_set(
+#         name=evaluation_run.data_source.evaluation_set
+#     )
+#     assert len(eval_set.evaluation_items) == 2
+#     # Check evaluation items
+#     for i, eval_item_name in enumerate(eval_set.evaluation_items):
+#         eval_item = client.evals.get_evaluation_item(name=eval_item_name)
+#         assert eval_item.evaluation_item_type == types.EvaluationItemType.REQUEST
+#         assert (
+#             eval_item.evaluation_request.prompt.prompt_template_data.values[
+#                 "prompt"
+#             ]
+#             == genai_types.Content(
+#                 parts=[
+#                     genai_types.Part(
+#                         text=INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["prompt"]
+#                     )
+#                 ],
+#                 role="user",
+#             )
+#         )
+#         assert (
+#             eval_item.evaluation_request.prompt.prompt_template_data.values[
+#                 "context"
+#             ]
+#             == genai_types.Content(
+#                 parts=[
+#                     genai_types.Part(
+#                         text=INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["context"]
+#                     )
+#                 ],
+#                 role="user",
+#             )
+#         )
+#         assert (
+#             eval_item.evaluation_request.prompt.prompt_template_data.values[
+#                 "conversation_history"
+#             ]
+#             == genai_types.Content(
+#                 parts=[
+#                     genai_types.Part(
+#                         text=(
+#                             INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i][
+#                                 "conversation_history"
+#                             ]
+#                         )
+#                     )
+#                 ],
+#                 role="user",
+#             )
+#         )
+#         assert (
+#             eval_item.evaluation_request.candidate_responses[0].text
+#             == INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["response"]
+#         )
+#     assert evaluation_run.error is None
+
 pytest_plugins = ("pytest_asyncio",)
 
 
@@ -371,13 +576,16 @@ async def test_create_eval_run_async_with_inference_configs(client):
     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs asynchronously."""
     client._api_client._http_options.api_version = "v1beta1"
     inference_config = types.EvaluationRunInferenceConfig(
-        model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
+        model=MODEL_NAME,
+        prompt_template=types.EvaluationRunPromptTemplate(
+            prompt_template="Test the {prompt}"
+        ),
     )
     evaluation_run = await client.aio.evals.create_evaluation_run(
         name="test_inference_config_async",
         display_name="test_inference_config_async",
         dataset=types.EvaluationRunDataSource(
-            evaluation_set="projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
+            evaluation_set=EVAL_SET_NAME
         ),
         dest=GCS_DEST,
         metrics=[GENERAL_QUALITY_METRIC],
@@ -388,9 +596,7 @@ async def test_create_eval_run_async_with_inference_configs(client):
     assert evaluation_run.display_name == "test_inference_config_async"
     assert evaluation_run.state == types.EvaluationRunState.PENDING
     assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
-    assert evaluation_run.data_source.evaluation_set == (
-        "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
-    )
+    assert evaluation_run.data_source.evaluation_set == EVAL_SET_NAME
     assert evaluation_run.evaluation_config == types.EvaluationRunConfig(
         output_config=genai_types.OutputConfig(
             gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py
index f33320324a..277dacaa87 100644
--- a/vertexai/_genai/_evals_common.py
+++ b/vertexai/_genai/_evals_common.py
@@ -258,6 +258,220 @@ def _extract_contents_for_inference(
         return request_dict_or_raw_text
 
 
+def _resolve_dataset(
+    api_client: BaseApiClient,
+    dataset: Union[types.EvaluationRunDataSource, types.EvaluationDataset],
+    dest: str,
+    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
+) -> types.EvaluationRunDataSource:
+    """Resolves the dataset of an evaluation run into a data source.
+
+    Args:
+        api_client: The API client used to create the evaluation set.
+        dataset: A data source, which is returned unchanged, or an
+          `EvaluationDataset` whose dataframe is turned into an evaluation set.
+        dest: Destination passed on to the evaluation set creation.
+        agent_info_pydantic: Optional agent info whose name is the fallback
+          candidate name (see `_get_candidate_name`).
+
+    Returns:
+        The data source to use for the evaluation run.
+
+    Raises:
+        ValueError: If an `EvaluationDataset` has no `eval_dataset_df`.
+    """
+    if not isinstance(dataset, types.EvaluationDataset):
+        return dataset
+    # create_evaluation_run() performed this check before delegating here.
+    if dataset.eval_dataset_df is None:
+        raise ValueError("EvaluationDataset must have eval_dataset_df populated.")
+    candidate_name = _get_candidate_name(dataset, agent_info_pydantic)
+    eval_set = _create_evaluation_set_from_dataframe(
+        api_client, dest, dataset.eval_dataset_df, candidate_name
+    )
+    return types.EvaluationRunDataSource(evaluation_set=eval_set.name)
+
+
+def _get_default_prompt_template(
+    api_client: BaseApiClient,
+    inference_config: types.EvaluationRunInferenceConfigOrDict,
+    dataset: types.EvaluationRunDataSource,
+) -> Any:
+    """Returns the prompt template to use for an inference config.
+
+    A prompt template already set on `inference_config` is returned as is.
+    Otherwise the first item of the evaluation set is fetched and, when its
+    prompt template data has a "prompt" value, "{prompt}" is returned.
+
+    Args:
+        api_client: The API client used to fetch the evaluation set and item.
+        inference_config: The inference config, as an object or a dict.
+        dataset: The data source whose evaluation set is inspected.
+
+    Returns:
+        The configured prompt template, the string "{prompt}", or None when no
+        default applies or the lookup fails (failures are logged as warnings).
+    """
+    configured_template = (
+        inference_config.get("prompt_template")
+        if isinstance(inference_config, dict)
+        else inference_config.prompt_template
+    )
+    if configured_template:
+        return configured_template
+
+    # Best effort: a failed lookup only means that no default is applied.
+    try:
+        if not dataset.evaluation_set:
+            return None
+        evals_module = evals.Evals(api_client_=api_client)
+        eval_set = evals_module.get_evaluation_set(name=dataset.evaluation_set)
+        if not (eval_set and eval_set.evaluation_items):
+            return None
+        eval_item = evals_module.get_evaluation_item(
+            name=eval_set.evaluation_items[0]
+        )
+        request = eval_item.evaluation_request if eval_item else None
+        prompt = request.prompt if request else None
+        template_data = prompt.prompt_template_data if prompt else None
+        values = template_data.values if template_data else None
+        if values and "prompt" in values:
+            return "{prompt}"  # Default prompt template
+    except Exception as e:
+        logger.warning("Failed to get prompt template from evaluation set: %s", e)
+    return None
+
+
+def _resolve_inference_configs(
+    api_client: BaseApiClient,
+    dataset: types.EvaluationRunDataSource,
+    inference_configs: Optional[
+        dict[str, types.EvaluationRunInferenceConfigOrDict]
+    ] = None,
+    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
+) -> Optional[dict[str, types.EvaluationRunInferenceConfigOrDict]]:
+    """Resolves inference configs for the evaluation run.
+
+    If `agent_info_pydantic` has a name, the configs are replaced by a single
+    agent config keyed by that name. Each config without a prompt template then
+    receives the default prompt template when one applies to the dataset. The
+    config objects or dicts are updated in place.
+
+    Args:
+        api_client: The API client used to look up the default prompt template.
+        dataset: The resolved data source of the evaluation run.
+        inference_configs: Optional mapping of names to inference configs.
+        agent_info_pydantic: Optional agent info to build an agent config from.
+
+    Returns:
+        The resolved inference configs, or the input unchanged if it is empty.
+    """
+    # Resolve agent config
+    if agent_info_pydantic and agent_info_pydantic.name:
+        inference_configs = {
+            agent_info_pydantic.name: types.EvaluationRunInferenceConfig(
+                agent_config=types.EvaluationRunAgentConfig(
+                    developer_instruction=genai_types.Content(
+                        parts=[genai_types.Part(text=agent_info_pydantic.instruction)]
+                    ),
+                    tools=agent_info_pydantic.tool_declarations,
+                )
+            )
+        }
+    if not inference_configs:
+        return inference_configs
+
+    # Resolve prompt template data
+    default_template: Optional[types.EvaluationRunPromptTemplate] = None
+    default_looked_up = False
+    for inference_config in inference_configs.values():
+        configured_template = (
+            inference_config.get("prompt_template")
+            if isinstance(inference_config, dict)
+            else inference_config.prompt_template
+        )
+        if configured_template:
+            continue
+        if not default_looked_up:
+            # The default depends only on the dataset, so fetch it once instead
+            # of once per config.
+            default_looked_up = True
+            default = _get_default_prompt_template(
+                api_client, inference_config, dataset
+            )
+            if default and not isinstance(
+                default, types.EvaluationRunPromptTemplate
+            ):
+                default = types.EvaluationRunPromptTemplate(prompt_template=default)
+            default_template = default or None
+        if default_template is None:
+            continue
+        if isinstance(inference_config, dict):
+            inference_config["prompt_template"] = default_template.model_dump(
+                exclude_none=True
+            )
+        else:
+            # Keep the typed field typed; each config gets its own copy.
+            inference_config.prompt_template = default_template.model_copy()
+    return inference_configs
+
+
+def _add_evaluation_run_labels(
+    labels: Optional[dict[str, str]] = None,
+    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
+) -> Optional[dict[str, str]]:
+    """Adds the agent engine label to the evaluation run labels.
+
+    Args:
+        labels: Optional user-provided labels; never modified in place.
+        agent_info_pydantic: Optional agent info providing the resource name.
+
+    Returns:
+        `labels` unchanged when there is no agent resource name; otherwise a new
+        dict with "vertex-ai-evaluation-agent-engine-id" set to the text after
+        the last "reasoningEngines/" in the resource name.
+    """
+    if not (agent_info_pydantic and agent_info_pydantic.agent_resource_name):
+        return labels
+    # Copy so that the dict passed in by the caller is left untouched.
+    resolved_labels = dict(labels or {})
+    resolved_labels["vertex-ai-evaluation-agent-engine-id"] = (
+        agent_info_pydantic.agent_resource_name.split("reasoningEngines/")[-1]
+    )
+    return resolved_labels
+
+
+def _get_candidate_name(
+    dataset: types.EvaluationDataset,
+    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
+) -> Optional[str]:
+    """Returns the candidate name for an evaluation dataset.
+
+    The dataset's own `candidate_name` is used when set; the agent name is only
+    a fallback for a dataset whose `candidate_name` is None. A warning is logged
+    when both names are set and differ.
+
+    Args:
+        dataset: The evaluation dataset.
+        agent_info_pydantic: Optional agent info providing the fallback name.
+
+    Returns:
+        The resolved candidate name, if any.
+    """
+    agent_name = agent_info_pydantic.name if agent_info_pydantic else None
+    if dataset.candidate_name is None:
+        return agent_name
+    if (
+        dataset.candidate_name
+        and agent_name
+        and dataset.candidate_name != agent_name
+    ):
+        logger.warning(
+            "Evaluation dataset candidate_name and agent_info.name are different. Please make sure this is intended."
+        )
+    return dataset.candidate_name or None
+
+
 def _execute_inference_concurrently(
     api_client: BaseApiClient,
     prompt_dataset: pd.DataFrame,
@@ -1858,6 +1997,9 @@ def _object_to_dict(obj: Any) -> Union[dict[str, Any], Any]:
             result[key] = value
         elif isinstance(value, (list, tuple)):
             result[key] = [_object_to_dict(item) for item in value]
+        # Add recursive handling for dictionaries
+        elif isinstance(value, dict):
+            result[key] = {k: _object_to_dict(v) for k, v in value.items()}
         elif isinstance(value, bytes):
             result[key] = base64.b64encode(value).decode("utf-8")
         elif hasattr(value, "__dict__"):  # Nested object
@@ -1885,13 +2027,34 @@ def _create_evaluation_set_from_dataframe(
             for event in row[_evals_constant.INTERMEDIATE_EVENTS]:
                 if CONTENT in event:
                     intermediate_events.append(event[CONTENT])
+        # Reset for every row so that a row without prompt, context or history
+        # columns yields no prompt instead of hitting an unassigned variable.
+        prompt = None
+        if _evals_constant.CONTEXT in row or _evals_constant.HISTORY in row:
+            values = {}
+            if _evals_constant.CONTEXT in row:
+                values[_evals_constant.CONTEXT] = genai_types.Content(
+                    parts=[genai_types.Part(text=row[_evals_constant.CONTEXT])],
+                    role=_evals_constant.USER_AUTHOR,
+                )
+            if _evals_constant.HISTORY in row:
+                values[_evals_constant.HISTORY] = genai_types.Content(
+                    parts=[genai_types.Part(text=row[_evals_constant.HISTORY])],
+                    role=_evals_constant.USER_AUTHOR,
+                )
+            if _evals_constant.PROMPT in row:
+                values[_evals_constant.PROMPT] = genai_types.Content(
+                    parts=[genai_types.Part(text=row[_evals_constant.PROMPT])],
+                    role=_evals_constant.USER_AUTHOR,
+                )
+            prompt = types.EvaluationPrompt(
+                prompt_template_data=types.PromptTemplateData(values=values)
+            )
+        elif _evals_constant.PROMPT in row:
+            prompt = types.EvaluationPrompt(text=row[_evals_constant.PROMPT])
         eval_item_requests.append(
             types.EvaluationItemRequest(
-                prompt=(
-                    types.EvaluationPrompt(text=row[_evals_constant.PROMPT])
-                    if _evals_constant.PROMPT in row
-                    else None
-                ),
+                prompt=prompt,
                 golden_response=(
                     types.CandidateResponse(text=row[_evals_constant.REFERENCE])
                     if _evals_constant.REFERENCE in row
diff --git a/vertexai/_genai/_evals_constant.py b/vertexai/_genai/_evals_constant.py
index 6fc27d94e0..847140dc5c 100644
--- a/vertexai/_genai/_evals_constant.py
+++ b/vertexai/_genai/_evals_constant.py
@@ -53,6 +53,7 @@
 CONTENT = "content"
 PARTS = "parts"
 USER_AUTHOR = "user"
+HISTORY = "conversation_history"
 
 COMMON_DATASET_COLUMNS = frozenset(
     {
@@ -61,5 +62,6 @@
         REFERENCE,
         SESSION_INPUT,
         CONTEXT,
+        HISTORY,
     }
 )
diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py
index 3632628b87..73922955cd 100644
--- a/vertexai/_genai/evals.py
+++ b/vertexai/_genai/evals.py
@@ -253,6 +253,9 @@ def _EvaluationRunConfig_from_vertex(
     if getv(from_object, ["autoraterConfig"]) is not None:
         setv(to_object, ["autorater_config"], getv(from_object, ["autoraterConfig"]))
 
+    if getv(from_object, ["promptTemplate"]) is not None:
+        setv(to_object, ["prompt_template"], getv(from_object, ["promptTemplate"]))
+
     return to_object
 
 
@@ -277,6 +280,9 @@ def _EvaluationRunConfig_to_vertex(
     if getv(from_object, ["autorater_config"]) is not None:
         setv(to_object, ["autoraterConfig"], getv(from_object, ["autorater_config"]))
 
+    if getv(from_object, ["prompt_template"]) is not None:
+        setv(to_object, ["promptTemplate"], getv(from_object, ["prompt_template"]))
+
     return to_object
 
 
@@ -1625,32 +1631,14 @@ def create_evaluation_run(
             raise ValueError(
                 "At most one of agent_info or inference_configs can be provided."
             )
-        agent_info_pydantic: types.evals.AgentInfo = types.evals.AgentInfo()
-        if agent_info:
-            if isinstance(agent_info, dict):
-                agent_info_pydantic = types.evals.AgentInfo.model_validate(agent_info)
-            else:
-                agent_info_pydantic = agent_info
-        if isinstance(dataset, types.EvaluationDataset):
-            if dataset.eval_dataset_df is None:
-                raise ValueError(
-                    "EvaluationDataset must have eval_dataset_df populated."
-                )
-            if agent_info_pydantic is not None and (
-                dataset.candidate_name
-                and agent_info_pydantic
-                and agent_info_pydantic.name
-                and dataset.candidate_name != agent_info_pydantic.name
-            ):
-                logger.warning(
-                    "Evaluation dataset candidate_name and agent_info.name are different. Please make sure this is intended."
-                )
-            elif dataset.candidate_name is None and agent_info_pydantic:
-                dataset.candidate_name = agent_info_pydantic.name
-            eval_set = _evals_common._create_evaluation_set_from_dataframe(
-                self._api_client, dest, dataset.eval_dataset_df, dataset.candidate_name
-            )
-            dataset = types.EvaluationRunDataSource(evaluation_set=eval_set.name)
+        agent_info_pydantic = (
+            types.evals.AgentInfo.model_validate(agent_info)
+            if isinstance(agent_info, dict)
+            else (agent_info or types.evals.AgentInfo())
+        )
+        resolved_dataset = _evals_common._resolve_dataset(
+            self._api_client, dataset, dest, agent_info_pydantic
+        )
         output_config = genai_types.OutputConfig(
             gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
         )
@@ -1660,37 +1648,20 @@ def create_evaluation_run(
         evaluation_config = types.EvaluationRunConfig(
             output_config=output_config, metrics=resolved_metrics
         )
-        if agent_info_pydantic and agent_info_pydantic.name is not None:
-            inference_configs = {}
-            inference_configs[agent_info_pydantic.name] = (
-                types.EvaluationRunInferenceConfig(
-                    agent_config=types.EvaluationRunAgentConfig(
-                        developer_instruction=genai_types.Content(
-                            parts=[
-                                genai_types.Part(text=agent_info_pydantic.instruction)
-                            ]
-                        ),
-                        tools=agent_info_pydantic.tool_declarations,
-                    )
-                )
-            )
-            if agent_info_pydantic.agent_resource_name:
-                labels = labels or {}
-                labels["vertex-ai-evaluation-agent-engine-id"] = (
-                    agent_info_pydantic.agent_resource_name.split("reasoningEngines/")[
-                        -1
-                    ]
-                )
-        if not name:
-            name = f"evaluation_run_{uuid.uuid4()}"
-
+        resolved_inference_configs = _evals_common._resolve_inference_configs(
+            self._api_client, resolved_dataset, inference_configs, agent_info_pydantic
+        )
+        resolved_labels = _evals_common._add_evaluation_run_labels(
+            labels, agent_info_pydantic
+        )
+        resolved_name = name or f"evaluation_run_{uuid.uuid4()}"
         return self._create_evaluation_run(
-            name=name,
-            display_name=display_name or name,
-            data_source=dataset,
+            name=resolved_name,
+            display_name=display_name or resolved_name,
+            data_source=resolved_dataset,
             evaluation_config=evaluation_config,
-            inference_configs=inference_configs,
-            labels=labels,
+            inference_configs=resolved_inference_configs,
+            labels=resolved_labels,
             config=config,
         )
 
@@ -2495,27 +2466,14 @@ async def create_evaluation_run(
             raise ValueError(
                 "At most one of agent_info or inference_configs can be provided."
             )
-        if agent_info and isinstance(agent_info, dict):
-            agent_info = types.evals.AgentInfo.model_validate(agent_info)
-        if isinstance(dataset, types.EvaluationDataset):
-            if dataset.eval_dataset_df is None:
-                raise ValueError(
-                    "EvaluationDataset must have eval_dataset_df populated."
-                )
-            if agent_info is not None and (
-                dataset.candidate_name
-                and agent_info.name
-                and dataset.candidate_name != agent_info.name
-            ):
-                logger.warning(
-                    "Evaluation dataset candidate_name and agent_info.name are different. Please make sure this is intended."
-                )
-            elif dataset.candidate_name is None and agent_info:
-                dataset.candidate_name = agent_info.name
-            eval_set = _evals_common._create_evaluation_set_from_dataframe(
-                self._api_client, dest, dataset.eval_dataset_df, dataset.candidate_name
-            )
-            dataset = types.EvaluationRunDataSource(evaluation_set=eval_set.name)
+        agent_info_pydantic = (
+            types.evals.AgentInfo.model_validate(agent_info)
+            if isinstance(agent_info, dict)
+            else (agent_info or types.evals.AgentInfo())
+        )
+        resolved_dataset = _evals_common._resolve_dataset(
+            self._api_client, dataset, dest, agent_info_pydantic
+        )
         output_config = genai_types.OutputConfig(
             gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
         )
@@ -2525,31 +2483,21 @@ async def create_evaluation_run(
         evaluation_config = types.EvaluationRunConfig(
             output_config=output_config, metrics=resolved_metrics
         )
-        if agent_info and agent_info.name is not None:
-            inference_configs = {}
-            inference_configs[agent_info.name] = types.EvaluationRunInferenceConfig(
-                agent_config=types.EvaluationRunAgentConfig(
-                    developer_instruction=genai_types.Content(
-                        parts=[genai_types.Part(text=agent_info.instruction)]
-                    ),
-                    tools=agent_info.tool_declarations,
-                )
-            )
-            if agent_info.agent_resource_name:
-                labels = labels or {}
-                labels["vertex-ai-evaluation-agent-engine-id"] = (
-                    agent_info.agent_resource_name.split("reasoningEngines/")[-1]
-                )
-        if not name:
-            name = f"evaluation_run_{uuid.uuid4()}"
+        resolved_inference_configs = _evals_common._resolve_inference_configs(
+            self._api_client, resolved_dataset, inference_configs, agent_info_pydantic
+        )
+        resolved_labels = _evals_common._add_evaluation_run_labels(
+            labels, agent_info_pydantic
+        )
+        resolved_name = name or f"evaluation_run_{uuid.uuid4()}"
 
         result = await self._create_evaluation_run(
-            name=name,
-            display_name=display_name or name,
-            data_source=dataset,
+            name=resolved_name,
+            display_name=display_name or resolved_name,
+            data_source=resolved_dataset,
             evaluation_config=evaluation_config,
-            inference_configs=inference_configs,
-            labels=labels,
+            inference_configs=resolved_inference_configs,
+            labels=resolved_labels,
             config=config,
         )
 
diff --git a/vertexai/_genai/types/__init__.py b/vertexai/_genai/types/__init__.py
index 8b02bc222c..9a7248ad23 100644
--- a/vertexai/_genai/types/__init__.py
+++ b/vertexai/_genai/types/__init__.py
@@ -362,6 +362,9 @@
 from .common import EvaluationRunMetricDict
 from .common import EvaluationRunMetricOrDict
 from .common import EvaluationRunOrDict
+from .common import EvaluationRunPromptTemplate
+from .common import EvaluationRunPromptTemplateDict
+from .common import EvaluationRunPromptTemplateOrDict
 from .common import EvaluationRunResults
 from .common import EvaluationRunResultsDict
 from .common import EvaluationRunResultsOrDict
@@ -1104,6 +1107,9 @@
     "EvaluationRunMetric",
     "EvaluationRunMetricDict",
     "EvaluationRunMetricOrDict",
+    "EvaluationRunPromptTemplate",
+    "EvaluationRunPromptTemplateDict",
+    "EvaluationRunPromptTemplateOrDict",
     "EvaluationRunConfig",
     "EvaluationRunConfigDict",
     "EvaluationRunConfigOrDict",
diff --git a/vertexai/_genai/types/common.py b/vertexai/_genai/types/common.py
index 2ec662eded..561929c10c 100644
--- a/vertexai/_genai/types/common.py
+++ b/vertexai/_genai/types/common.py
@@ -1098,6 +1098,38 @@ class EvaluationRunMetricDict(TypedDict, total=False):
 EvaluationRunMetricOrDict = Union[EvaluationRunMetric, EvaluationRunMetricDict]
 
 
+class EvaluationRunPromptTemplate(_common.BaseModel):
+    """Prompt template used for inference, given inline or as a Cloud Storage URI."""
+
+    prompt_template: Optional[str] = Field(
+        default=None,
+        description="""Inline prompt template. Template variables should be in the format
+      "{var_name}".""",
+    )
+    gcs_uri: Optional[str] = Field(
+        default=None,
+        description="""Prompt template stored in Cloud Storage. Format:
+      "gs://my-bucket/file-name.txt".""",
+    )
+
+
+class EvaluationRunPromptTemplateDict(TypedDict, total=False):
+    """Prompt template used for inference, given inline or as a Cloud Storage URI."""
+
+    prompt_template: Optional[str]
+    """Inline prompt template. Template variables should be in the format
+      "{var_name}"."""
+
+    gcs_uri: Optional[str]
+    """Prompt template stored in Cloud Storage. Format:
+      "gs://my-bucket/file-name.txt"."""
+
+
+EvaluationRunPromptTemplateOrDict = Union[
+    EvaluationRunPromptTemplate, EvaluationRunPromptTemplateDict
+]
+
+
 class EvaluationRunConfig(_common.BaseModel):
     """The evaluation configuration used for the evaluation run."""
 
@@ -1111,6 +1143,9 @@ class EvaluationRunConfig(_common.BaseModel):
     autorater_config: Optional[genai_types.AutoraterConfig] = Field(
         default=None, description="""The autorater config for the evaluation run."""
     )
+    prompt_template: Optional[EvaluationRunPromptTemplate] = Field(
+        default=None, description="""The prompt template used for inference."""
+    )
 
 
 class EvaluationRunConfigDict(TypedDict, total=False):
@@ -1125,6 +1160,9 @@ class EvaluationRunConfigDict(TypedDict, total=False):
     autorater_config: Optional[genai_types.AutoraterConfigDict]
     """The autorater config for the evaluation run."""
 
+    prompt_template: Optional[EvaluationRunPromptTemplateDict]
+    """The prompt template used for inference."""
+
 
 EvaluationRunConfigOrDict = Union[EvaluationRunConfig, EvaluationRunConfigDict]
 
@@ -1843,6 +1881,9 @@ class EvaluationRunInferenceConfig(_common.BaseModel):
         default=None,
         description="""The fully qualified name of the publisher model or endpoint to use for inference.""",
     )
+    prompt_template: Optional[EvaluationRunPromptTemplate] = Field(
+        default=None, description="""The prompt template used for inference."""
+    )
 
 
 class EvaluationRunInferenceConfigDict(TypedDict, total=False):
@@ -1857,6 +1898,9 @@ class EvaluationRunInferenceConfigDict(TypedDict, total=False):
     model: Optional[str]
     """The fully qualified name of the publisher model or endpoint to use for inference."""
 
+    prompt_template: Optional[EvaluationRunPromptTemplateDict]
+    """The prompt template used for inference."""
+
 
 EvaluationRunInferenceConfigOrDict = Union[
     EvaluationRunInferenceConfig, EvaluationRunInferenceConfigDict