googleapis
diff --git a/tests/unit/vertexai/genai/replays/conftest.py
Lines changed: 2 additions & 2 deletions b/tests/unit/vertexai/genai/replays/conftest.py
Lines changed: 2 additions & 2 deletions
diff --git a/tests/unit/vertexai/genai/replays/test_evaluate_instances.py
Lines changed: 15 additions & 15 deletions b/tests/unit/vertexai/genai/replays/test_evaluate_instances.py
Lines changed: 15 additions & 15 deletions
diff --git a/tests/unit/vertexai/genai/test_evals.py
Lines changed: 30 additions & 29 deletions b/tests/unit/vertexai/genai/test_evals.py
Lines changed: 30 additions & 29 deletions
diff --git a/vertexai/_genai/_bigquery_utils.py
Lines changed: 49 additions & 0 deletions b/vertexai/_genai/_bigquery_utils.py
Lines changed: 49 additions & 0 deletions
@@ -25,7 +25,7 @@
 from google.cloud import storage, bigquery
 from google.genai import _replay_api_client
 from google.genai import client as google_genai_client_module
-from vertexai._genai import _evals_utils
+from vertexai._genai import _gcs_utils
 from vertexai._genai import prompt_optimizer
 import pytest
 
@@ -246,7 +246,7 @@ def client(use_vertex, replays_prefix, http_options, request):
                     mock_bigquery_client.return_value = mock.MagicMock()
 
                     with mock.patch.object(
-                        _evals_utils.GcsUtils, "read_file_contents"
+                        _gcs_utils.GcsUtils, "read_file_contents"
                     ) as mock_read_file_contents:
                         mock_read_file_contents.side_effect = (
                             _mock_read_file_contents_side_effect
 
@@ -103,9 +103,9 @@ def test_pointwise_metric_with_agent_data(client):
     """Tests the _evaluate_instances method with PointwiseMetricInput and agent_data."""
     instance_dict = {"prompt": "What is the capital of France?", "response": "Paris"}
     json_instance = json.dumps(instance_dict)
-    agent_data = types.AgentData(
-        agent_config=types.AgentConfig(
-            tools=types.Tools(
+    agent_data = types.evals.AgentData(
+        agent_config=types.evals.AgentConfig(
+            tools=types.evals.Tools(
                 tool=[
                     genai_types.Tool(
                         function_declarations=[
@@ -114,15 +114,15 @@ def test_pointwise_metric_with_agent_data(client):
                     )
                 ]
             ),
-            developer_instruction=types.InstanceData(text="instruction"),
+            developer_instruction=types.evals.InstanceData(text="instruction"),
         ),
-        events=types.Events(
+        events=types.evals.Events(
             event=[genai_types.Content(parts=[genai_types.Part(text="hello")])]
         ),
     )
     instance = types.EvaluationInstance(
-        prompt=types.InstanceData(text="What is the capital of France?"),
-        response=types.InstanceData(text="Paris"),
+        prompt=types.evals.InstanceData(text="What is the capital of France?"),
+        response=types.evals.InstanceData(text="Paris"),
         agent_data=agent_data,
     )
 
@@ -144,9 +144,9 @@ def test_pointwise_metric_with_agent_data(client):
 
 def test_predefined_metric_with_agent_data(client):
     """Tests the _evaluate_instances method with predefined metric and agent_data."""
-    agent_data = types.AgentData(
-        agent_config=types.AgentConfig(
-            tools=types.Tools(
+    agent_data = types.evals.AgentData(
+        agent_config=types.evals.AgentConfig(
+            tools=types.evals.Tools(
                 tool=[
                     genai_types.Tool(
                         function_declarations=[
@@ -155,16 +155,16 @@ def test_predefined_metric_with_agent_data(client):
                     )
                 ]
             ),
-            developer_instruction=types.InstanceData(text="instruction"),
+            developer_instruction=types.evals.InstanceData(text="instruction"),
         ),
-        events=types.Events(
+        events=types.evals.Events(
             event=[genai_types.Content(parts=[genai_types.Part(text="hello")])]
         ),
     )
     instance = types.EvaluationInstance(
-        prompt=types.InstanceData(text="What is the capital of France?"),
-        response=types.InstanceData(text="Paris"),
-        reference=types.InstanceData(text="Paris"),
+        prompt=types.evals.InstanceData(text="What is the capital of France?"),
+        response=types.evals.InstanceData(text="Paris"),
+        reference=types.evals.InstanceData(text="Paris"),
         agent_data=agent_data,
     )
 
 
@@ -27,6 +27,8 @@
 from vertexai._genai import _evals_data_converters
 from vertexai._genai import _evals_metric_handlers
 from vertexai._genai import _evals_visualization
+from vertexai._genai import _evals_metric_loaders
+from vertexai._genai import _gcs_utils
 from vertexai._genai import _observability_data_converter
 from vertexai._genai import evals
 from vertexai._genai import types as vertexai_genai_types
@@ -76,9 +78,9 @@ def mock_eval_dependencies(mock_api_client_fixture):
     ) as mock_bq_client, mock.patch(
         "vertexai._genai.evals.Evals.evaluate_instances"
     ) as mock_evaluate_instances, mock.patch(
-        "vertexai._genai._evals_utils.GcsUtils.upload_json_to_prefix"
+        "vertexai._genai._gcs_utils.GcsUtils.upload_json_to_prefix"
     ) as mock_upload_to_gcs, mock.patch(
-        "vertexai._genai._evals_utils.LazyLoadedPrebuiltMetric._fetch_and_parse"
+        "vertexai._genai._evals_metric_loaders.LazyLoadedPrebuiltMetric._fetch_and_parse"
     ) as mock_fetch_prebuilt_metric:
 
         def mock_evaluate_instances_side_effect(*args, **kwargs):
@@ -235,7 +237,7 @@ def test_display_evaluation_result_with_agent_trace_prefixes(self, mock_is_ipyth
         )
         eval_result = vertexai_genai_types.EvaluationResult(
             evaluation_dataset=[eval_dataset],
-            agent_info=vertexai_genai_types.AgentInfo(name="test_agent"),
+            agent_info=vertexai_genai_types.evals.AgentInfo(name="test_agent"),
             eval_case_results=[
                 vertexai_genai_types.EvalCaseResult(
                     eval_case_index=0,
@@ -284,7 +286,7 @@ def setup_method(self):
         self.client = vertexai.Client(project=_TEST_PROJECT, location=_TEST_LOCATION)
 
     @mock.patch.object(_evals_common, "Models")
-    @mock.patch.object(_evals_utils, "EvalDatasetLoader")
+    @mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
     def test_inference_with_string_model_success(
         self, mock_eval_dataset_loader, mock_models
     ):
@@ -327,7 +329,7 @@ def test_inference_with_string_model_success(
         assert inference_result.candidate_name == "gemini-pro"
         assert inference_result.gcs_source is None
 
-    @mock.patch.object(_evals_utils, "EvalDatasetLoader")
+    @mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
     def test_inference_with_callable_model_sets_candidate_name(
         self, mock_eval_dataset_loader
     ):
@@ -346,7 +348,7 @@ def my_model_fn(contents):
         assert inference_result.candidate_name == "my_model_fn"
         assert inference_result.gcs_source is None
 
-    @mock.patch.object(_evals_utils, "EvalDatasetLoader")
+    @mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
     def test_inference_with_lambda_model_candidate_name_is_none(
         self, mock_eval_dataset_loader
     ):
@@ -368,7 +370,7 @@ def test_inference_with_lambda_model_candidate_name_is_none(
         )
         assert inference_result.gcs_source is None
 
-    @mock.patch.object(_evals_utils, "EvalDatasetLoader")
+    @mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
     def test_inference_with_callable_model_success(self, mock_eval_dataset_loader):
         mock_df = pd.DataFrame({"prompt": ["test prompt"]})
         mock_eval_dataset_loader.return_value.load.return_value = mock_df.to_dict(
@@ -396,7 +398,7 @@ def mock_model_fn(contents):
         assert inference_result.gcs_source is None
 
     @mock.patch.object(_evals_common, "Models")
-    @mock.patch.object(_evals_utils, "EvalDatasetLoader")
+    @mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
     def test_inference_with_prompt_template(
         self, mock_eval_dataset_loader, mock_models
     ):
@@ -443,8 +445,8 @@ def test_inference_with_prompt_template(
         assert inference_result.gcs_source is None
 
     @mock.patch.object(_evals_common, "Models")
-    @mock.patch.object(_evals_utils, "EvalDatasetLoader")
-    @mock.patch.object(_evals_utils, "GcsUtils")
+    @mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
+    @mock.patch.object(_gcs_utils, "GcsUtils")
     def test_inference_with_gcs_destination(
         self, mock_gcs_utils, mock_eval_dataset_loader, mock_models
     ):
@@ -497,7 +499,7 @@ def test_inference_with_gcs_destination(
         )
 
     @mock.patch.object(_evals_common, "Models")
-    @mock.patch.object(_evals_utils, "EvalDatasetLoader")
+    @mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
     @mock.patch("pandas.DataFrame.to_json")
     @mock.patch("os.makedirs")
     def test_inference_with_local_destination(
@@ -549,7 +551,7 @@ def test_inference_with_local_destination(
         assert inference_result.gcs_source is None
 
     @mock.patch.object(_evals_common, "Models")
-    @mock.patch.object(_evals_utils, "EvalDatasetLoader")
+    @mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
     def test_inference_from_request_column_save_to_local_dir(
         self, mock_eval_dataset_loader, mock_models
     ):
@@ -783,7 +785,7 @@ def test_inference_from_local_csv_file(self, mock_models):
         assert inference_result.gcs_source is None
 
     @mock.patch.object(_evals_common, "Models")
-    @mock.patch.object(_evals_utils, "EvalDatasetLoader")
+    @mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
     def test_inference_with_row_level_config_overrides(
         self, mock_eval_dataset_loader, mock_models
     ):
@@ -968,7 +970,7 @@ def mock_generate_content_logic(*args, **kwargs):
         assert inference_result.gcs_source is None
 
     @mock.patch.object(_evals_common, "Models")
-    @mock.patch.object(_evals_utils, "EvalDatasetLoader")
+    @mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
     def test_inference_with_multimodal_content(
         self, mock_eval_dataset_loader, mock_models
     ):
@@ -1044,7 +1046,7 @@ def test_inference_with_multimodal_content(
         assert inference_result.candidate_name == "gemini-pro"
         assert inference_result.gcs_source is None
 
-    @mock.patch.object(_evals_utils, "EvalDatasetLoader")
+    @mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
     @mock.patch("vertexai._genai._evals_common.vertexai.Client")
     def test_run_inference_with_agent_engine_and_session_inputs_dict(
         self,
@@ -1140,7 +1142,7 @@ async def _async_iterator(iterable):
         assert inference_result.candidate_name == "agent"
         assert inference_result.gcs_source is None
 
-    @mock.patch.object(_evals_utils, "EvalDatasetLoader")
+    @mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
     @mock.patch("vertexai._genai._evals_common.vertexai.Client")
     def test_run_inference_with_agent_engine_and_session_inputs_literal_string(
         self,
@@ -1423,7 +1425,7 @@ def test_run_inference_with_litellm_import_error(self, mock_api_client_fixture):
     @mock.patch.object(_evals_common, "_is_gemini_model")
     @mock.patch.object(_evals_common, "_is_litellm_model")
     @mock.patch.object(_evals_common, "_is_litellm_vertex_maas_model")
-    @mock.patch.object(_evals_utils, "EvalDatasetLoader")
+    @mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
     def test_run_inference_with_litellm_parsing(
         self,
         mock_eval_dataset_loader,
@@ -2783,7 +2785,7 @@ def test_agent_info_creation(self):
                 )
             ]
         )
-        agent_info = vertexai_genai_types.AgentInfo(
+        agent_info = vertexai_genai_types.evals.AgentInfo(
             name="agent1",
             instruction="instruction1",
             description="description1",
@@ -2827,7 +2829,7 @@ def test_eval_case_with_agent_eval_fields(self):
                 )
             ]
         )
-        agent_info = vertexai_genai_types.AgentInfo(
+        agent_info = vertexai_genai_types.evals.AgentInfo(
             name="agent1",
             instruction="instruction1",
             tool_declarations=[tool],
@@ -2933,7 +2935,7 @@ def test_metric_name_validation_lowercase(self):
         metric = vertexai_genai_types.Metric(name="UPPERCASEMetric")
         assert metric.name == "uppercasemetric"
 
-    @mock.patch("vertexai._genai.types.yaml.dump")
+    @mock.patch("vertexai._genai.types.common.yaml.dump")
     @mock.patch("builtins.open", new_callable=mock.mock_open)
     def test_metric_to_yaml_file_with_version_and_set_fields(
         self, mock_open_file, mock_yaml_dump
@@ -2970,7 +2972,7 @@ def test_metric_to_yaml_file_with_version_and_set_fields(
             allow_unicode=True,
         )
 
-    @mock.patch("vertexai._genai.types.yaml.dump")
+    @mock.patch("vertexai._genai.types.common.yaml.dump")
     @mock.patch("builtins.open", new_callable=mock.mock_open)
     def test_metric_to_yaml_file_without_version_minimal_fields(
         self, mock_open_file, mock_yaml_dump
@@ -2991,7 +2993,7 @@ def test_metric_to_yaml_file_without_version_minimal_fields(
             allow_unicode=True,
         )
 
-    @mock.patch("vertexai._genai.types.yaml", None)
+    @mock.patch("vertexai._genai.types.common.yaml", None)
     def test_metric_to_yaml_file_raises_importerror_if_yaml_is_none(self):
         metric_obj = vertexai_genai_types.Metric(name="ErrorMetric")
         with pytest.raises(
@@ -3699,7 +3701,7 @@ def test_eval_case_to_agent_data(self):
                 )
             ]
         )
-        agent_info = vertexai_genai_types.AgentInfo(
+        agent_info = vertexai_genai_types.evals.AgentInfo(
             name="agent1",
             instruction="instruction1",
             tool_declarations=[tool],
@@ -3797,7 +3799,6 @@ def setup_method(self):
         importlib.reload(aiplatform_initializer)
         importlib.reload(aiplatform)
         importlib.reload(vertexai)
-        importlib.reload(genai_types)
         importlib.reload(vertexai_genai_types)
         importlib.reload(_evals_data_converters)
         importlib.reload(_evals_metric_handlers)
@@ -4346,7 +4347,7 @@ def test_execute_evaluation_with_openai_schema(
             name="test_metric", prompt_template="Evaluate: {response}"
         )
 
-        with mock.patch.object(_evals_utils, "EvalDatasetLoader") as mock_loader_class:
+        with mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader") as mock_loader_class:
             mock_loader_instance = mock_loader_class.return_value
             mock_loader_instance.load.return_value = mock_openai_raw_data
 
@@ -4599,7 +4600,7 @@ def test_execute_evaluation_lazy_loaded_prebuilt_metric_instance(
             eval_dataset_df=dataset_df
         )
 
-        lazy_metric_instance = _evals_utils.LazyLoadedPrebuiltMetric(
+        lazy_metric_instance = _evals_metric_loaders.LazyLoadedPrebuiltMetric(
             name="fluency", version="v1"
         )
 
@@ -4799,7 +4800,7 @@ def test_execute_evaluation_adds_creation_timestamp(
 class TestEvaluationDataset:
     """Contains set of tests for the EvaluationDataset class methods."""
 
-    @mock.patch.object(_evals_utils, "GcsUtils")
+    @mock.patch.object(_gcs_utils, "GcsUtils")
     def test_load_from_observability_eval_cases(self, mock_gcs_utils):
         """Tests that load_from_observability_eval_cases reads data from GCS."""
 
@@ -4851,7 +4852,7 @@ def read_file_contents_side_effect(src: str) -> str:
             ),
         )
 
-    @mock.patch.object(_evals_utils, "GcsUtils")
+    @mock.patch.object(_gcs_utils, "GcsUtils")
     def test_load_from_observability_eval_cases_no_system_instruction(
         self, mock_gcs_utils
     ):
@@ -4903,7 +4904,7 @@ def read_file_contents_side_effect(src: str) -> str:
             ),
         )
 
-    @mock.patch.object(_evals_utils, "GcsUtils")
+    @mock.patch.object(_gcs_utils, "GcsUtils")
     def test_load_from_observability_eval_cases_multiple_cases(self, mock_gcs_utils):
         """Test load_from_observability_eval_cases can handle multiple cases."""
 
 
@@ -0,0 +1,49 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import logging
+
+from google.cloud import bigquery
+from google.genai._api_client import BaseApiClient
+import pandas as pd
+
+
+logger = logging.getLogger(__name__)
+
+
+class BigQueryUtils:  # helper that owns a bigquery.Client and exposes DataFrame load/upload methods
+    """Handles BigQuery operations."""
+
+    def __init__(self, api_client: BaseApiClient):  # api_client supplies the project and credentials used below
+        self.api_client = api_client
+        self.bigquery_client = bigquery.Client(  # one client is created here and reused by both methods
+            project=self.api_client.project,
+            credentials=self.api_client._credentials,  # NOTE(review): reads a private attribute of the API client
+        )
+
+    def load_bigquery_to_dataframe(self, table_uri: str) -> "pd.DataFrame":  # table_uri is passed straight to get_table
+        """Loads data from a BigQuery table into a DataFrame."""
+        table = self.bigquery_client.get_table(table_uri)  # resolve the table object first; list_rows is given that object
+        return self.bigquery_client.list_rows(table).to_dataframe()  # no row limit or column selection is passed
+
+    def upload_dataframe_to_bigquery(
+        self, df: "pd.DataFrame", bq_table_uri: str  # df is the data to load; bq_table_uri is the destination handed to the client
+    ) -> None:
+        """Uploads a Pandas DataFrame to a BigQuery table."""
+        job = self.bigquery_client.load_table_from_dataframe(df, bq_table_uri)  # returns a job object, kept so its result can be awaited
+        job.result()  # obtain the job result before logging; presumably raises if the load failed — confirm against the client docs
+        logger.info(  # only reached when job.result() returned without raising
+            f"DataFrame successfully uploaded to BigQuery table: {bq_table_uri}"
+        )