From 8ebab952f13b45c6d1e56eee59d5980548358f62 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Wed, 10 Dec 2025 22:02:11 +0000
Subject: [PATCH 1/4] chore: move long running LLM tests to the large directory

Moved the following long-running tests from `tests/system/small/ml/` to
`tests/system/large/ml/`:

- `test_llm_gemini_score`
- `test_llm_gemini_pro_score_params`
- `test_gemini_text_generator_predict_default_params_success`
- `test_gemini_text_generator_predict_output_schema_success`
- `test_gemini_text_generator_multi_cols_predict_success`
- `test_gemini_text_generator_predict_with_params_success`
- `test_create_load_gemini_text_generator_model`
- `test_gemini_text_generator_multimodal_input`
- `test_linear_reg_model_global_explain`
---
 tests/system/large/ml/test_linear_model.py   |  35 +++
 tests/system/large/ml/test_llm.py            | 234 +++++++++++++++++++
 tests/system/large/ml/test_multimodal_llm.py |  47 ++++
 tests/system/small/ml/test_linear_model.py   |  36 ---
 tests/system/small/ml/test_llm.py            | 212 -----------------
 tests/system/small/ml/test_multimodal_llm.py |  26 ---
 6 files changed, 316 insertions(+), 274 deletions(-)
 create mode 100644 tests/system/large/ml/test_llm.py
 create mode 100644 tests/system/large/ml/test_multimodal_llm.py

diff --git a/tests/system/large/ml/test_linear_model.py b/tests/system/large/ml/test_linear_model.py
index f0e2892ba8..052353505e 100644
--- a/tests/system/large/ml/test_linear_model.py
+++ b/tests/system/large/ml/test_linear_model.py
@@ -452,3 +452,38 @@ def test_model_centroids_with_custom_index(penguins_df_default_index):
 
     # If this line executes without errors, the model has correctly ignored the custom index columns
     model.predict(X_train.reset_index(drop=True))
+
+def test_linear_reg_model_global_explain(
+    penguins_linear_model_w_global_explain, new_penguins_df
+):
+    training_data = new_penguins_df.dropna(subset=["body_mass_g"])
+    X = training_data.drop(columns=["body_mass_g"])
+    y = training_data[["body_mass_g"]]
+    penguins_linear_model_w_global_explain.fit(X, y)
+    global_ex = penguins_linear_model_w_global_explain.global_explain()
+    assert global_ex.shape == (6, 1)
+    expected_columns = pd.Index(["attribution"])
+    pd.testing.assert_index_equal(global_ex.columns, expected_columns)
+    result = global_ex.to_pandas().drop(["attribution"], axis=1).sort_index()
+    expected_feature = (
+        pd.DataFrame(
+            {
+                "feature": [
+                    "island",
+                    "species",
+                    "sex",
+                    "flipper_length_mm",
+                    "culmen_depth_mm",
+                    "culmen_length_mm",
+                ]
+            },
+        )
+        .set_index("feature")
+        .sort_index()
+    )
+    pd.testing.assert_frame_equal(
+        result,
+        expected_feature,
+        check_exact=False,
+        check_index_type=False,
+    )
diff --git a/tests/system/large/ml/test_llm.py b/tests/system/large/ml/test_llm.py
new file mode 100644
index 0000000000..31eb540087
--- /dev/null
+++ b/tests/system/large/ml/test_llm.py
@@ -0,0 +1,234 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pandas as pd
+import pyarrow as pa
+import pytest
+
+from bigframes import exceptions
+from bigframes.ml import llm
+import bigframes.pandas as bpd
+from bigframes.testing import utils
+
+
+@pytest.mark.parametrize(
+    "model_name",
+    (
+        "gemini-2.0-flash-exp",
+        "gemini-2.0-flash-001",
+        "gemini-2.0-flash-lite-001",
+        "gemini-2.5-pro",
+        "gemini-2.5-flash",
+        "gemini-2.5-flash-lite",
+    ),
+)
+@pytest.mark.flaky(
+    retries=2
+)  # usually create model shouldn't be flaky, but this one is due to the limited quota of gemini-2.0-flash-exp.
+def test_create_load_gemini_text_generator_model(
+    dataset_id, model_name, session, bq_connection
+):
+    gemini_text_generator_model = llm.GeminiTextGenerator(
+        model_name=model_name, connection_name=bq_connection, session=session
+    )
+    assert gemini_text_generator_model is not None
+    assert gemini_text_generator_model._bqml_model is not None
+
+    # save, load to ensure configuration was kept
+    reloaded_model = gemini_text_generator_model.to_gbq(
+        f"{dataset_id}.temp_text_model", replace=True
+    )
+    assert f"{dataset_id}.temp_text_model" == reloaded_model._bqml_model.model_name
+    assert reloaded_model.connection_name == bq_connection
+    assert reloaded_model.model_name == model_name
+
+
+@pytest.mark.parametrize(
+    "model_name",
+    (
+        "gemini-2.0-flash-exp",
+        "gemini-2.0-flash-001",
+        "gemini-2.0-flash-lite-001",
+        "gemini-2.5-pro",
+        "gemini-2.5-flash",
+        "gemini-2.5-flash-lite",
+    ),
+)
+# @pytest.mark.flaky(retries=2)
+def test_gemini_text_generator_predict_default_params_success(
+    llm_text_df, model_name, session, bq_connection
+):
+    gemini_text_generator_model = llm.GeminiTextGenerator(
+        model_name=model_name, connection_name=bq_connection, session=session
+    )
+    df = gemini_text_generator_model.predict(llm_text_df).to_pandas()
+    utils.check_pandas_df_schema_and_index(
+        df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
+    )
+
+
+@pytest.mark.parametrize(
+    "model_name",
+    (
+        "gemini-2.0-flash-exp",
+        "gemini-2.0-flash-001",
+        "gemini-2.0-flash-lite-001",
+        "gemini-2.5-pro",
+        "gemini-2.5-flash",
+        "gemini-2.5-flash-lite",
+    ),
+)
+@pytest.mark.flaky(retries=2)
+def test_gemini_text_generator_predict_with_params_success(
+    llm_text_df, model_name, session, bq_connection
+):
+    gemini_text_generator_model = llm.GeminiTextGenerator(
+        model_name=model_name, connection_name=bq_connection, session=session
+    )
+    df = gemini_text_generator_model.predict(
+        llm_text_df, temperature=0.5, max_output_tokens=100, top_k=20, top_p=0.5
+    ).to_pandas()
+    utils.check_pandas_df_schema_and_index(
+        df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
+    )
+
+
+@pytest.mark.parametrize(
+    "model_name",
+    (
+        "gemini-2.0-flash-exp",
+        "gemini-2.0-flash-001",
+        "gemini-2.0-flash-lite-001",
+        "gemini-2.5-pro",
+        "gemini-2.5-flash",
+        "gemini-2.5-flash-lite",
+    ),
+)
+@pytest.mark.flaky(retries=2)
+def test_gemini_text_generator_multi_cols_predict_success(
+    llm_text_df: bpd.DataFrame, model_name, session, bq_connection
+):
+    df = llm_text_df.assign(additional_col=1)
+    gemini_text_generator_model = llm.GeminiTextGenerator(
+        model_name=model_name, connection_name=bq_connection, session=session
+    )
+    pd_df = gemini_text_generator_model.predict(df).to_pandas()
+    utils.check_pandas_df_schema_and_index(
+        pd_df,
+        columns=utils.ML_GENERATE_TEXT_OUTPUT + ["additional_col"],
+        index=3,
+        col_exact=False,
+    )
+
+
+@pytest.mark.parametrize(
+    "model_name",
+    (
+        "gemini-2.0-flash-exp",
+        "gemini-2.0-flash-001",
+        "gemini-2.0-flash-lite-001",
"gemini-2.5-pro", + "gemini-2.5-flash", + "gemini-2.5-flash-lite", + ), +) +@pytest.mark.flaky(retries=2) +def test_gemini_text_generator_predict_output_schema_success( + llm_text_df: bpd.DataFrame, model_name, session, bq_connection +): + gemini_text_generator_model = llm.GeminiTextGenerator( + model_name=model_name, connection_name=bq_connection, session=session + ) + output_schema = { + "bool_output": "bool", + "int_output": "int64", + "float_output": "float64", + "str_output": "string", + "array_output": "array", + "struct_output": "struct", + } + df = gemini_text_generator_model.predict(llm_text_df, output_schema=output_schema) + assert df["bool_output"].dtype == pd.BooleanDtype() + assert df["int_output"].dtype == pd.Int64Dtype() + assert df["float_output"].dtype == pd.Float64Dtype() + assert df["str_output"].dtype == pd.StringDtype(storage="pyarrow") + assert df["array_output"].dtype == pd.ArrowDtype(pa.list_(pa.int64())) + assert df["struct_output"].dtype == pd.ArrowDtype( + pa.struct([("number", pa.int64())]) + ) + + pd_df = df.to_pandas() + utils.check_pandas_df_schema_and_index( + pd_df, + columns=list(output_schema.keys()) + ["prompt", "full_response", "status"], + index=3, + col_exact=False, + ) + + +@pytest.mark.flaky(retries=2) +@pytest.mark.parametrize( + "model_name", + ( + "gemini-2.0-flash-001", + "gemini-2.0-flash-lite-001", + ), +) +def test_llm_gemini_score(llm_fine_tune_df_default_index, model_name): + model = llm.GeminiTextGenerator(model_name=model_name) + + # Check score to ensure the model was fitted + score_result = model.score( + X=llm_fine_tune_df_default_index[["prompt"]], + y=llm_fine_tune_df_default_index[["label"]], + ).to_pandas() + utils.check_pandas_df_schema_and_index( + score_result, + columns=[ + "bleu4_score", + "rouge-l_precision", + "rouge-l_recall", + "rouge-l_f1_score", + "evaluation_status", + ], + index=1, + ) + + +@pytest.mark.parametrize( + "model_name", + ( + "gemini-2.0-flash-001", + "gemini-2.0-flash-lite-001", + ), +) +def test_llm_gemini_pro_score_params(llm_fine_tune_df_default_index, model_name): + model = llm.GeminiTextGenerator(model_name=model_name) + + # Check score to ensure the model was fitted + score_result = model.score( + X=llm_fine_tune_df_default_index["prompt"], + y=llm_fine_tune_df_default_index["label"], + task_type="classification", + ).to_pandas() + utils.check_pandas_df_schema_and_index( + score_result, + columns=[ + "precision", + "recall", + "f1_score", + "label", + "evaluation_status", + ], + ) diff --git a/tests/system/large/ml/test_multimodal_llm.py b/tests/system/large/ml/test_multimodal_llm.py new file mode 100644 index 0000000000..12eade47ba --- /dev/null +++ b/tests/system/large/ml/test_multimodal_llm.py @@ -0,0 +1,47 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import pandas as pd
+import pyarrow as pa
+import pytest
+
+from bigframes.ml import llm
+import bigframes.pandas as bpd
+from bigframes.testing import utils
+
+
+@pytest.mark.parametrize(
+    "model_name",
+    (
+        "gemini-2.0-flash-exp",
+        "gemini-2.0-flash-001",
+        "gemini-2.0-flash-lite-001",
+    ),
+)
+@pytest.mark.flaky(retries=2)
+def test_gemini_text_generator_multimodal_input(
+    images_mm_df: bpd.DataFrame, model_name, session, bq_connection
+):
+    gemini_text_generator_model = llm.GeminiTextGenerator(
+        model_name=model_name, connection_name=bq_connection, session=session
+    )
+    pd_df = gemini_text_generator_model.predict(
+        images_mm_df, prompt=["Describe", images_mm_df["blob_col"]]
+    ).to_pandas()
+    utils.check_pandas_df_schema_and_index(
+        pd_df,
+        columns=utils.ML_GENERATE_TEXT_OUTPUT + ["blob_col"],
+        index=2,
+        col_exact=False,
+    )
diff --git a/tests/system/small/ml/test_linear_model.py b/tests/system/small/ml/test_linear_model.py
index 8b04d55e61..da9fc8e14f 100644
--- a/tests/system/small/ml/test_linear_model.py
+++ b/tests/system/small/ml/test_linear_model.py
@@ -228,42 +228,6 @@ def test_to_gbq_saved_linear_reg_model_scores(
     )
 
 
-def test_linear_reg_model_global_explain(
-    penguins_linear_model_w_global_explain, new_penguins_df
-):
-    training_data = new_penguins_df.dropna(subset=["body_mass_g"])
-    X = training_data.drop(columns=["body_mass_g"])
-    y = training_data[["body_mass_g"]]
-    penguins_linear_model_w_global_explain.fit(X, y)
-    global_ex = penguins_linear_model_w_global_explain.global_explain()
-    assert global_ex.shape == (6, 1)
-    expected_columns = pandas.Index(["attribution"])
-    pandas.testing.assert_index_equal(global_ex.columns, expected_columns)
-    result = global_ex.to_pandas().drop(["attribution"], axis=1).sort_index()
-    expected_feature = (
-        pandas.DataFrame(
-            {
-                "feature": [
-                    "island",
-                    "species",
-                    "sex",
-                    "flipper_length_mm",
-                    "culmen_depth_mm",
-                    "culmen_length_mm",
-                ]
-            },
-        )
-        .set_index("feature")
-        .sort_index()
-    )
-    pandas.testing.assert_frame_equal(
-        result,
-        expected_feature,
-        check_exact=False,
-        check_index_type=False,
-    )
-
-
 def test_to_gbq_replace(penguins_linear_model, table_id_unique):
     penguins_linear_model.to_gbq(table_id_unique, replace=True)
     with pytest.raises(google.api_core.exceptions.Conflict):
diff --git a/tests/system/small/ml/test_llm.py b/tests/system/small/ml/test_llm.py
index 112acb7cac..de06b55398 100644
--- a/tests/system/small/ml/test_llm.py
+++ b/tests/system/small/ml/test_llm.py
@@ -105,161 +105,6 @@ def test_create_load_multimodal_embedding_generator_model(
     assert reloaded_model.connection_name == bq_connection
 
 
-@pytest.mark.parametrize(
-    "model_name",
-    (
-        "gemini-2.0-flash-exp",
-        "gemini-2.0-flash-001",
-        "gemini-2.0-flash-lite-001",
-        "gemini-2.5-pro",
-        "gemini-2.5-flash",
-        "gemini-2.5-flash-lite",
-    ),
-)
-@pytest.mark.flaky(
-    retries=2
-)  # usually create model shouldn't be flaky, but this one due to the limited quota of gemini-2.0-flash-exp.
-def test_create_load_gemini_text_generator_model(
-    dataset_id, model_name, session, bq_connection
-):
-    gemini_text_generator_model = llm.GeminiTextGenerator(
-        model_name=model_name, connection_name=bq_connection, session=session
-    )
-    assert gemini_text_generator_model is not None
-    assert gemini_text_generator_model._bqml_model is not None
-
-    # save, load to ensure configuration was kept
-    reloaded_model = gemini_text_generator_model.to_gbq(
-        f"{dataset_id}.temp_text_model", replace=True
-    )
-    assert f"{dataset_id}.temp_text_model" == reloaded_model._bqml_model.model_name
-    assert reloaded_model.connection_name == bq_connection
-    assert reloaded_model.model_name == model_name
-
-
-@pytest.mark.parametrize(
-    "model_name",
-    (
-        "gemini-2.0-flash-exp",
-        "gemini-2.0-flash-001",
-        "gemini-2.0-flash-lite-001",
-        "gemini-2.5-pro",
-        "gemini-2.5-flash",
-        "gemini-2.5-flash-lite",
-    ),
-)
-# @pytest.mark.flaky(retries=2)
-def test_gemini_text_generator_predict_default_params_success(
-    llm_text_df, model_name, session, bq_connection
-):
-    gemini_text_generator_model = llm.GeminiTextGenerator(
-        model_name=model_name, connection_name=bq_connection, session=session
-    )
-    df = gemini_text_generator_model.predict(llm_text_df).to_pandas()
-    utils.check_pandas_df_schema_and_index(
-        df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
-    )
-
-
-@pytest.mark.parametrize(
-    "model_name",
-    (
-        "gemini-2.0-flash-exp",
-        "gemini-2.0-flash-001",
-        "gemini-2.0-flash-lite-001",
-        "gemini-2.5-pro",
-        "gemini-2.5-flash",
-        "gemini-2.5-flash-lite",
-    ),
-)
-@pytest.mark.flaky(retries=2)
-def test_gemini_text_generator_predict_with_params_success(
-    llm_text_df, model_name, session, bq_connection
-):
-    gemini_text_generator_model = llm.GeminiTextGenerator(
-        model_name=model_name, connection_name=bq_connection, session=session
-    )
-    df = gemini_text_generator_model.predict(
-        llm_text_df, temperature=0.5, max_output_tokens=100, top_k=20, top_p=0.5
-    ).to_pandas()
-    utils.check_pandas_df_schema_and_index(
-        df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
-    )
-
-
-@pytest.mark.parametrize(
-    "model_name",
-    (
-        "gemini-2.0-flash-exp",
-        "gemini-2.0-flash-001",
-        "gemini-2.0-flash-lite-001",
-        "gemini-2.5-pro",
-        "gemini-2.5-flash",
-        "gemini-2.5-flash-lite",
-    ),
-)
-@pytest.mark.flaky(retries=2)
-def test_gemini_text_generator_multi_cols_predict_success(
-    llm_text_df: bpd.DataFrame, model_name, session, bq_connection
-):
-    df = llm_text_df.assign(additional_col=1)
-    gemini_text_generator_model = llm.GeminiTextGenerator(
-        model_name=model_name, connection_name=bq_connection, session=session
-    )
-    pd_df = gemini_text_generator_model.predict(df).to_pandas()
-    utils.check_pandas_df_schema_and_index(
-        pd_df,
-        columns=utils.ML_GENERATE_TEXT_OUTPUT + ["additional_col"],
-        index=3,
-        col_exact=False,
-    )
-
-
-@pytest.mark.parametrize(
-    "model_name",
-    (
-        "gemini-2.0-flash-exp",
-        "gemini-2.0-flash-001",
-        "gemini-2.0-flash-lite-001",
-        "gemini-2.5-pro",
-        "gemini-2.5-flash",
-        "gemini-2.5-flash-lite",
-    ),
-)
-@pytest.mark.flaky(retries=2)
-def test_gemini_text_generator_predict_output_schema_success(
-    llm_text_df: bpd.DataFrame, model_name, session, bq_connection
-):
-    gemini_text_generator_model = llm.GeminiTextGenerator(
-        model_name=model_name, connection_name=bq_connection, session=session
-    )
-    output_schema = {
-        "bool_output": "bool",
-        "int_output": "int64",
-        "float_output": "float64",
-        "str_output": "string",
-        "array_output": "array<int64>",
"struct", - } - df = gemini_text_generator_model.predict(llm_text_df, output_schema=output_schema) - assert df["bool_output"].dtype == pd.BooleanDtype() - assert df["int_output"].dtype == pd.Int64Dtype() - assert df["float_output"].dtype == pd.Float64Dtype() - assert df["str_output"].dtype == pd.StringDtype(storage="pyarrow") - assert df["array_output"].dtype == pd.ArrowDtype(pa.list_(pa.int64())) - assert df["struct_output"].dtype == pd.ArrowDtype( - pa.struct([("number", pa.int64())]) - ) - - pd_df = df.to_pandas() - utils.check_pandas_df_schema_and_index( - pd_df, - columns=list(output_schema.keys()) + ["prompt", "full_response", "status"], - index=3, - col_exact=False, - ) - - # Overrides __eq__ function for comparing as mock.call parameter class EqCmpAllDataFrame(bpd.DataFrame): def __eq__(self, other): @@ -742,63 +587,6 @@ def test_text_embedding_generator_retry_no_progress(session, bq_connection): ) -@pytest.mark.flaky(retries=2) -@pytest.mark.parametrize( - "model_name", - ( - "gemini-2.0-flash-001", - "gemini-2.0-flash-lite-001", - ), -) -def test_llm_gemini_score(llm_fine_tune_df_default_index, model_name): - model = llm.GeminiTextGenerator(model_name=model_name) - - # Check score to ensure the model was fitted - score_result = model.score( - X=llm_fine_tune_df_default_index[["prompt"]], - y=llm_fine_tune_df_default_index[["label"]], - ).to_pandas() - utils.check_pandas_df_schema_and_index( - score_result, - columns=[ - "bleu4_score", - "rouge-l_precision", - "rouge-l_recall", - "rouge-l_f1_score", - "evaluation_status", - ], - index=1, - ) - - -@pytest.mark.parametrize( - "model_name", - ( - "gemini-2.0-flash-001", - "gemini-2.0-flash-lite-001", - ), -) -def test_llm_gemini_pro_score_params(llm_fine_tune_df_default_index, model_name): - model = llm.GeminiTextGenerator(model_name=model_name) - - # Check score to ensure the model was fitted - score_result = model.score( - X=llm_fine_tune_df_default_index["prompt"], - y=llm_fine_tune_df_default_index["label"], - task_type="classification", - ).to_pandas() - utils.check_pandas_df_schema_and_index( - score_result, - columns=[ - "precision", - "recall", - "f1_score", - "label", - "evaluation_status", - ], - ) - - @pytest.mark.parametrize( "model_name", ("gemini-2.0-flash-exp",), diff --git a/tests/system/small/ml/test_multimodal_llm.py b/tests/system/small/ml/test_multimodal_llm.py index 48a69f522c..e29669afd3 100644 --- a/tests/system/small/ml/test_multimodal_llm.py +++ b/tests/system/small/ml/test_multimodal_llm.py @@ -38,32 +38,6 @@ def test_multimodal_embedding_generator_predict_default_params_success( assert len(df["ml_generate_embedding_result"][0]) == 1408 -@pytest.mark.parametrize( - "model_name", - ( - "gemini-2.0-flash-exp", - "gemini-2.0-flash-001", - "gemini-2.0-flash-lite-001", - ), -) -@pytest.mark.flaky(retries=2) -def test_gemini_text_generator_multimodal_input( - images_mm_df: bpd.DataFrame, model_name, session, bq_connection -): - gemini_text_generator_model = llm.GeminiTextGenerator( - model_name=model_name, connection_name=bq_connection, session=session - ) - pd_df = gemini_text_generator_model.predict( - images_mm_df, prompt=["Describe", images_mm_df["blob_col"]] - ).to_pandas() - utils.check_pandas_df_schema_and_index( - pd_df, - columns=utils.ML_GENERATE_TEXT_OUTPUT + ["blob_col"], - index=2, - col_exact=False, - ) - - @pytest.mark.parametrize( "model_name", ( From 176adbb2cbed6e9ab14e22c19e89321064e225bb Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" 
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Wed, 10 Dec 2025 22:07:16 +0000
Subject: [PATCH 2/4] chore: move long running LLM tests to the large directory

Moved the following long-running tests from `tests/system/small/ml/` to
`tests/system/large/ml/`:

- `test_llm_gemini_score`
- `test_llm_gemini_pro_score_params`
- `test_gemini_text_generator_predict_default_params_success`
- `test_gemini_text_generator_predict_output_schema_success`
- `test_gemini_text_generator_multi_cols_predict_success`
- `test_gemini_text_generator_predict_with_params_success`
- `test_create_load_gemini_text_generator_model`
- `test_gemini_text_generator_multimodal_input`
- `test_linear_reg_model_global_explain`

From a441205bd614c9e991eca92463efbe2d8b2df0dd Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Wed, 10 Dec 2025 22:30:49 +0000
Subject: [PATCH 3/4] chore: move long running LLM tests to the large directory

Moved the following long-running tests from `tests/system/small/ml/` to
`tests/system/large/ml/`:

- `test_llm_gemini_score`
- `test_llm_gemini_pro_score_params`
- `test_gemini_text_generator_predict_default_params_success`
- `test_gemini_text_generator_predict_output_schema_success`
- `test_gemini_text_generator_multi_cols_predict_success`
- `test_gemini_text_generator_predict_with_params_success`
- `test_create_load_gemini_text_generator_model`
- `test_gemini_text_generator_multimodal_input`
- `test_linear_reg_model_global_explain`
---
 tests/system/large/ml/test_linear_model.py   | 1 +
 tests/system/large/ml/test_llm.py            | 1 -
 tests/system/large/ml/test_multimodal_llm.py | 2 --
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/system/large/ml/test_linear_model.py b/tests/system/large/ml/test_linear_model.py
index 052353505e..a70d214b7f 100644
--- a/tests/system/large/ml/test_linear_model.py
+++ b/tests/system/large/ml/test_linear_model.py
@@ -453,6 +453,7 @@ def test_model_centroids_with_custom_index(penguins_df_default_index):
     # If this line executes without errors, the model has correctly ignored the custom index columns
     model.predict(X_train.reset_index(drop=True))
 
+
 def test_linear_reg_model_global_explain(
     penguins_linear_model_w_global_explain, new_penguins_df
 ):
diff --git a/tests/system/large/ml/test_llm.py b/tests/system/large/ml/test_llm.py
index 31eb540087..1daaebb8cb 100644
--- a/tests/system/large/ml/test_llm.py
+++ b/tests/system/large/ml/test_llm.py
@@ -16,7 +16,6 @@
 import pyarrow as pa
 import pytest
 
-from bigframes import exceptions
 from bigframes.ml import llm
 import bigframes.pandas as bpd
 from bigframes.testing import utils
diff --git a/tests/system/large/ml/test_multimodal_llm.py b/tests/system/large/ml/test_multimodal_llm.py
index 12eade47ba..03fdddf665 100644
--- a/tests/system/large/ml/test_multimodal_llm.py
+++ b/tests/system/large/ml/test_multimodal_llm.py
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pandas as pd
-import pyarrow as pa
 import pytest
 
 from bigframes.ml import llm

From 3b6dc9491ef6a78672a4faa37cd434cda635c93d Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Wed, 10 Dec 2025 22:36:57 +0000
Subject: [PATCH 4/4] chore: move long running LLM tests to the large directory

Moved the following long-running tests from `tests/system/small/ml/` to
`tests/system/large/ml/`:

- `test_llm_gemini_score`
- `test_llm_gemini_pro_score_params`
- `test_gemini_text_generator_predict_default_params_success`
- `test_gemini_text_generator_predict_output_schema_success`
- `test_gemini_text_generator_multi_cols_predict_success`
- `test_gemini_text_generator_predict_with_params_success`
- `test_create_load_gemini_text_generator_model`
- `test_gemini_text_generator_multimodal_input`
- `test_linear_reg_model_global_explain`
---
 tests/system/small/ml/test_llm.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/system/small/ml/test_llm.py b/tests/system/small/ml/test_llm.py
index de06b55398..d15c5d3160 100644
--- a/tests/system/small/ml/test_llm.py
+++ b/tests/system/small/ml/test_llm.py
@@ -16,7 +16,6 @@
 from unittest import mock
 
 import pandas as pd
-import pyarrow as pa
 import pytest
 
 from bigframes import exceptions