Skip to content

Commit 8ebab95

Browse files
chore: move long running LLM tests to the large directory
Moved the following long-running tests from `tests/system/small/ml/` to `tests/system/large/ml/`:
- `test_llm_gemini_score`
- `test_llm_gemini_pro_score_params`
- `test_gemini_text_generator_predict_default_params_success`
- `test_gemini_text_generator_predict_output_schema_success`
- `test_gemini_text_generator_multi_cols_predict_success`
- `test_gemini_text_generator_predict_with_params_success`
- `test_create_load_gemini_text_generator_model`
- `test_gemini_text_generator_multimodal_input`
- `test_linear_reg_model_global_explain`
1 parent e4e3ec8 commit 8ebab95

File tree

6 files changed

+316
-274
lines changed

6 files changed

+316
-274
lines changed

tests/system/large/ml/test_linear_model.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,3 +452,38 @@ def test_model_centroids_with_custom_index(penguins_df_default_index):
452452

453453
# If this line executes without errors, the model has correctly ignored the custom index columns
454454
model.predict(X_train.reset_index(drop=True))
455+
456+
def test_linear_reg_model_global_explain(
    penguins_linear_model_w_global_explain, new_penguins_df
):
    """Fit the explain-enabled linear model and check its global explanation.

    The explanation must have shape (6, 1), a single ``attribution`` column,
    and an index made of the six expected feature names.
    """
    # Rows without a label cannot be used for training.
    labeled_rows = new_penguins_df.dropna(subset=["body_mass_g"])
    penguins_linear_model_w_global_explain.fit(
        labeled_rows.drop(columns=["body_mass_g"]),
        labeled_rows[["body_mass_g"]],
    )
    global_ex = penguins_linear_model_w_global_explain.global_explain()

    assert global_ex.shape == (6, 1)
    pd.testing.assert_index_equal(global_ex.columns, pd.Index(["attribution"]))

    feature_names = [
        "island",
        "species",
        "sex",
        "flipper_length_mm",
        "culmen_depth_mm",
        "culmen_length_mm",
    ]
    expected_feature = (
        pd.DataFrame({"feature": feature_names}).set_index("feature").sort_index()
    )

    # Attribution values are dropped, so only the feature index is compared.
    result = global_ex.to_pandas().drop(columns=["attribution"]).sort_index()
    pd.testing.assert_frame_equal(
        result,
        expected_feature,
        check_exact=False,
        check_index_type=False,
    )

tests/system/large/ml/test_llm.py

Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pandas as pd
16+
import pyarrow as pa
17+
import pytest
18+
19+
from bigframes import exceptions
20+
from bigframes.ml import llm
21+
import bigframes.pandas as bpd
22+
from bigframes.testing import utils
23+
24+
25+
@pytest.mark.parametrize(
    "model_name",
    (
        "gemini-2.0-flash-exp",
        "gemini-2.0-flash-001",
        "gemini-2.0-flash-lite-001",
        "gemini-2.5-pro",
        "gemini-2.5-flash",
        "gemini-2.5-flash-lite",
    ),
)
# Creating a model usually shouldn't be flaky, but this test is retried because
# of the limited quota of gemini-2.0-flash-exp.
@pytest.mark.flaky(retries=2)
def test_create_load_gemini_text_generator_model(
    dataset_id, model_name, session, bq_connection
):
    """Create a Gemini text generator, save it with ``to_gbq``, and check the copy.

    The model returned by ``to_gbq`` must report the target model id and keep
    the connection name and model name it was created with.
    """
    saved_model_id = f"{dataset_id}.temp_text_model"

    model = llm.GeminiTextGenerator(
        model_name=model_name, connection_name=bq_connection, session=session
    )
    assert model is not None
    assert model._bqml_model is not None

    # Save and inspect the returned model to ensure the configuration was kept.
    reloaded_model = model.to_gbq(saved_model_id, replace=True)
    assert saved_model_id == reloaded_model._bqml_model.model_name
    assert reloaded_model.connection_name == bq_connection
    assert reloaded_model.model_name == model_name
55+
56+
57+
@pytest.mark.parametrize(
    "model_name",
    (
        "gemini-2.0-flash-exp",
        "gemini-2.0-flash-001",
        "gemini-2.0-flash-lite-001",
        "gemini-2.5-pro",
        "gemini-2.5-flash",
        "gemini-2.5-flash-lite",
    ),
)
# Retried like the other tests in this file that run over the same model list;
# the marker had been left behind as commented-out code.
@pytest.mark.flaky(retries=2)
def test_gemini_text_generator_predict_default_params_success(
    llm_text_df, model_name, session, bq_connection
):
    """Run ``predict`` with default generation parameters for each model name.

    The materialized result is validated with
    ``utils.check_pandas_df_schema_and_index`` against
    ``utils.ML_GENERATE_TEXT_OUTPUT``; ``col_exact=False`` is passed, which
    presumably tolerates extra output columns (confirm against the helper).
    """
    gemini_text_generator_model = llm.GeminiTextGenerator(
        model_name=model_name, connection_name=bq_connection, session=session
    )
    df = gemini_text_generator_model.predict(llm_text_df).to_pandas()
    utils.check_pandas_df_schema_and_index(
        df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
    )
79+
80+
81+
@pytest.mark.parametrize(
    "model_name",
    (
        "gemini-2.0-flash-exp",
        "gemini-2.0-flash-001",
        "gemini-2.0-flash-lite-001",
        "gemini-2.5-pro",
        "gemini-2.5-flash",
        "gemini-2.5-flash-lite",
    ),
)
@pytest.mark.flaky(retries=2)
def test_gemini_text_generator_predict_with_params_success(
    llm_text_df, model_name, session, bq_connection
):
    """Run ``predict`` with explicit generation parameters for each model name.

    Temperature, output-token limit, top-k and top-p are all overridden, and
    the materialized result is validated against
    ``utils.ML_GENERATE_TEXT_OUTPUT``.
    """
    model = llm.GeminiTextGenerator(
        model_name=model_name, connection_name=bq_connection, session=session
    )
    predictions = model.predict(
        llm_text_df,
        temperature=0.5,
        max_output_tokens=100,
        top_k=20,
        top_p=0.5,
    )
    utils.check_pandas_df_schema_and_index(
        predictions.to_pandas(),
        columns=utils.ML_GENERATE_TEXT_OUTPUT,
        index=3,
        col_exact=False,
    )
105+
106+
107+
@pytest.mark.parametrize(
    "model_name",
    (
        "gemini-2.0-flash-exp",
        "gemini-2.0-flash-001",
        "gemini-2.0-flash-lite-001",
        "gemini-2.5-pro",
        "gemini-2.5-flash",
        "gemini-2.5-flash-lite",
    ),
)
@pytest.mark.flaky(retries=2)
def test_gemini_text_generator_multi_cols_predict_success(
    llm_text_df: bpd.DataFrame, model_name, session, bq_connection
):
    """Run ``predict`` on input that carries an extra, non-prompt column.

    The extra ``additional_col`` column must appear in the output alongside
    the columns listed in ``utils.ML_GENERATE_TEXT_OUTPUT``.
    """
    input_with_extra_col = llm_text_df.assign(additional_col=1)
    model = llm.GeminiTextGenerator(
        model_name=model_name, connection_name=bq_connection, session=session
    )
    expected_columns = utils.ML_GENERATE_TEXT_OUTPUT + ["additional_col"]

    pd_df = model.predict(input_with_extra_col).to_pandas()
    utils.check_pandas_df_schema_and_index(
        pd_df, columns=expected_columns, index=3, col_exact=False
    )
133+
134+
135+
@pytest.mark.parametrize(
    "model_name",
    (
        "gemini-2.0-flash-exp",
        "gemini-2.0-flash-001",
        "gemini-2.0-flash-lite-001",
        "gemini-2.5-pro",
        "gemini-2.5-flash",
        "gemini-2.5-flash-lite",
    ),
)
@pytest.mark.flaky(retries=2)
def test_gemini_text_generator_predict_output_schema_success(
    llm_text_df: bpd.DataFrame, model_name, session, bq_connection
):
    """Run ``predict`` with an ``output_schema`` and check the typed columns.

    Each requested output column must come back with the expected dtype, and
    the materialized frame must contain the schema columns plus ``prompt``,
    ``full_response`` and ``status``.
    """
    output_schema = {
        "bool_output": "bool",
        "int_output": "int64",
        "float_output": "float64",
        "str_output": "string",
        "array_output": "array<int64>",
        "struct_output": "struct<number int64>",
    }
    # Expected dtype for each column requested in output_schema.
    expected_dtypes = {
        "bool_output": pd.BooleanDtype(),
        "int_output": pd.Int64Dtype(),
        "float_output": pd.Float64Dtype(),
        "str_output": pd.StringDtype(storage="pyarrow"),
        "array_output": pd.ArrowDtype(pa.list_(pa.int64())),
        "struct_output": pd.ArrowDtype(pa.struct([("number", pa.int64())])),
    }

    model = llm.GeminiTextGenerator(
        model_name=model_name, connection_name=bq_connection, session=session
    )
    df = model.predict(llm_text_df, output_schema=output_schema)
    for column, expected_dtype in expected_dtypes.items():
        assert df[column].dtype == expected_dtype

    utils.check_pandas_df_schema_and_index(
        df.to_pandas(),
        columns=[*output_schema, "prompt", "full_response", "status"],
        index=3,
        col_exact=False,
    )
178+
179+
180+
@pytest.mark.flaky(retries=2)
@pytest.mark.parametrize(
    "model_name",
    (
        "gemini-2.0-flash-001",
        "gemini-2.0-flash-lite-001",
    ),
)
def test_llm_gemini_score(llm_fine_tune_df_default_index, model_name):
    """Score a Gemini model with default arguments and check the metric columns.

    The model is scored as constructed (this test has no fit step); the result
    must expose the BLEU/ROUGE metric columns plus ``evaluation_status``.
    """
    expected_columns = [
        "bleu4_score",
        "rouge-l_precision",
        "rouge-l_recall",
        "rouge-l_f1_score",
        "evaluation_status",
    ]
    model = llm.GeminiTextGenerator(model_name=model_name)

    # X and y are passed as single-column DataFrames here.
    prompts = llm_fine_tune_df_default_index[["prompt"]]
    labels = llm_fine_tune_df_default_index[["label"]]
    score_result = model.score(X=prompts, y=labels).to_pandas()

    utils.check_pandas_df_schema_and_index(
        score_result, columns=expected_columns, index=1
    )
207+
208+
209+
@pytest.mark.parametrize(
    "model_name",
    (
        "gemini-2.0-flash-001",
        "gemini-2.0-flash-lite-001",
    ),
)
def test_llm_gemini_pro_score_params(llm_fine_tune_df_default_index, model_name):
    """Score a Gemini model with ``task_type="classification"``.

    The model is scored as constructed (this test has no fit step); the result
    must expose the classification metric columns plus ``evaluation_status``.
    """
    expected_columns = [
        "precision",
        "recall",
        "f1_score",
        "label",
        "evaluation_status",
    ]
    model = llm.GeminiTextGenerator(model_name=model_name)

    # X and y are passed as Series here, not as single-column DataFrames.
    prompts = llm_fine_tune_df_default_index["prompt"]
    labels = llm_fine_tune_df_default_index["label"]
    score_result = model.score(
        X=prompts, y=labels, task_type="classification"
    ).to_pandas()

    utils.check_pandas_df_schema_and_index(score_result, columns=expected_columns)
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pandas as pd
16+
import pyarrow as pa
17+
import pytest
18+
19+
from bigframes.ml import llm
20+
import bigframes.pandas as bpd
21+
from bigframes.testing import utils
22+
23+
24+
@pytest.mark.parametrize(
    "model_name",
    (
        "gemini-2.0-flash-exp",
        "gemini-2.0-flash-001",
        "gemini-2.0-flash-lite-001",
    ),
)
@pytest.mark.flaky(retries=2)
def test_gemini_text_generator_multimodal_input(
    images_mm_df: bpd.DataFrame, model_name, session, bq_connection
):
    """Run ``predict`` with a prompt that mixes literal text and a blob column.

    The output must contain the columns in ``utils.ML_GENERATE_TEXT_OUTPUT``
    together with the original ``blob_col``.
    """
    model = llm.GeminiTextGenerator(
        model_name=model_name, connection_name=bq_connection, session=session
    )
    # The prompt is a list: a literal instruction followed by the blob column.
    mixed_prompt = ["Describe", images_mm_df["blob_col"]]
    expected_columns = utils.ML_GENERATE_TEXT_OUTPUT + ["blob_col"]

    pd_df = model.predict(images_mm_df, prompt=mixed_prompt).to_pandas()
    utils.check_pandas_df_schema_and_index(
        pd_df, columns=expected_columns, index=2, col_exact=False
    )

tests/system/small/ml/test_linear_model.py

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -228,42 +228,6 @@ def test_to_gbq_saved_linear_reg_model_scores(
228228
)
229229

230230

231-
def test_linear_reg_model_global_explain(
232-
penguins_linear_model_w_global_explain, new_penguins_df
233-
):
234-
training_data = new_penguins_df.dropna(subset=["body_mass_g"])
235-
X = training_data.drop(columns=["body_mass_g"])
236-
y = training_data[["body_mass_g"]]
237-
penguins_linear_model_w_global_explain.fit(X, y)
238-
global_ex = penguins_linear_model_w_global_explain.global_explain()
239-
assert global_ex.shape == (6, 1)
240-
expected_columns = pandas.Index(["attribution"])
241-
pandas.testing.assert_index_equal(global_ex.columns, expected_columns)
242-
result = global_ex.to_pandas().drop(["attribution"], axis=1).sort_index()
243-
expected_feature = (
244-
pandas.DataFrame(
245-
{
246-
"feature": [
247-
"island",
248-
"species",
249-
"sex",
250-
"flipper_length_mm",
251-
"culmen_depth_mm",
252-
"culmen_length_mm",
253-
]
254-
},
255-
)
256-
.set_index("feature")
257-
.sort_index()
258-
)
259-
pandas.testing.assert_frame_equal(
260-
result,
261-
expected_feature,
262-
check_exact=False,
263-
check_index_type=False,
264-
)
265-
266-
267231
def test_to_gbq_replace(penguins_linear_model, table_id_unique):
268232
penguins_linear_model.to_gbq(table_id_unique, replace=True)
269233
with pytest.raises(google.api_core.exceptions.Conflict):

0 commit comments

Comments
 (0)