Skip to content

Commit 57e1cca

Browse files
authored
chore: add context and address some TODOs in noxfile (#572)
* chore: add context and address some TODOs in noxfile * fix lint * remove dead code * mark test_query_job_dry_run as flaky * remove more dead code * escape [ * remove failing test * missing NO COVER
1 parent c8da22f commit 57e1cca

File tree

13 files changed

+81
-181
lines changed

13 files changed

+81
-181
lines changed

noxfile.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,7 @@ def lint(session):
112112
"--check",
113113
*LINT_PATHS,
114114
)
115-
# TODO(tswast): lint all LINT_PATHS
116-
session.run("flake8", "bigframes", "tests")
115+
session.run("flake8", *LINT_PATHS)
117116

118117

119118
@nox.session(python=DEFAULT_PYTHON_VERSION)
@@ -411,8 +410,8 @@ def samples(session):
411410
CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt"
412411
)
413412

414-
# TODO(swast): Use `requirements.txt` files from the samples directories to
415-
# test samples.
413+
# TODO(b/332735129): Remove this session and use python_samples templates
414+
# where each samples directory has its own noxfile.py file, instead.
416415
install_test_extra = True
417416
install_systemtest_dependencies(session, install_test_extra, "-c", constraints_path)
418417

@@ -434,12 +433,12 @@ def cover(session):
434433
session.run("coverage", "report", "--show-missing", "--fail-under=90")
435434

436435
# Make sure there is no dead code in our test directories.
437-
# TODO(swast): Cleanup dead code in the system tests directory.
438436
session.run(
439437
"coverage",
440438
"report",
441439
"--show-missing",
442440
"--include=tests/unit/*",
441+
"--include=tests/system/small/*",
443442
"--fail-under=100",
444443
)
445444

@@ -714,7 +713,7 @@ def notebook(session: nox.Session):
714713
"notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb", # Needs DATASET.
715714
"notebooks/regression/bq_dataframes_ml_linear_regression.ipynb", # Needs DATASET_ID.
716715
"notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb", # Needs CONNECTION.
717-
# TODO(swast): investigate why we get 404 errors, even though
716+
# TODO(b/332737009): investigate why we get 404 errors, even though
718717
# bq_dataframes_llm_code_generation creates a bucket in the sample.
719718
"notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb", # Needs BUCKET_URI.
720719
"notebooks/generative_ai/sentiment_analysis.ipynb", # Too slow

tests/system/small/operations/test_plotting.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,10 @@ def _check_legend_labels(ax, labels):
2727
"""
2828
assert ax.get_legend() is not None
2929
texts = ax.get_legend().get_texts()
30-
if not isinstance(texts, list):
31-
assert texts.get_text() == labels
32-
else:
33-
actual_labels = [t.get_text() for t in texts]
34-
assert len(actual_labels) == len(labels)
35-
for label, e in zip(actual_labels, labels):
36-
assert label == e
30+
actual_labels = [t.get_text() for t in texts]
31+
assert len(actual_labels) == len(labels)
32+
for label, e in zip(actual_labels, labels):
33+
assert label == e
3734

3835

3936
def test_series_hist_bins(scalars_dfs):

tests/system/small/test_dataframe.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -524,13 +524,6 @@ def test_repr_w_all_rows(scalars_dfs):
524524
scalars_df = scalars_df.drop(columns=["numeric_col"])
525525
scalars_pandas_df = scalars_pandas_df.drop(columns=["numeric_col"])
526526

527-
if scalars_pandas_df.index.name is None:
528-
# Note: Not quite the same as no index / default index, but hopefully
529-
# simulates it well enough while being consistent enough for string
530-
# comparison to work.
531-
scalars_df = scalars_df.set_index("rowindex", drop=False).sort_index()
532-
scalars_df.index.name = None
533-
534527
# When there are 10 or fewer rows, the outputs should be identical.
535528
actual = repr(scalars_df.head(10))
536529

@@ -3956,9 +3949,6 @@ def test_df_value_counts(scalars_dfs, subset, normalize, ascending, dropna):
39563949
("bottom", "dense", False, False),
39573950
],
39583951
)
3959-
@pytest.mark.skipif(
3960-
True, reason="Blocked by possible pandas rank() regression (b/283278923)"
3961-
)
39623952
def test_df_rank_with_nulls(
39633953
scalars_df_index,
39643954
scalars_pandas_df_index,

tests/system/small/test_dataframe_io.py

Lines changed: 15 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323

2424
try:
2525
import pandas_gbq # type: ignore
26-
except ImportError:
26+
except ImportError: # pragma: NO COVER
27+
# TODO(b/332758806): Run system tests without "extras"
2728
pandas_gbq = None
2829

2930
import typing
@@ -129,12 +130,9 @@ def test_to_csv_index(
129130
"""Test the `to_csv` API with the `index` parameter."""
130131
scalars_df, scalars_pandas_df = scalars_dfs
131132
index_col = None
132-
if scalars_df.index.name is not None:
133-
path = gcs_folder + f"test_index_df_to_csv_index_{index}*.csv"
134-
if index:
135-
index_col = typing.cast(str, scalars_df.index.name)
136-
else:
137-
path = gcs_folder + f"test_default_index_df_to_csv_index_{index}*.csv"
133+
path = gcs_folder + f"test_index_df_to_csv_index_{index}*.csv"
134+
if index:
135+
index_col = typing.cast(str, scalars_df.index.name)
138136

139137
# TODO(swast): Support "date_format" parameter and make sure our
140138
# DATETIME/TIMESTAMP column export is the same format as pandas by default.
@@ -386,11 +384,8 @@ def test_to_json_index_invalid_orient(
386384
gcs_folder: str,
387385
index: bool,
388386
):
389-
scalars_df, scalars_pandas_df = scalars_dfs
390-
if scalars_df.index.name is not None:
391-
path = gcs_folder + f"test_index_df_to_json_index_{index}*.jsonl"
392-
else:
393-
path = gcs_folder + f"test_default_index_df_to_json_index_{index}*.jsonl"
387+
scalars_df, _ = scalars_dfs
388+
path = gcs_folder + f"test_index_df_to_json_index_{index}*.jsonl"
394389
with pytest.raises(ValueError):
395390
scalars_df.to_json(path, index=index, lines=True)
396391

@@ -404,11 +399,8 @@ def test_to_json_index_invalid_lines(
404399
gcs_folder: str,
405400
index: bool,
406401
):
407-
scalars_df, scalars_pandas_df = scalars_dfs
408-
if scalars_df.index.name is not None:
409-
path = gcs_folder + f"test_index_df_to_json_index_{index}.jsonl"
410-
else:
411-
path = gcs_folder + f"test_default_index_df_to_json_index_{index}.jsonl"
402+
scalars_df, _ = scalars_dfs
403+
path = gcs_folder + f"test_index_df_to_json_index_{index}.jsonl"
412404
with pytest.raises(NotImplementedError):
413405
scalars_df.to_json(path, index=index)
414406

@@ -422,14 +414,13 @@ def test_to_json_index_records_orient(
422414
gcs_folder: str,
423415
index: bool,
424416
):
425-
"""Test the `to_json` API with the `index` parameter."""
417+
"""Test the `to_json` API with the `index` parameter.
418+
419+
Uses the scalable options orient='records' and lines=True.
420+
"""
426421
scalars_df, scalars_pandas_df = scalars_dfs
427-
if scalars_df.index.name is not None:
428-
path = gcs_folder + f"test_index_df_to_json_index_{index}*.jsonl"
429-
else:
430-
path = gcs_folder + f"test_default_index_df_to_json_index_{index}*.jsonl"
422+
path = gcs_folder + f"test_index_df_to_json_index_{index}*.jsonl"
431423

432-
""" Test the `to_json` API with `orient` is `records` and `lines` is True"""
433424
scalars_df.to_json(path, index=index, orient="records", lines=True)
434425

435426
gcs_df = pd.read_json(
@@ -460,11 +451,7 @@ def test_to_parquet_index(scalars_dfs, gcs_folder, index):
460451
"""Test the `to_parquet` API with the `index` parameter."""
461452
scalars_df, scalars_pandas_df = scalars_dfs
462453
scalars_pandas_df = scalars_pandas_df.copy()
463-
464-
if scalars_df.index.name is not None:
465-
path = gcs_folder + f"test_index_df_to_parquet_{index}*.parquet"
466-
else:
467-
path = gcs_folder + f"test_default_index_df_to_parquet_{index}*.parquet"
454+
path = gcs_folder + f"test_index_df_to_parquet_{index}*.parquet"
468455

469456
# TODO(b/268693993): Type GEOGRAPHY is not currently supported for parquet.
470457
scalars_df = scalars_df.drop(columns="geography_col")

tests/system/small/test_encryption.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ def _assert_bq_table_is_encrypted(
6464

6565

6666
def test_session_query_job(bq_cmek, session_with_bq_cmek):
67-
if not bq_cmek:
68-
pytest.skip("no cmek set for testing")
67+
if not bq_cmek: # pragma: NO COVER
68+
pytest.skip("no cmek set for testing") # pragma: NO COVER
6969

7070
_, query_job = session_with_bq_cmek._start_query(
7171
"SELECT 123", job_config=bigquery.QueryJobConfig(use_query_cache=False)
@@ -82,8 +82,8 @@ def test_session_query_job(bq_cmek, session_with_bq_cmek):
8282

8383

8484
def test_session_load_job(bq_cmek, session_with_bq_cmek):
85-
if not bq_cmek:
86-
pytest.skip("no cmek set for testing")
85+
if not bq_cmek: # pragma: NO COVER
86+
pytest.skip("no cmek set for testing") # pragma: NO COVER
8787

8888
# Session should have cmek set in the default query and load job configs
8989
load_table = bigframes.session._io.bigquery.random_table(
@@ -114,8 +114,8 @@ def test_session_load_job(bq_cmek, session_with_bq_cmek):
114114

115115

116116
def test_read_gbq(bq_cmek, session_with_bq_cmek, scalars_table_id):
117-
if not bq_cmek:
118-
pytest.skip("no cmek set for testing")
117+
if not bq_cmek: # pragma: NO COVER
118+
pytest.skip("no cmek set for testing") # pragma: NO COVER
119119

120120
# Read the BQ table
121121
df = session_with_bq_cmek.read_gbq(scalars_table_id)
@@ -125,8 +125,8 @@ def test_read_gbq(bq_cmek, session_with_bq_cmek, scalars_table_id):
125125

126126

127127
def test_df_apis(bq_cmek, session_with_bq_cmek, scalars_table_id):
128-
if not bq_cmek:
129-
pytest.skip("no cmek set for testing")
128+
if not bq_cmek: # pragma: NO COVER
129+
pytest.skip("no cmek set for testing") # pragma: NO COVER
130130

131131
# Read a BQ table and assert encryption
132132
df = session_with_bq_cmek.read_gbq(scalars_table_id)
@@ -152,8 +152,8 @@ def test_df_apis(bq_cmek, session_with_bq_cmek, scalars_table_id):
152152
def test_read_csv_gcs(
153153
bq_cmek, session_with_bq_cmek, scalars_df_index, gcs_folder, engine
154154
):
155-
if not bq_cmek:
156-
pytest.skip("no cmek set for testing")
155+
if not bq_cmek: # pragma: NO COVER
156+
pytest.skip("no cmek set for testing") # pragma: NO COVER
157157

158158
# Create a csv in gcs
159159
write_path = gcs_folder + "test_read_csv_gcs_bigquery_engine*.csv"
@@ -170,8 +170,8 @@ def test_read_csv_gcs(
170170

171171

172172
def test_to_gbq(bq_cmek, session_with_bq_cmek, scalars_table_id):
173-
if not bq_cmek:
174-
pytest.skip("no cmek set for testing")
173+
if not bq_cmek: # pragma: NO COVER
174+
pytest.skip("no cmek set for testing") # pragma: NO COVER
175175

176176
# Read a BQ table and assert encryption
177177
df = session_with_bq_cmek.read_gbq(scalars_table_id)
@@ -205,8 +205,8 @@ def test_to_gbq(bq_cmek, session_with_bq_cmek, scalars_table_id):
205205

206206

207207
def test_read_pandas(bq_cmek, session_with_bq_cmek):
208-
if not bq_cmek:
209-
pytest.skip("no cmek set for testing")
208+
if not bq_cmek: # pragma: NO COVER
209+
pytest.skip("no cmek set for testing") # pragma: NO COVER
210210

211211
# Read a pandas dataframe
212212
df = session_with_bq_cmek.read_pandas(pandas.DataFrame([1]))
@@ -216,8 +216,8 @@ def test_read_pandas(bq_cmek, session_with_bq_cmek):
216216

217217

218218
def test_read_pandas_large(bq_cmek, session_with_bq_cmek):
219-
if not bq_cmek:
220-
pytest.skip("no cmek set for testing")
219+
if not bq_cmek: # pragma: NO COVER
220+
pytest.skip("no cmek set for testing") # pragma: NO COVER
221221

222222
# Read a pandas dataframe large enough to trigger a BQ load job
223223
df = session_with_bq_cmek.read_pandas(pandas.DataFrame(range(10_000)))
@@ -227,8 +227,8 @@ def test_read_pandas_large(bq_cmek, session_with_bq_cmek):
227227

228228

229229
def test_bqml(bq_cmek, session_with_bq_cmek, penguins_table_id):
230-
if not bq_cmek:
231-
pytest.skip("no cmek set for testing")
230+
if not bq_cmek: # pragma: NO COVER
231+
pytest.skip("no cmek set for testing") # pragma: NO COVER
232232

233233
model = bigframes.ml.linear_model.LinearRegression()
234234
df = session_with_bq_cmek.read_gbq(penguins_table_id).dropna()

tests/system/small/test_multiindex.py

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -882,25 +882,6 @@ def test_column_multi_index_unstack(scalars_df_index, scalars_pandas_df_index):
882882
pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
883883

884884

885-
@pytest.mark.skip(reason="Pandas fails in newer versions.")
886-
def test_column_multi_index_w_na_stack(scalars_df_index, scalars_pandas_df_index):
887-
columns = ["int64_too", "int64_col", "rowindex_2"]
888-
level1 = pandas.Index(["b", pandas.NA, pandas.NA])
889-
# Need resulting column to be pyarrow string rather than object dtype
890-
level2 = pandas.Index([pandas.NA, "b", "b"], dtype="string[pyarrow]")
891-
multi_columns = pandas.MultiIndex.from_arrays([level1, level2])
892-
bf_df = scalars_df_index[columns].copy()
893-
bf_df.columns = multi_columns
894-
pd_df = scalars_pandas_df_index[columns].copy()
895-
pd_df.columns = multi_columns
896-
897-
bf_result = bf_df.stack().to_pandas()
898-
pd_result = pd_df.stack()
899-
900-
# Pandas produces NaN, where bq dataframes produces pd.NA
901-
pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
902-
903-
904885
def test_corr_w_multi_index(scalars_df_index, scalars_pandas_df_index):
905886
columns = ["int64_too", "float64_col", "int64_col"]
906887
multi_columns = pandas.MultiIndex.from_tuples(zip(["a", "b", "b"], [1, 2, 2]))

tests/system/small/test_pandas.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def test_get_dummies_series(scalars_dfs):
136136

137137
# adjust for expected dtype differences
138138
for (column_name, type_name) in zip(pd_result.columns, pd_result.dtypes):
139-
if type_name == "bool":
139+
if type_name == "bool": # pragma: NO COVER
140140
pd_result[column_name] = pd_result[column_name].astype("boolean")
141141
pd_result.columns = pd_result.columns.astype(object)
142142

@@ -157,7 +157,7 @@ def test_get_dummies_series_nameless(scalars_dfs):
157157

158158
# adjust for expected dtype differences
159159
for (column_name, type_name) in zip(pd_result.columns, pd_result.dtypes):
160-
if type_name == "bool":
160+
if type_name == "bool": # pragma: NO COVER
161161
pd_result[column_name] = pd_result[column_name].astype("boolean")
162162
pd_result.columns = pd_result.columns.astype(object)
163163

tests/system/small/test_progress_bar.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -126,13 +126,3 @@ def test_query_job_repr(penguins_df_default_index: bf.dataframe.DataFrame):
126126
]
127127
for string in string_checks:
128128
assert string in query_job_repr
129-
130-
131-
def test_query_job_dry_run(penguins_df_default_index: bf.dataframe.DataFrame, capsys):
132-
with bf.option_context("display.repr_mode", "deferred"):
133-
repr(penguins_df_default_index)
134-
repr(penguins_df_default_index["body_mass_g"])
135-
lines = capsys.readouterr().out.split("\n")
136-
lines = filter(None, lines)
137-
for line in lines:
138-
assert "Computation deferred. Computation will process" in line

0 commit comments

Comments
 (0)