googleapis
diff --git a/‎noxfile.py‎
Lines changed: 5 additions & 6 deletions b/‎noxfile.py‎
Lines changed: 5 additions & 6 deletions
diff --git a/‎tests/system/small/operations/test_plotting.py‎
Lines changed: 4 additions & 7 deletions b/‎tests/system/small/operations/test_plotting.py‎
Lines changed: 4 additions & 7 deletions
diff --git a/‎tests/system/small/test_dataframe.py‎
Lines changed: 0 additions & 10 deletions b/‎tests/system/small/test_dataframe.py‎
Lines changed: 0 additions & 10 deletions
diff --git a/‎tests/system/small/test_dataframe_io.py‎
Lines changed: 15 additions & 28 deletions b/‎tests/system/small/test_dataframe_io.py‎
Lines changed: 15 additions & 28 deletions
diff --git a/‎tests/system/small/test_encryption.py‎
Lines changed: 18 additions & 18 deletions b/‎tests/system/small/test_encryption.py‎
Lines changed: 18 additions & 18 deletions
diff --git a/‎tests/system/small/test_multiindex.py‎
Lines changed: 0 additions & 19 deletions b/‎tests/system/small/test_multiindex.py‎
Lines changed: 0 additions & 19 deletions
diff --git a/‎tests/system/small/test_pandas.py‎
Lines changed: 2 additions & 2 deletions b/‎tests/system/small/test_pandas.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎tests/system/small/test_progress_bar.py‎
Lines changed: 0 additions & 10 deletions b/‎tests/system/small/test_progress_bar.py‎
Lines changed: 0 additions & 10 deletions
@@ -112,8 +112,7 @@ def lint(session):
         "--check",
         *LINT_PATHS,
     )
-    # TODO(tswast): lint all LINT_PATHS
-    session.run("flake8", "bigframes", "tests")
+    session.run("flake8", *LINT_PATHS)
 
 
 @nox.session(python=DEFAULT_PYTHON_VERSION)
@@ -411,8 +410,8 @@ def samples(session):
         CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt"
     )
 
-    # TODO(swast): Use `requirements.txt` files from the samples directories to
-    # test samples.
+    # TODO(b/332735129): Remove this session and use python_samples templates
+    # where each samples directory has its own noxfile.py file, instead.
     install_test_extra = True
     install_systemtest_dependencies(session, install_test_extra, "-c", constraints_path)
 
@@ -434,12 +433,12 @@ def cover(session):
     session.run("coverage", "report", "--show-missing", "--fail-under=90")
 
     # Make sure there is no dead code in our test directories.
-    # TODO(swast): Cleanup dead code in the system tests directory.
     session.run(
         "coverage",
         "report",
         "--show-missing",
         "--include=tests/unit/*",
+        "--include=tests/system/small/*",
         "--fail-under=100",
     )
 
@@ -714,7 +713,7 @@ def notebook(session: nox.Session):
         "notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb",  # Needs DATASET.
         "notebooks/regression/bq_dataframes_ml_linear_regression.ipynb",  # Needs DATASET_ID.
         "notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb",  # Needs CONNECTION.
-        # TODO(swast): investigate why we get 404 errors, even though
+        # TODO(b/332737009): investigate why we get 404 errors, even though
         # bq_dataframes_llm_code_generation creates a bucket in the sample.
         "notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb",  # Needs BUCKET_URI.
         "notebooks/generative_ai/sentiment_analysis.ipynb",  # Too slow
 
@@ -27,13 +27,10 @@ def _check_legend_labels(ax, labels):
     """
     assert ax.get_legend() is not None
     texts = ax.get_legend().get_texts()
-    if not isinstance(texts, list):
-        assert texts.get_text() == labels
-    else:
-        actual_labels = [t.get_text() for t in texts]
-        assert len(actual_labels) == len(labels)
-        for label, e in zip(actual_labels, labels):
-            assert label == e
+    actual_labels = [t.get_text() for t in texts]
+    assert len(actual_labels) == len(labels)
+    for label, e in zip(actual_labels, labels):
+        assert label == e
 
 
 def test_series_hist_bins(scalars_dfs):
 
@@ -524,13 +524,6 @@ def test_repr_w_all_rows(scalars_dfs):
     scalars_df = scalars_df.drop(columns=["numeric_col"])
     scalars_pandas_df = scalars_pandas_df.drop(columns=["numeric_col"])
 
-    if scalars_pandas_df.index.name is None:
-        # Note: Not quite the same as no index / default index, but hopefully
-        # simulates it well enough while being consistent enough for string
-        # comparison to work.
-        scalars_df = scalars_df.set_index("rowindex", drop=False).sort_index()
-        scalars_df.index.name = None
-
     # When there are 10 or fewer rows, the outputs should be identical.
     actual = repr(scalars_df.head(10))
 
@@ -3956,9 +3949,6 @@ def test_df_value_counts(scalars_dfs, subset, normalize, ascending, dropna):
         ("bottom", "dense", False, False),
     ],
 )
-@pytest.mark.skipif(
-    True, reason="Blocked by possible pandas rank() regression (b/283278923)"
-)
 def test_df_rank_with_nulls(
     scalars_df_index,
     scalars_pandas_df_index,
 
@@ -23,7 +23,8 @@
 
 try:
     import pandas_gbq  # type: ignore
-except ImportError:
+except ImportError:  # pragma: NO COVER
+    # TODO(b/332758806): Run system tests without "extras"
     pandas_gbq = None
 
 import typing
@@ -129,12 +130,9 @@ def test_to_csv_index(
     """Test the `to_csv` API with the `index` parameter."""
     scalars_df, scalars_pandas_df = scalars_dfs
     index_col = None
-    if scalars_df.index.name is not None:
-        path = gcs_folder + f"test_index_df_to_csv_index_{index}*.csv"
-        if index:
-            index_col = typing.cast(str, scalars_df.index.name)
-    else:
-        path = gcs_folder + f"test_default_index_df_to_csv_index_{index}*.csv"
+    path = gcs_folder + f"test_index_df_to_csv_index_{index}*.csv"
+    if index:
+        index_col = typing.cast(str, scalars_df.index.name)
 
     # TODO(swast): Support "date_format" parameter and make sure our
     # DATETIME/TIMESTAMP column export is the same format as pandas by default.
@@ -386,11 +384,8 @@ def test_to_json_index_invalid_orient(
     gcs_folder: str,
     index: bool,
 ):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    if scalars_df.index.name is not None:
-        path = gcs_folder + f"test_index_df_to_json_index_{index}*.jsonl"
-    else:
-        path = gcs_folder + f"test_default_index_df_to_json_index_{index}*.jsonl"
+    scalars_df, _ = scalars_dfs
+    path = gcs_folder + f"test_index_df_to_json_index_{index}*.jsonl"
     with pytest.raises(ValueError):
         scalars_df.to_json(path, index=index, lines=True)
 
@@ -404,11 +399,8 @@ def test_to_json_index_invalid_lines(
     gcs_folder: str,
     index: bool,
 ):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    if scalars_df.index.name is not None:
-        path = gcs_folder + f"test_index_df_to_json_index_{index}.jsonl"
-    else:
-        path = gcs_folder + f"test_default_index_df_to_json_index_{index}.jsonl"
+    scalars_df, _ = scalars_dfs
+    path = gcs_folder + f"test_index_df_to_json_index_{index}.jsonl"
     with pytest.raises(NotImplementedError):
         scalars_df.to_json(path, index=index)
 
@@ -422,14 +414,13 @@ def test_to_json_index_records_orient(
     gcs_folder: str,
     index: bool,
 ):
-    """Test the `to_json` API with the `index` parameter."""
+    """Test the `to_json` API with the `index` parameter.
+
+    Uses the scalable options orient='records' and lines=True.
+    """
     scalars_df, scalars_pandas_df = scalars_dfs
-    if scalars_df.index.name is not None:
-        path = gcs_folder + f"test_index_df_to_json_index_{index}*.jsonl"
-    else:
-        path = gcs_folder + f"test_default_index_df_to_json_index_{index}*.jsonl"
+    path = gcs_folder + f"test_index_df_to_json_index_{index}*.jsonl"
 
-    """ Test the `to_json` API with `orient` is `records` and `lines` is True"""
     scalars_df.to_json(path, index=index, orient="records", lines=True)
 
     gcs_df = pd.read_json(
@@ -460,11 +451,7 @@ def test_to_parquet_index(scalars_dfs, gcs_folder, index):
     """Test the `to_parquet` API with the `index` parameter."""
     scalars_df, scalars_pandas_df = scalars_dfs
     scalars_pandas_df = scalars_pandas_df.copy()
-
-    if scalars_df.index.name is not None:
-        path = gcs_folder + f"test_index_df_to_parquet_{index}*.parquet"
-    else:
-        path = gcs_folder + f"test_default_index_df_to_parquet_{index}*.parquet"
+    path = gcs_folder + f"test_index_df_to_parquet_{index}*.parquet"
 
     # TODO(b/268693993): Type GEOGRAPHY is not currently supported for parquet.
     scalars_df = scalars_df.drop(columns="geography_col")
 
@@ -64,8 +64,8 @@ def _assert_bq_table_is_encrypted(
 
 
 def test_session_query_job(bq_cmek, session_with_bq_cmek):
-    if not bq_cmek:
-        pytest.skip("no cmek set for testing")
+    if not bq_cmek:  # pragma: NO COVER
+        pytest.skip("no cmek set for testing")  # pragma: NO COVER
 
     _, query_job = session_with_bq_cmek._start_query(
         "SELECT 123", job_config=bigquery.QueryJobConfig(use_query_cache=False)
@@ -82,8 +82,8 @@ def test_session_query_job(bq_cmek, session_with_bq_cmek):
 
 
 def test_session_load_job(bq_cmek, session_with_bq_cmek):
-    if not bq_cmek:
-        pytest.skip("no cmek set for testing")
+    if not bq_cmek:  # pragma: NO COVER
+        pytest.skip("no cmek set for testing")  # pragma: NO COVER
 
     # Session should have cmek set in the default query and load job configs
     load_table = bigframes.session._io.bigquery.random_table(
@@ -114,8 +114,8 @@ def test_session_load_job(bq_cmek, session_with_bq_cmek):
 
 
 def test_read_gbq(bq_cmek, session_with_bq_cmek, scalars_table_id):
-    if not bq_cmek:
-        pytest.skip("no cmek set for testing")
+    if not bq_cmek:  # pragma: NO COVER
+        pytest.skip("no cmek set for testing")  # pragma: NO COVER
 
     # Read the BQ table
     df = session_with_bq_cmek.read_gbq(scalars_table_id)
@@ -125,8 +125,8 @@ def test_read_gbq(bq_cmek, session_with_bq_cmek, scalars_table_id):
 
 
 def test_df_apis(bq_cmek, session_with_bq_cmek, scalars_table_id):
-    if not bq_cmek:
-        pytest.skip("no cmek set for testing")
+    if not bq_cmek:  # pragma: NO COVER
+        pytest.skip("no cmek set for testing")  # pragma: NO COVER
 
     # Read a BQ table and assert encryption
     df = session_with_bq_cmek.read_gbq(scalars_table_id)
@@ -152,8 +152,8 @@ def test_df_apis(bq_cmek, session_with_bq_cmek, scalars_table_id):
 def test_read_csv_gcs(
     bq_cmek, session_with_bq_cmek, scalars_df_index, gcs_folder, engine
 ):
-    if not bq_cmek:
-        pytest.skip("no cmek set for testing")
+    if not bq_cmek:  # pragma: NO COVER
+        pytest.skip("no cmek set for testing")  # pragma: NO COVER
 
     # Create a csv in gcs
     write_path = gcs_folder + "test_read_csv_gcs_bigquery_engine*.csv"
@@ -170,8 +170,8 @@ def test_read_csv_gcs(
 
 
 def test_to_gbq(bq_cmek, session_with_bq_cmek, scalars_table_id):
-    if not bq_cmek:
-        pytest.skip("no cmek set for testing")
+    if not bq_cmek:  # pragma: NO COVER
+        pytest.skip("no cmek set for testing")  # pragma: NO COVER
 
     # Read a BQ table and assert encryption
     df = session_with_bq_cmek.read_gbq(scalars_table_id)
@@ -205,8 +205,8 @@ def test_to_gbq(bq_cmek, session_with_bq_cmek, scalars_table_id):
 
 
 def test_read_pandas(bq_cmek, session_with_bq_cmek):
-    if not bq_cmek:
-        pytest.skip("no cmek set for testing")
+    if not bq_cmek:  # pragma: NO COVER
+        pytest.skip("no cmek set for testing")  # pragma: NO COVER
 
     # Read a pandas dataframe
     df = session_with_bq_cmek.read_pandas(pandas.DataFrame([1]))
@@ -216,8 +216,8 @@ def test_read_pandas(bq_cmek, session_with_bq_cmek):
 
 
 def test_read_pandas_large(bq_cmek, session_with_bq_cmek):
-    if not bq_cmek:
-        pytest.skip("no cmek set for testing")
+    if not bq_cmek:  # pragma: NO COVER
+        pytest.skip("no cmek set for testing")  # pragma: NO COVER
 
     # Read a pandas dataframe large enough to trigger a BQ load job
     df = session_with_bq_cmek.read_pandas(pandas.DataFrame(range(10_000)))
@@ -227,8 +227,8 @@ def test_read_pandas_large(bq_cmek, session_with_bq_cmek):
 
 
 def test_bqml(bq_cmek, session_with_bq_cmek, penguins_table_id):
-    if not bq_cmek:
-        pytest.skip("no cmek set for testing")
+    if not bq_cmek:  # pragma: NO COVER
+        pytest.skip("no cmek set for testing")  # pragma: NO COVER
 
     model = bigframes.ml.linear_model.LinearRegression()
     df = session_with_bq_cmek.read_gbq(penguins_table_id).dropna()
 
@@ -882,25 +882,6 @@ def test_column_multi_index_unstack(scalars_df_index, scalars_pandas_df_index):
     pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
 
 
-@pytest.mark.skip(reason="Pandas fails in newer versions.")
-def test_column_multi_index_w_na_stack(scalars_df_index, scalars_pandas_df_index):
-    columns = ["int64_too", "int64_col", "rowindex_2"]
-    level1 = pandas.Index(["b", pandas.NA, pandas.NA])
-    # Need resulting column to be pyarrow string rather than object dtype
-    level2 = pandas.Index([pandas.NA, "b", "b"], dtype="string[pyarrow]")
-    multi_columns = pandas.MultiIndex.from_arrays([level1, level2])
-    bf_df = scalars_df_index[columns].copy()
-    bf_df.columns = multi_columns
-    pd_df = scalars_pandas_df_index[columns].copy()
-    pd_df.columns = multi_columns
-
-    bf_result = bf_df.stack().to_pandas()
-    pd_result = pd_df.stack()
-
-    # Pandas produces NaN, where bq dataframes produces pd.NA
-    pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
-
-
 def test_corr_w_multi_index(scalars_df_index, scalars_pandas_df_index):
     columns = ["int64_too", "float64_col", "int64_col"]
     multi_columns = pandas.MultiIndex.from_tuples(zip(["a", "b", "b"], [1, 2, 2]))
 
@@ -136,7 +136,7 @@ def test_get_dummies_series(scalars_dfs):
 
     # adjust for expected dtype differences
     for (column_name, type_name) in zip(pd_result.columns, pd_result.dtypes):
-        if type_name == "bool":
+        if type_name == "bool":  # pragma: NO COVER
             pd_result[column_name] = pd_result[column_name].astype("boolean")
     pd_result.columns = pd_result.columns.astype(object)
 
@@ -157,7 +157,7 @@ def test_get_dummies_series_nameless(scalars_dfs):
 
     # adjust for expected dtype differences
     for (column_name, type_name) in zip(pd_result.columns, pd_result.dtypes):
-        if type_name == "bool":
+        if type_name == "bool":  # pragma: NO COVER
             pd_result[column_name] = pd_result[column_name].astype("boolean")
     pd_result.columns = pd_result.columns.astype(object)
 
 
@@ -126,13 +126,3 @@ def test_query_job_repr(penguins_df_default_index: bf.dataframe.DataFrame):
     ]
     for string in string_checks:
         assert string in query_job_repr
-
-
-def test_query_job_dry_run(penguins_df_default_index: bf.dataframe.DataFrame, capsys):
-    with bf.option_context("display.repr_mode", "deferred"):
-        repr(penguins_df_default_index)
-        repr(penguins_df_default_index["body_mass_g"])
-        lines = capsys.readouterr().out.split("\n")
-        lines = filter(None, lines)
-        for line in lines:
-            assert "Computation deferred. Computation will process" in line