
Commit 9d3377a

fix remaining tests

1 parent a2c9679

4 files changed: 25 additions, 21 deletions

bigframes/core/local_data.py

Lines changed: 4 additions & 9 deletions
@@ -30,6 +30,7 @@
 import pyarrow as pa
 import pyarrow.parquet  # type: ignore
 
+from bigframes.core import pyarrow_utils
 import bigframes.core.schema as schemata
 import bigframes.dtypes
 
@@ -113,7 +114,9 @@ def to_arrow(
         schema = self.data.schema
         if duration_type == "int":
             schema = _schema_durations_to_ints(schema)
-        batches = map(functools.partial(_cast_pa_batch, schema=schema), batches)
+        batches = map(
+            functools.partial(pyarrow_utils.cast_batch, schema=schema), batches
+        )
 
         if offsets_col is not None:
             return schema.append(pa.field(offsets_col, pa.int64())), _append_offsets(
@@ -468,14 +471,6 @@ def _schema_durations_to_ints(schema: pa.Schema) -> pa.Schema:
     )
 
 
-# TODO: Use RecordBatch.cast once min pyarrow>=16.0
-def _cast_pa_batch(batch: pa.RecordBatch, schema: pa.Schema) -> pa.RecordBatch:
-    return pa.record_batch(
-        [arr.cast(type) for arr, type in zip(batch.columns, schema.types)],
-        schema=schema,
-    )
-
-
 def _pairwise(iterable):
     do_yield = False
     a = None
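
For context, the call site above first rewrites the schema so duration fields become plain integers, then casts each batch to that schema. A minimal sketch of what such a rewrite can look like (an illustration only; the real _schema_durations_to_ints body is not part of this diff and may handle more cases, such as nested types):

import pyarrow as pa

def schema_durations_to_ints(schema: pa.Schema) -> pa.Schema:
    # Hypothetical stand-in for bigframes' _schema_durations_to_ints:
    # swap every top-level duration field for an int64 field of the same name.
    return pa.schema(
        [
            pa.field(field.name, pa.int64())
            if pa.types.is_duration(field.type)
            else field
            for field in schema
        ]
    )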

bigframes/core/pyarrow_utils.py

Lines changed: 5 additions & 2 deletions
@@ -77,8 +77,11 @@ def chunk_by_row_count(
 def cast_batch(batch: pa.RecordBatch, schema: pa.Schema) -> pa.RecordBatch:
     if batch.schema == schema:
         return batch
-    # Newer pyarrow versions can directly cast batches, but older supported versions do not.
-    return pa.Table.from_batches([batch]).cast(schema).to_batches()[0]
+    # TODO: Use RecordBatch.cast once min pyarrow>=16.0
+    return pa.record_batch(
+        [arr.cast(type) for arr, type in zip(batch.columns, schema.types)],
+        schema=schema,
+    )
 
 
 def truncate_pyarrow_iterable(
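
A quick way to exercise the helper after this change (a sketch, assuming only a pyarrow installation; duration_col is just an example column name). Casting a duration array to int64 reinterprets each value as its integer count of the storage unit, and the column-wise Array.cast path works on pyarrow versions that predate RecordBatch.cast (added in pyarrow 16, per the TODO above):

import pyarrow as pa

from bigframes.core import pyarrow_utils

batch = pa.record_batch(
    [pa.array([1_500_000, None], type=pa.duration("us"))],
    names=["duration_col"],
)
target = pa.schema([pa.field("duration_col", pa.int64())])

casted = pyarrow_utils.cast_batch(batch, target)
print(casted.schema)     # duration_col: int64
print(casted.column(0))  # [1500000, null]: microsecond counts as plain int64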

tests/system/small/test_dataframe_io.py

Lines changed: 14 additions & 6 deletions
@@ -1006,7 +1006,9 @@ def test_to_sql_query_unnamed_index_included(
     assert idx_labels[0] is None
     assert idx_ids[0].startswith("bigframes")
 
-    pd_df = scalars_pandas_df_default_index.reset_index(drop=True)
+    pd_df = scalars_pandas_df_default_index.reset_index(drop=True).drop(
+        columns="duration_col"
+    )
     roundtrip = session.read_gbq(sql, index_col=idx_ids)
     roundtrip.index.names = [None]
     utils.assert_pandas_df_equal(roundtrip.to_pandas(), pd_df, check_index_type=False)
@@ -1026,7 +1028,9 @@ def test_to_sql_query_named_index_included(
     assert idx_labels[0] == "rowindex_2"
     assert idx_ids[0] == "rowindex_2"
 
-    pd_df = scalars_pandas_df_default_index.set_index("rowindex_2", drop=True)
+    pd_df = scalars_pandas_df_default_index.set_index("rowindex_2", drop=True).drop(
+        columns="duration_col"
+    )
     roundtrip = session.read_gbq(sql, index_col=idx_ids)
     utils.assert_pandas_df_equal(roundtrip.to_pandas(), pd_df)
 
@@ -1041,7 +1045,9 @@ def test_to_sql_query_unnamed_index_excluded(
     assert len(idx_labels) == 0
     assert len(idx_ids) == 0
 
-    pd_df = scalars_pandas_df_default_index.reset_index(drop=True)
+    pd_df = scalars_pandas_df_default_index.reset_index(drop=True).drop(
+        columns="duration_col"
+    )
     roundtrip = session.read_gbq(sql)
     utils.assert_pandas_df_equal(
         roundtrip.to_pandas(), pd_df, check_index_type=False, ignore_order=True
@@ -1060,9 +1066,11 @@ def test_to_sql_query_named_index_excluded(
     assert len(idx_labels) == 0
     assert len(idx_ids) == 0
 
-    pd_df = scalars_pandas_df_default_index.set_index(
-        "rowindex_2", drop=True
-    ).reset_index(drop=True)
+    pd_df = (
+        scalars_pandas_df_default_index.set_index("rowindex_2", drop=True)
+        .reset_index(drop=True)
+        .drop(columns="duration_col")
+    )
     roundtrip = session.read_gbq(sql)
     utils.assert_pandas_df_equal(
         roundtrip.to_pandas(), pd_df, check_index_type=False, ignore_order=True

tests/system/small/test_session.py

Lines changed: 2 additions & 4 deletions
@@ -1977,16 +1977,14 @@ def test_read_json_gcs_default_engine(session, scalars_dfs, gcs_folder):
 
     # The auto detects of BigQuery load job have restrictions to detect the bytes,
     # numeric and geometry types, so they're skipped here.
-    df = df.drop(columns=["bytes_col", "numeric_col", "geography_col"])
+    df = df.drop(columns=["bytes_col", "numeric_col", "geography_col", "duration_col"])
     scalars_df = scalars_df.drop(
         columns=["bytes_col", "numeric_col", "geography_col", "duration_col"]
     )
 
     # pandas read_json does not respect the dtype overrides for these columns
     df = df.drop(columns=["date_col", "datetime_col", "time_col"])
-    scalars_df = scalars_df.drop(
-        columns=["date_col", "datetime_col", "time_col", "duration_col"]
-    )
+    scalars_df = scalars_df.drop(columns=["date_col", "datetime_col", "time_col"])
 
     assert df.shape[0] == scalars_df.shape[0]
     pd.testing.assert_series_equal(df.dtypes, scalars_df.dtypes)
