Skip to content

Commit b8a2d19

Browse files
fix duration tests
1 parent f07be8e commit b8a2d19

File tree

4 files changed

+12
-3
lines changed

4 files changed

+12
-3
lines changed

bigframes/core/pyarrow_utils.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,13 @@ def chunk_by_row_count(
7474
yield buffer.take_as_batches(len(buffer))
7575

7676

77+
def cast_batch(batch: pa.RecordBatch, schema: pa.Schema) -> pa.RecordBatch:
    """Cast a RecordBatch to the target schema.

    Args:
        batch: The record batch to cast.
        schema: The pyarrow schema the returned batch must have.

    Returns:
        ``batch`` unchanged if its schema already matches ``schema``,
        otherwise a new RecordBatch with the same data cast to ``schema``.
    """
    # Fast path: nothing to do when the schema already matches.
    if batch.schema == schema:
        return batch
    # Newer pyarrow versions can cast a RecordBatch directly; prefer that to
    # avoid the Table round-trip below.
    if hasattr(batch, "cast"):
        return batch.cast(schema)
    # Older supported pyarrow versions lack RecordBatch.cast, so round-trip
    # through a single-batch Table instead.
    return pa.Table.from_batches([batch]).cast(schema).to_batches()[0]
82+
83+
7784
def truncate_pyarrow_iterable(
7885
batches: Iterable[pa.RecordBatch], max_results: int
7986
) -> Iterator[pa.RecordBatch]:

bigframes/session/executor.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def arrow_batches(self) -> Iterator[pyarrow.RecordBatch]:
5050
result_rows = 0
5151

5252
for batch in self._arrow_batches:
53+
batch = pyarrow_utils.cast_batch(batch, self.schema.to_pyarrow())
5354
result_rows += batch.num_rows
5455

5556
maximum_result_rows = bigframes.options.compute.maximum_result_rows

tests/system/small/test_dataframe.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1695,6 +1695,7 @@ def test_get_dtypes(scalars_df_default_index):
16951695
"string_col": pd.StringDtype(storage="pyarrow"),
16961696
"time_col": pd.ArrowDtype(pa.time64("us")),
16971697
"timestamp_col": pd.ArrowDtype(pa.timestamp("us", tz="UTC")),
1698+
"duration_col": pd.ArrowDtype(pa.duration("us")),
16981699
}
16991700
pd.testing.assert_series_equal(
17001701
dtypes,

tests/system/small/test_dataframe_io.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def test_sql_executes(scalars_df_default_index, bigquery_client):
5555
"""
5656
# Do some operations to make for more complex SQL.
5757
df = (
58-
scalars_df_default_index.drop(columns=["geography_col"])
58+
scalars_df_default_index.drop(columns=["geography_col", "duration_col"])
5959
.groupby("string_col")
6060
.max()
6161
)
@@ -87,7 +87,7 @@ def test_sql_executes_and_includes_named_index(
8787
"""
8888
# Do some operations to make for more complex SQL.
8989
df = (
90-
scalars_df_default_index.drop(columns=["geography_col"])
90+
scalars_df_default_index.drop(columns=["geography_col", "duration_col"])
9191
.groupby("string_col")
9292
.max()
9393
)
@@ -120,7 +120,7 @@ def test_sql_executes_and_includes_named_multiindex(
120120
"""
121121
# Do some operations to make for more complex SQL.
122122
df = (
123-
scalars_df_default_index.drop(columns=["geography_col"])
123+
scalars_df_default_index.drop(columns=["geography_col", "duration_col"])
124124
.groupby(["string_col", "bool_col"])
125125
.max()
126126
)

0 commit comments

Comments
 (0)