revert changes to tests/benchmark/read_gbq_colab

shuoweil · shuoweil · commit a66f7f252d73 · 2025-10-22T00:11:33.000Z
diff --git a/tests/benchmark/read_gbq_colab/aggregate_output.py b/tests/benchmark/read_gbq_colab/aggregate_output.py
@@ -26,8 +26,9 @@ def aggregate_output(*, project_id, dataset_id, table_id):
     df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}")
 
     # Simulate getting the first page, since we'll always do that first in the UI.
-    df.shape
-    next(iter(df.to_pandas_batches(page_size=PAGE_SIZE)))
+    batches = df._to_pandas_batches(page_size=PAGE_SIZE)
+    assert (tr := batches.total_rows) is not None and tr >= 0
+    next(iter(batches))
 
     # To simulate very small rows that can only fit a boolean,
     # some tables don't have an integer column. If an integer column is available,
@@ -43,8 +44,9 @@ def aggregate_output(*, project_id, dataset_id, table_id):
         .sum(numeric_only=True)
     )
 
-    df_aggregated.shape
-    next(iter(df_aggregated.to_pandas_batches(page_size=PAGE_SIZE)))
+    batches = df_aggregated._to_pandas_batches(page_size=PAGE_SIZE)
+    assert (tr := batches.total_rows) is not None and tr >= 0
+    next(iter(batches))
 
 
 if __name__ == "__main__":
diff --git a/tests/benchmark/read_gbq_colab/filter_output.py b/tests/benchmark/read_gbq_colab/filter_output.py
@@ -30,20 +30,20 @@ def filter_output(
     # e.g. "{local_inline}" or "{local_large}"
     df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}")
 
-    batches = df.to_pandas_batches(page_size=PAGE_SIZE)
+    # Simulate getting the first page, since we'll always do that first in the UI.
+    batches = df._to_pandas_batches(page_size=PAGE_SIZE)
+    assert (tr := batches.total_rows) is not None and tr >= 0
     next(iter(batches))
 
     # Simulate the user filtering by a column and visualizing those results
     df_filtered = df[df["col_bool_0"]]
-    batches_filtered = df_filtered.to_pandas_batches(page_size=PAGE_SIZE)
-
-    rows = batches_filtered.total_rows or 0
-    assert rows >= 0
+    batches = df_filtered._to_pandas_batches(page_size=PAGE_SIZE)
+    assert (tr := batches.total_rows) is not None and tr >= 0
+    first_page = next(iter(batches))
 
     # It's possible we don't have any pages at all, since we filtered out all
     # matching rows.
-    first_page = next(iter(batches_filtered))
-    assert len(first_page.index) <= rows
+    assert len(first_page.index) <= tr
 
 
 if __name__ == "__main__":
diff --git a/tests/benchmark/read_gbq_colab/first_page.py b/tests/benchmark/read_gbq_colab/first_page.py
@@ -27,10 +27,10 @@ def first_page(*, project_id, dataset_id, table_id):
         f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
     )
 
-    batches = df.to_pandas_batches(page_size=PAGE_SIZE)
-    assert batches.total_rows is not None and batches.total_rows >= 0
-    first_page = next(iter(batches))
-    assert first_page is not None
+    # Get number of rows (to calculate number of pages) and the first page.
+    batches = df._to_pandas_batches(page_size=PAGE_SIZE)
+    assert (tr := batches.total_rows) is not None and tr >= 0
+    next(iter(batches))
 
 
 if __name__ == "__main__":
diff --git a/tests/benchmark/read_gbq_colab/last_page.py b/tests/benchmark/read_gbq_colab/last_page.py
@@ -27,8 +27,9 @@ def last_page(*, project_id, dataset_id, table_id):
         f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
     )
 
-    batches = df.to_pandas_batches(page_size=PAGE_SIZE)
-    assert batches.total_rows is not None and batches.total_rows >= 0
+    # Get number of rows (to calculate number of pages) and then all pages.
+    batches = df._to_pandas_batches(page_size=PAGE_SIZE)
+    assert (tr := batches.total_rows) is not None and tr >= 0
     for _ in batches:
         pass
 
diff --git a/tests/benchmark/read_gbq_colab/sort_output.py b/tests/benchmark/read_gbq_colab/sort_output.py
@@ -27,8 +27,9 @@ def sort_output(*, project_id, dataset_id, table_id):
         f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
     )
 
-    batches = df.to_pandas_batches(page_size=PAGE_SIZE)
-    assert batches.total_rows is not None and batches.total_rows >= 0
+    # Simulate getting the first page, since we'll always do that first in the UI.
+    batches = df._to_pandas_batches(page_size=PAGE_SIZE)
+    assert (tr := batches.total_rows) is not None and tr >= 0
     next(iter(batches))
 
     # Simulate the user sorting by a column and visualizing those results
@@ -37,9 +38,9 @@ def sort_output(*, project_id, dataset_id, table_id):
         sort_column = "col_bool_0"
 
     df_sorted = df.sort_values(sort_column)
-    batches_sorted = df_sorted.to_pandas_batches(page_size=PAGE_SIZE)
-    assert batches_sorted.total_rows is not None and batches_sorted.total_rows >= 0
-    next(iter(batches_sorted))
+    batches = df_sorted._to_pandas_batches(page_size=PAGE_SIZE)
+    assert (tr := batches.total_rows) is not None and tr >= 0
+    next(iter(batches))
 
 
 if __name__ == "__main__":