googleapis · tswast · Jun 30, 2025 · Jun 30, 2025 · Jun 30, 2025
@@ -42,18 +42,6 @@
         17486432.0,
         1919625975.0,
     ],
-    "num_materialized_or_scanned_rows": [
-        0.0,
-        6.0,
-        100.0,
-        4955.0,
-        23108.0,
-        139504.0,
-        616341.0,
-        3855698.0,
-        83725698.0,
-        5991998082.0,
-    ],
     "avg_row_bytes": [
         0.00014346299635435792,
         0.005370969708923197,
@@ -524,10 +512,11 @@ def main():
         for i in range(num_percentiles):
             percentile = TABLE_STATS["percentile"][i]
             avg_row_bytes_raw = TABLE_STATS["avg_row_bytes"][i]
-            num_rows_raw = TABLE_STATS["num_materialized_or_scanned_rows"][i]
+            table_bytes_raw = TABLE_STATS["materialized_or_scanned_bytes"][i]
 
+            target_table_bytes = max(1, int(math.ceil(table_bytes_raw)))
             target_row_bytes = max(1, int(math.ceil(avg_row_bytes_raw)))
-            num_rows = max(1, int(math.ceil(num_rows_raw)))
+            num_rows = max(1, int(math.ceil(target_table_bytes / target_row_bytes)))
 
             table_name = f"percentile_{percentile:02d}"
             print(f"\n--- Processing Table: {table_name} ---")

@@ -44,7 +44,7 @@ def aggregate_output(
     df_aggregated = (
         df.assign(rounded=df[group_column].astype("Int64").round(-9))
         .groupby("rounded")
-        .sum()
+        .sum(numeric_only=True)
     )
 
     df_aggregated.shape

@@ -14,6 +14,7 @@
 import pathlib
 
 import benchmark.utils as utils
+import pytest
 
 import bigframes.session
 
@@ -35,8 +36,15 @@ def filter_output(
 
     # Simulate the user filtering by a column and visualizing those results
     df_filtered = df[df["col_bool_0"]]
-    df_filtered.shape
-    next(iter(df_filtered.to_pandas_batches(page_size=PAGE_SIZE)))
+    rows, _ = df_filtered.shape
+
+    # The filter may match no rows at all, in which case there are no pages
+    # to fetch and the batch iterator is empty.
+    if rows == 0:
+        with pytest.raises(StopIteration):
+            next(iter(df_filtered.to_pandas_batches(page_size=PAGE_SIZE)))
+    else:
+        next(iter(df_filtered.to_pandas_batches(page_size=PAGE_SIZE)))
 
 
 if __name__ == "__main__":