Skip to content

Commit 62d8274

Browse files
committed
remove unnecessary execution in benchmark
1 parent 444510d commit 62d8274

File tree

4 files changed

+11
-63
lines changed

4 files changed

+11
-63
lines changed

tests/benchmark/read_gbq_colab/filter_output.py

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import pathlib
1515

1616
import bigframes.pandas as bpd
17-
import bigframes.session.execution_spec
1817
import tests.benchmark.utils as utils
1918

2019
PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE
@@ -30,32 +29,16 @@ def filter_output(
3029
# e.g. "{local_inline}" or "{local_large}"
3130
df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}")
3231

33-
# Call the executor directly to isolate the query execution time
34-
# from other DataFrame overhead for this benchmark.
35-
execute_result = df._block.session._executor.execute(
36-
df._block.expr,
37-
execution_spec=bigframes.session.execution_spec.ExecutionSpec(
38-
ordered=True, promise_under_10gb=False
39-
),
40-
)
41-
batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
32+
batches = df.to_pandas_batches(page_size=PAGE_SIZE)
4233
next(iter(batches))
4334

4435
# Simulate the user filtering by a column and visualizing those results
4536
df_filtered = df[df["col_bool_0"]]
46-
# Force BigQuery execution for filtered DataFrame to get total_rows metadata
47-
execute_result_filtered = df_filtered._block.session._executor.execute(
48-
df_filtered._block.expr,
49-
execution_spec=bigframes.session.execution_spec.ExecutionSpec(
50-
ordered=True, promise_under_10gb=False
51-
),
52-
)
37+
batches_filtered = df_filtered.to_pandas_batches(page_size=PAGE_SIZE)
5338

54-
rows = execute_result_filtered.total_rows or 0
39+
rows = batches_filtered.total_rows or 0
5540
assert rows >= 0
5641

57-
batches_filtered = execute_result_filtered.to_pandas_batches(page_size=PAGE_SIZE)
58-
5942
# It's possible we don't have any pages at all, since we filtered out all
6043
# matching rows.
6144
first_page = next(iter(batches_filtered))

tests/benchmark/read_gbq_colab/first_page.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import pathlib
1515

1616
import bigframes.pandas
17-
import bigframes.session.execution_spec
1817
import tests.benchmark.utils as utils
1918

2019
PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE
@@ -27,16 +26,8 @@ def first_page(*, project_id, dataset_id, table_id):
2726
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
2827
)
2928

30-
# Call the executor directly to isolate the query execution time
31-
# from other DataFrame overhead for this benchmark.
32-
execute_result = df._block.session._executor.execute(
33-
df._block.expr,
34-
execution_spec=bigframes.session.execution_spec.ExecutionSpec(
35-
ordered=True, promise_under_10gb=False
36-
),
37-
)
38-
assert execute_result.total_rows is not None and execute_result.total_rows >= 0
39-
batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
29+
batches = df.to_pandas_batches(page_size=PAGE_SIZE)
30+
assert batches.total_rows is not None and batches.total_rows >= 0
4031
first_page = next(iter(batches))
4132
assert first_page is not None
4233

tests/benchmark/read_gbq_colab/last_page.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,8 @@ def last_page(*, project_id, dataset_id, table_id):
2626
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
2727
)
2828

29-
# Call the executor directly to isolate the query execution time
30-
# from other DataFrame overhead for this benchmark.
31-
execute_result = df._block.session._executor.execute(
32-
df._block.expr,
33-
execution_spec=bigframes.session.execution_spec.ExecutionSpec(
34-
ordered=True, promise_under_10gb=False
35-
),
36-
)
37-
assert execute_result.total_rows is not None and execute_result.total_rows >= 0
38-
batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
29+
batches = df.to_pandas_batches(page_size=PAGE_SIZE)
30+
assert batches.total_rows is not None and batches.total_rows >= 0
3931
for _ in batches:
4032
pass
4133

tests/benchmark/read_gbq_colab/sort_output.py

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import pathlib
1515

1616
import bigframes.pandas
17-
import bigframes.session.execution_spec
1817
import tests.benchmark.utils as utils
1918

2019
PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE
@@ -27,16 +26,8 @@ def sort_output(*, project_id, dataset_id, table_id):
2726
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
2827
)
2928

30-
# Call the executor directly to isolate the query execution time
31-
# from other DataFrame overhead for this benchmark.
32-
execute_result = df._block.session._executor.execute(
33-
df._block.expr,
34-
execution_spec=bigframes.session.execution_spec.ExecutionSpec(
35-
ordered=True, promise_under_10gb=False
36-
),
37-
)
38-
assert execute_result.total_rows is not None and execute_result.total_rows >= 0
39-
batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
29+
batches = df.to_pandas_batches(page_size=PAGE_SIZE)
30+
assert batches.total_rows is not None and batches.total_rows >= 0
4031
next(iter(batches))
4132

4233
# Simulate the user sorting by a column and visualizing those results
@@ -45,17 +36,8 @@ def sort_output(*, project_id, dataset_id, table_id):
4536
sort_column = "col_bool_0"
4637

4738
df_sorted = df.sort_values(sort_column)
48-
execute_result_sorted = df_sorted._block.session._executor.execute(
49-
df_sorted._block.expr,
50-
execution_spec=bigframes.session.execution_spec.ExecutionSpec(
51-
ordered=True, promise_under_10gb=False
52-
),
53-
)
54-
assert (
55-
execute_result_sorted.total_rows is not None
56-
and execute_result_sorted.total_rows >= 0
57-
)
58-
batches_sorted = execute_result_sorted.to_pandas_batches(page_size=PAGE_SIZE)
39+
batches_sorted = df_sorted.to_pandas_batches(page_size=PAGE_SIZE)
40+
assert batches_sorted.total_rows is not None and batches_sorted.total_rows >= 0
5941
next(iter(batches_sorted))
6042

6143

0 commit comments

Comments (0)