Skip to content

Commit 62d8274

Browse files
committed
remove unnecessary execution in benchmark
1 parent 444510d commit 62d8274

File tree

4 files changed

+11
-63
lines changed

4 files changed

+11
-63
lines changed

tests/benchmark/read_gbq_colab/filter_output.py

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import pathlib
1515

1616
import bigframes.pandas as bpd
17-
import bigframes.session.execution_spec
1817
import tests.benchmark.utils as utils
1918

2019
PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE
@@ -30,32 +29,16 @@ def filter_output(
3029
# e.g. "{local_inline}" or "{local_large}"
3130
df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}")
3231

33-
# Call the executor directly to isolate the query execution time
34-
# from other DataFrame overhead for this benchmark.
35-
execute_result = df._block.session._executor.execute(
36-
df._block.expr,
37-
execution_spec=bigframes.session.execution_spec.ExecutionSpec(
38-
ordered=True, promise_under_10gb=False
39-
),
40-
)
41-
batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
32+
batches = df.to_pandas_batches(page_size=PAGE_SIZE)
4233
next(iter(batches))
4334

4435
# Simulate the user filtering by a column and visualizing those results
4536
df_filtered = df[df["col_bool_0"]]
46-
# Force BigQuery execution for filtered DataFrame to get total_rows metadata
47-
execute_result_filtered = df_filtered._block.session._executor.execute(
48-
df_filtered._block.expr,
49-
execution_spec=bigframes.session.execution_spec.ExecutionSpec(
50-
ordered=True, promise_under_10gb=False
51-
),
52-
)
37+
batches_filtered = df_filtered.to_pandas_batches(page_size=PAGE_SIZE)
5338

54-
rows = execute_result_filtered.total_rows or 0
39+
rows = batches_filtered.total_rows or 0
5540
assert rows >= 0
5641

57-
batches_filtered = execute_result_filtered.to_pandas_batches(page_size=PAGE_SIZE)
58-
5942
# It's possible we don't have any pages at all, since we filtered out all
6043
# matching rows.
6144
first_page = next(iter(batches_filtered))

tests/benchmark/read_gbq_colab/first_page.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import pathlib
1515

1616
import bigframes.pandas
17-
import bigframes.session.execution_spec
1817
import tests.benchmark.utils as utils
1918

2019
PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE
@@ -27,16 +26,8 @@ def first_page(*, project_id, dataset_id, table_id):
2726
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
2827
)
2928

30-
# Call the executor directly to isolate the query execution time
31-
# from other DataFrame overhead for this benchmark.
32-
execute_result = df._block.session._executor.execute(
33-
df._block.expr,
34-
execution_spec=bigframes.session.execution_spec.ExecutionSpec(
35-
ordered=True, promise_under_10gb=False
36-
),
37-
)
38-
assert execute_result.total_rows is not None and execute_result.total_rows >= 0
39-
batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
29+
batches = df.to_pandas_batches(page_size=PAGE_SIZE)
30+
assert batches.total_rows is not None and batches.total_rows >= 0
4031
first_page = next(iter(batches))
4132
assert first_page is not None
4233

tests/benchmark/read_gbq_colab/last_page.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,8 @@ def last_page(*, project_id, dataset_id, table_id):
2626
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
2727
)
2828

29-
# Call the executor directly to isolate the query execution time
30-
# from other DataFrame overhead for this benchmark.
31-
execute_result = df._block.session._executor.execute(
32-
df._block.expr,
33-
execution_spec=bigframes.session.execution_spec.ExecutionSpec(
34-
ordered=True, promise_under_10gb=False
35-
),
36-
)
37-
assert execute_result.total_rows is not None and execute_result.total_rows >= 0
38-
batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
29+
batches = df.to_pandas_batches(page_size=PAGE_SIZE)
30+
assert batches.total_rows is not None and batches.total_rows >= 0
3931
for _ in batches:
4032
pass
4133

tests/benchmark/read_gbq_colab/sort_output.py

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import pathlib
1515

1616
import bigframes.pandas
17-
import bigframes.session.execution_spec
1817
import tests.benchmark.utils as utils
1918

2019
PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE
@@ -27,16 +26,8 @@ def sort_output(*, project_id, dataset_id, table_id):
2726
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
2827
)
2928

30-
# Call the executor directly to isolate the query execution time
31-
# from other DataFrame overhead for this benchmark.
32-
execute_result = df._block.session._executor.execute(
33-
df._block.expr,
34-
execution_spec=bigframes.session.execution_spec.ExecutionSpec(
35-
ordered=True, promise_under_10gb=False
36-
),
37-
)
38-
assert execute_result.total_rows is not None and execute_result.total_rows >= 0
39-
batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
29+
batches = df.to_pandas_batches(page_size=PAGE_SIZE)
30+
assert batches.total_rows is not None and batches.total_rows >= 0
4031
next(iter(batches))
4132

4233
# Simulate the user sorting by a column and visualizing those results
@@ -45,17 +36,8 @@ def sort_output(*, project_id, dataset_id, table_id):
4536
sort_column = "col_bool_0"
4637

4738
df_sorted = df.sort_values(sort_column)
48-
execute_result_sorted = df_sorted._block.session._executor.execute(
49-
df_sorted._block.expr,
50-
execution_spec=bigframes.session.execution_spec.ExecutionSpec(
51-
ordered=True, promise_under_10gb=False
52-
),
53-
)
54-
assert (
55-
execute_result_sorted.total_rows is not None
56-
and execute_result_sorted.total_rows >= 0
57-
)
58-
batches_sorted = execute_result_sorted.to_pandas_batches(page_size=PAGE_SIZE)
39+
batches_sorted = df_sorted.to_pandas_batches(page_size=PAGE_SIZE)
40+
assert batches_sorted.total_rows is not None and batches_sorted.total_rows >= 0
5941
next(iter(batches_sorted))
6042

6143

0 commit comments

Comments (0)