Skip to content

Commit a66f7f2

Browse files
committed
revert changes to tests/benchmark/read_gbq_colab
1 parent 86698c0 commit a66f7f2

File tree

5 files changed: 26 additions and 22 deletions.

tests/benchmark/read_gbq_colab/aggregate_output.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -26,8 +26,9 @@ def aggregate_output(*, project_id, dataset_id, table_id):
2626
df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}")
2727

2828
# Simulate getting the first page, since we'll always do that first in the UI.
29-
df.shape
30-
next(iter(df.to_pandas_batches(page_size=PAGE_SIZE)))
29+
batches = df._to_pandas_batches(page_size=PAGE_SIZE)
30+
assert (tr := batches.total_rows) is not None and tr >= 0
31+
next(iter(batches))
3132

3233
# To simulate very small rows that can only fit a boolean,
3334
# some tables don't have an integer column. If an integer column is available,
@@ -43,8 +44,9 @@ def aggregate_output(*, project_id, dataset_id, table_id):
4344
.sum(numeric_only=True)
4445
)
4546

46-
df_aggregated.shape
47-
next(iter(df_aggregated.to_pandas_batches(page_size=PAGE_SIZE)))
47+
batches = df_aggregated._to_pandas_batches(page_size=PAGE_SIZE)
48+
assert (tr := batches.total_rows) is not None and tr >= 0
49+
next(iter(batches))
4850

4951

5052
if __name__ == "__main__":

tests/benchmark/read_gbq_colab/filter_output.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -30,20 +30,20 @@ def filter_output(
3030
# e.g. "{local_inline}" or "{local_large}"
3131
df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}")
3232

33-
batches = df.to_pandas_batches(page_size=PAGE_SIZE)
33+
# Simulate getting the first page, since we'll always do that first in the UI.
34+
batches = df._to_pandas_batches(page_size=PAGE_SIZE)
35+
assert (tr := batches.total_rows) is not None and tr >= 0
3436
next(iter(batches))
3537

3638
# Simulate the user filtering by a column and visualizing those results
3739
df_filtered = df[df["col_bool_0"]]
38-
batches_filtered = df_filtered.to_pandas_batches(page_size=PAGE_SIZE)
39-
40-
rows = batches_filtered.total_rows or 0
41-
assert rows >= 0
40+
batches = df_filtered._to_pandas_batches(page_size=PAGE_SIZE)
41+
assert (tr := batches.total_rows) is not None and tr >= 0
42+
first_page = next(iter(batches))
4243

4344
# It's possible we don't have any pages at all, since we filtered out all
4445
# matching rows.
45-
first_page = next(iter(batches_filtered))
46-
assert len(first_page.index) <= rows
46+
assert len(first_page.index) <= tr
4747

4848

4949
if __name__ == "__main__":

tests/benchmark/read_gbq_colab/first_page.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -27,10 +27,10 @@ def first_page(*, project_id, dataset_id, table_id):
2727
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
2828
)
2929

30-
batches = df.to_pandas_batches(page_size=PAGE_SIZE)
31-
assert batches.total_rows is not None and batches.total_rows >= 0
32-
first_page = next(iter(batches))
33-
assert first_page is not None
30+
# Get number of rows (to calculate number of pages) and the first page.
31+
batches = df._to_pandas_batches(page_size=PAGE_SIZE)
32+
assert (tr := batches.total_rows) is not None and tr >= 0
33+
next(iter(batches))
3434

3535

3636
if __name__ == "__main__":

tests/benchmark/read_gbq_colab/last_page.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,9 @@ def last_page(*, project_id, dataset_id, table_id):
2727
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
2828
)
2929

30-
batches = df.to_pandas_batches(page_size=PAGE_SIZE)
31-
assert batches.total_rows is not None and batches.total_rows >= 0
30+
# Get number of rows (to calculate number of pages) and then all pages.
31+
batches = df._to_pandas_batches(page_size=PAGE_SIZE)
32+
assert (tr := batches.total_rows) is not None and tr >= 0
3233
for _ in batches:
3334
pass
3435

tests/benchmark/read_gbq_colab/sort_output.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,9 @@ def sort_output(*, project_id, dataset_id, table_id):
2727
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
2828
)
2929

30-
batches = df.to_pandas_batches(page_size=PAGE_SIZE)
31-
assert batches.total_rows is not None and batches.total_rows >= 0
30+
# Simulate getting the first page, since we'll always do that first in the UI.
31+
batches = df._to_pandas_batches(page_size=PAGE_SIZE)
32+
assert (tr := batches.total_rows) is not None and tr >= 0
3233
next(iter(batches))
3334

3435
# Simulate the user sorting by a column and visualizing those results
@@ -37,9 +38,9 @@ def sort_output(*, project_id, dataset_id, table_id):
3738
sort_column = "col_bool_0"
3839

3940
df_sorted = df.sort_values(sort_column)
40-
batches_sorted = df_sorted.to_pandas_batches(page_size=PAGE_SIZE)
41-
assert batches_sorted.total_rows is not None and batches_sorted.total_rows >= 0
42-
next(iter(batches_sorted))
41+
batches = df_sorted._to_pandas_batches(page_size=PAGE_SIZE)
42+
assert (tr := batches.total_rows) is not None and tr >= 0
43+
next(iter(batches))
4344

4445

4546
if __name__ == "__main__":

0 commit comments

Comments
 (0)