
Commit 1d5ee0e

change code and update more test cases
1 parent: a6a8a3f

6 files changed, +29 −33 lines

bigframes/display/anywidget.py

Lines changed: 7 additions & 10 deletions
@@ -72,26 +72,27 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         self._table_id = str(uuid.uuid4())
         self._all_data_loaded = False
         self._batch_iter: Optional[Iterator[pd.DataFrame]] = None
-        self._batches: Optional[bigframes.core.blocks.PandasBatches] = None
         self._cached_batches: List[pd.DataFrame] = []

         # Respect display options for initial page size
         initial_page_size = bigframes.options.display.max_rows

-        # Fetches initial data batches and row count for display.
         batches = dataframe.to_pandas_batches(
             page_size=initial_page_size,
         )
-        self._batches = cast(bigframes.core.blocks.PandasBatches, batches)
+        self._batches: bigframes.core.blocks.PandasBatches = cast(
+            bigframes.core.blocks.PandasBatches, batches
+        )

-        # Use total_rwos from batches directly
+        # The query issued by `to_pandas_batches()` already contains metadata
+        # about how many results there were. Use that to avoid doing an extra
+        # COUNT(*) query that `len(...)` would do.
         self.row_count = self._batches.total_rows or 0

         # Set page_size after _batches is available since traitlets observers
         # may depend on _batches being initialized when the change trigger happens
         self.page_size = initial_page_size

-        # Generates the initial HTML table content
         self._set_table_html()

     @functools.cached_property
@@ -182,11 +183,7 @@ def _get_next_batch(self) -> bool:
     @property
     def _batch_iterator(self) -> Iterator[pd.DataFrame]:
         """Lazily initializes and returns the batch iterator."""
-        if self._batch_iter is None:
-            if self._batches is None:
-                self._batch_iter = iter([])
-            else:
-                self._batch_iter = iter(self._batches)
+        self._batch_iter = iter(self._batches)
        return self._batch_iter

     @property
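
The widget change above boils down to reading the row count straight off the batches object instead of running a second query. A minimal sketch of that pattern outside the widget, assuming an authenticated bigframes session (the public table is just an example, not from this commit):

import bigframes.pandas as bpd

df = bpd.read_gbq("bigquery-public-data.usa_names.usa_1910_2013")

# The batches object carries the row count from the original query's
# metadata, so len(df), which would issue an extra COUNT(*) query, is
# avoided. Static type checkers may require a cast to see total_rows.
batches = df.to_pandas_batches(page_size=25)
row_count = batches.total_rows or 0
first_page = next(iter(batches))
print(row_count, len(first_page.index))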

tests/benchmark/read_gbq_colab/aggregate_output.py

Lines changed: 3 additions & 5 deletions
@@ -26,8 +26,7 @@ def aggregate_output(*, project_id, dataset_id, table_id):
     df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}")

     # Simulate getting the first page, since we'll always do that first in the UI.
-    batches = df._to_pandas_batches(page_size=PAGE_SIZE)
-    assert (tr := batches.total_rows) is not None and tr >= 0
+    batches = df.to_pandas_batches(page_size=PAGE_SIZE)
     next(iter(batches))

     # To simulate very small rows that can only fit a boolean,
@@ -44,9 +43,8 @@ def aggregate_output(*, project_id, dataset_id, table_id):
         .sum(numeric_only=True)
     )

-    batches = df_aggregated._to_pandas_batches(page_size=PAGE_SIZE)
-    assert (tr := batches.total_rows) is not None and tr >= 0
-    next(iter(batches))
+    batches_aggregated = df_aggregated.to_pandas_batches(page_size=PAGE_SIZE)
+    next(iter(batches_aggregated))


 if __name__ == "__main__":
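
Both call sites above now fetch just the first page with next(iter(...)). A hedged sketch of that idiom with the empty-result case handled explicitly (not part of the commit; df stands in for any bigframes DataFrame):

PAGE_SIZE = 100

batches = df.to_pandas_batches(page_size=PAGE_SIZE)
try:
    # Materializes a single page rather than the whole result set.
    first_page = next(iter(batches))
except StopIteration:
    first_page = None  # the query returned no rows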

tests/benchmark/read_gbq_colab/filter_output.py

Lines changed: 6 additions & 6 deletions
@@ -31,19 +31,19 @@ def filter_output(
     df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}")

     # Simulate getting the first page, since we'll always do that first in the UI.
-    batches = df._to_pandas_batches(page_size=PAGE_SIZE)
-    assert (tr := batches.total_rows) is not None and tr >= 0
+    batches = df.to_pandas_batches(page_size=PAGE_SIZE)
     next(iter(batches))

     # Simulate the user filtering by a column and visualizing those results
     df_filtered = df[df["col_bool_0"]]
-    batches = df_filtered._to_pandas_batches(page_size=PAGE_SIZE)
-    assert (tr := batches.total_rows) is not None and tr >= 0
-    first_page = next(iter(batches))
+    batches_filtered = df_filtered.to_pandas_batches(page_size=PAGE_SIZE)

     # It's possible we don't have any pages at all, since we filtered out all
     # matching rows.
-    assert len(first_page.index) <= tr
+    first_page = next(iter(batches_filtered))
+    rows = batches_filtered.total_rows
+    assert rows is not None
+    assert len(first_page.index) <= rows


 if __name__ == "__main__":
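
As the comment in this diff notes, a filter can drop every row, and next() on an exhausted iterator raises StopIteration rather than returning an empty frame. A sketch of an explicit guard under the same assumptions (df_filtered and PAGE_SIZE are hypothetical stand-ins):

import pandas as pd

batches_filtered = df_filtered.to_pandas_batches(page_size=PAGE_SIZE)
# next() with a default avoids StopIteration when no pages exist at all.
first_page = next(iter(batches_filtered), pd.DataFrame())
total = batches_filtered.total_rows or 0
assert len(first_page.index) <= total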

tests/benchmark/read_gbq_colab/first_page.py

Lines changed: 7 additions & 4 deletions
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import pathlib
+import typing

 import benchmark.utils as utils

@@ -27,10 +28,12 @@ def first_page(*, project_id, dataset_id, table_id):
         f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
     )

-    # Use total_rows from batches directly and the first page
-    execute_result = df._block.session._executor.execute(df._block.expr, ordered=True)
-    execute_result.total_rows or 0
-    next(iter(df.to_pandas_batches(page_size=PAGE_SIZE)))
+    # Get number of rows (to calculate number of pages) and the first page.
+    batches = df.to_pandas_batches(page_size=PAGE_SIZE)
+    first_page = next(iter(batches))
+    assert first_page is not None
+    total_rows = typing.cast(typing.Any, batches).total_rows
+    assert total_rows is not None


 if __name__ == "__main__":
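
The typing.cast(typing.Any, batches) suggests the public to_pandas_batches() is annotated as returning a plain iterable of DataFrames, so .total_rows is invisible to static type checkers even though the runtime object provides it; the casts in this commit are the evidence, and the exact annotation is an assumption. Two equivalent workarounds, the second matching the cast used in anywidget.py above:

import typing

import bigframes.core.blocks

batches = df.to_pandas_batches(page_size=PAGE_SIZE)

# Option 1: opt out of static checking for this one attribute access.
total_rows = typing.cast(typing.Any, batches).total_rows

# Option 2: cast to the concrete runtime type, keeping precise types.
typed_batches = typing.cast(bigframes.core.blocks.PandasBatches, batches)
total_rows = typed_batches.total_rows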

tests/benchmark/read_gbq_colab/last_page.py

Lines changed: 3 additions & 3 deletions
@@ -27,9 +27,9 @@ def last_page(*, project_id, dataset_id, table_id):
         f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
     )

-    execute_result = df._block.session._executor.execute(df._block.expr, ordered=True)
-    execute_result.total_rows or 0
-    for _ in df.to_pandas_batches(page_size=PAGE_SIZE):
+    # Get number of rows (to calculate number of pages) and then all pages.
+    batches = df.to_pandas_batches(page_size=PAGE_SIZE)
+    for _ in batches:
         pass


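Draining the iterator with a bare for loop, as above, downloads every page just to reach the end. If the last page itself were needed rather than only the timing, a sketch under the same assumptions:

last_page = None
for page in df.to_pandas_batches(page_size=PAGE_SIZE):
    last_page = page  # keep only the most recent page
# last_page is None when the result set is empty.
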
tests/benchmark/read_gbq_colab/sort_output.py

Lines changed: 3 additions & 5 deletions
@@ -28,8 +28,7 @@ def sort_output(*, project_id, dataset_id, table_id):
     )

     # Simulate getting the first page, since we'll always do that first in the UI.
-    batches = df._to_pandas_batches(page_size=PAGE_SIZE)
-    assert (tr := batches.total_rows) is not None and tr >= 0
+    batches = df.to_pandas_batches(page_size=PAGE_SIZE)
     next(iter(batches))

     # Simulate the user sorting by a column and visualizing those results
@@ -38,9 +37,8 @@ def sort_output(*, project_id, dataset_id, table_id):
     sort_column = "col_bool_0"

     df_sorted = df.sort_values(sort_column)
-    batches = df_sorted._to_pandas_batches(page_size=PAGE_SIZE)
-    assert (tr := batches.total_rows) is not None and tr >= 0
-    next(iter(batches))
+    batches_sorted = df_sorted.to_pandas_batches(page_size=PAGE_SIZE)
+    next(iter(batches_sorted))


 if __name__ == "__main__":
