Skip to content

Commit cdd232c

Browse files
committed
update how we handle negative page
1 parent 49943ce commit cdd232c

File tree

3 files changed

+123
-43
lines changed

3 files changed

+123
-43
lines changed

bigframes/display/anywidget.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -98,13 +98,16 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
9898
# SELECT COUNT(*) query. It is a must have however.
9999
# TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
100100
# before we get here so that the count might already be cached.
101-
# TODO(b/452747934): Allow row_count to be None and check to see if
102-
# there are multiple pages and show "page 1 of many" in this case
103101
self._reset_batches_for_new_page_size()
104-
if self._batches is None or self._batches.total_rows is None:
105-
# TODO(b/428238610): We could still end up with a None here if the
106-
# underlying execution doesn't produce a total row count.
107-
self._error_message = "Could not determine total row count. Data might be unavailable or an error occurred."
102+
103+
if self._batches is None:
104+
self._error_message = "Could not retrieve data batches. Data might be unavailable or an error occurred."
105+
self.row_count = None
106+
elif self._batches.total_rows is None:
107+
# Total rows is unknown; this is an expected state.
108+
# TODO(b/461536343): Cheaply discover if we have exactly 1 page.
109+
# There are cases where total rows is not set, but there are no additional
110+
# pages. We could disable the "next" button in these cases.
108111
self.row_count = None
109112
else:
110113
self.row_count = self._batches.total_rows
@@ -139,14 +142,20 @@ def _validate_page(self, proposal: Dict[str, Any]) -> int:
139142
"""
140143
value = proposal["value"]
141144

142-
# If row count is unknown, allow any non-negative page
143-
if self.row_count is None:
144-
return max(0, value)
145+
if value < 0:
146+
raise ValueError("Page number cannot be negative.")
145147

146-
# If truly empty or invalid page size, stay on page 0
148+
# If truly empty or invalid page size, stay on page 0.
149+
# This handles cases where row_count is 0 or page_size is 0, preventing
150+
# division by zero or nonsensical pagination, even when row_count is None.
147151
if self.row_count == 0 or self.page_size == 0:
148152
return 0
149153

154+
# If row count is unknown, allow any non-negative page. The previous check
155+
# ensures that invalid page_size (0) is already handled.
156+
if self.row_count is None:
157+
return value
158+
150159
# Calculate the zero-indexed maximum page number.
151160
max_page = max(0, math.ceil(self.row_count / self.page_size) - 1)
152161

notebooks/dataframes/anywidget_mode.ipynb

Lines changed: 93 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@
142142
{
143143
"data": {
144144
"application/vnd.jupyter.widget-view+json": {
145-
"model_id": "710b4e9c06df4faf82011dd676e9d9a5",
145+
"model_id": "8fcad7b7e408422cae71d519cd2d4980",
146146
"version_major": 2,
147147
"version_minor": 1
148148
},
@@ -166,7 +166,7 @@
166166
}
167167
],
168168
"source": [
169-
"df"
169+
"df.set_index(\"name\")"
170170
]
171171
},
172172
{
@@ -205,7 +205,7 @@
205205
{
206206
"data": {
207207
"application/vnd.jupyter.widget-view+json": {
208-
"model_id": "62661c02a1a742d79e53dcb047343e86",
208+
"model_id": "06cb98c577514d5c9654a7792d93f8e6",
209209
"version_major": 2,
210210
"version_minor": 1
211211
},
@@ -305,7 +305,7 @@
305305
{
306306
"data": {
307307
"application/vnd.jupyter.widget-view+json": {
308-
"model_id": "b92000a35d0c4b8a9a8aaef4784e94dd",
308+
"model_id": "1672f826f7a347e38539dbb5fb72cd43",
309309
"version_major": 2,
310310
"version_minor": 1
311311
},
@@ -326,15 +326,6 @@
326326
"small_widget"
327327
]
328328
},
329-
{
330-
"cell_type": "markdown",
331-
"id": "a9d5d13b",
332-
"metadata": {},
333-
"source": [
334-
"### Handling of Invalid Row Count\n",
335-
"In cases where the total number of rows cannot be determined, `row_count` will be `None`. The widget will display 'Page X of many', and you can navigate forward until you reach the end of the available data. If you navigate beyond the last page, the widget will automatically return to the last valid page."
336-
]
337-
},
338329
{
339330
"cell_type": "markdown",
340331
"id": "added-cell-2",
@@ -354,7 +345,7 @@
354345
"data": {
355346
"text/html": [
356347
"✅ Completed. \n",
357-
" Query processed 85.9 kB in 13 seconds of slot time.\n",
348+
" Query processed 85.9 kB in 12 seconds of slot time.\n",
358349
" "
359350
],
360351
"text/plain": [
@@ -389,7 +380,7 @@
389380
{
390381
"data": {
391382
"application/vnd.jupyter.widget-view+json": {
392-
"model_id": "2620318ca07e405fa7ce950a2d931e6a",
383+
"model_id": "127a2e356b834c18b6f07c58ee2c4228",
393384
"version_major": 2,
394385
"version_minor": 1
395386
},
@@ -424,6 +415,93 @@
424415
" LIMIT 5;\n",
425416
"\"\"\")"
426417
]
418+
},
419+
{
420+
"cell_type": "markdown",
421+
"id": "multi-index-display-markdown",
422+
"metadata": {},
423+
"source": [
424+
"## Display Multi-Index DataFrame in anywidget mode\n",
425+
"This section demonstrates how BigFrames can display a DataFrame with multiple levels of indexing (a \"multi-index\") when using the `anywidget` display mode."
426+
]
427+
},
428+
{
429+
"cell_type": "code",
430+
"execution_count": 11,
431+
"id": "ad7482aa",
432+
"metadata": {},
433+
"outputs": [
434+
{
435+
"data": {
436+
"text/html": [
437+
"✅ Completed. \n",
438+
" Query processed 483.3 GB in 51 minutes of slot time. [<a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:3eace7c0-7776-48d6-925c-965be33d8738&page=queryresults\">Job bigframes-dev:US.3eace7c0-7776-48d6-925c-965be33d8738 details</a>]\n",
439+
" "
440+
],
441+
"text/plain": [
442+
"<IPython.core.display.HTML object>"
443+
]
444+
},
445+
"metadata": {},
446+
"output_type": "display_data"
447+
},
448+
{
449+
"data": {
450+
"text/html": [
451+
"✅ Completed. \n",
452+
" Query processed 124.4 MB in 7 seconds of slot time. [<a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:job_UJ5cx4R1jW5cNxq_1H1x-9-ATfqS&page=queryresults\">Job bigframes-dev:US.job_UJ5cx4R1jW5cNxq_1H1x-9-ATfqS details</a>]\n",
453+
" "
454+
],
455+
"text/plain": [
456+
"<IPython.core.display.HTML object>"
457+
]
458+
},
459+
"metadata": {},
460+
"output_type": "display_data"
461+
},
462+
{
463+
"data": {
464+
"application/vnd.jupyter.widget-view+json": {
465+
"model_id": "3f9652b5fdc0441eac2b05ab36d571d0",
466+
"version_major": 2,
467+
"version_minor": 1
468+
},
469+
"text/plain": [
470+
"TableWidget(page_size=10, row_count=3967869, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
471+
]
472+
},
473+
"metadata": {},
474+
"output_type": "display_data"
475+
},
476+
{
477+
"data": {
478+
"text/html": [],
479+
"text/plain": [
480+
"Computation deferred. Computation will process 513.5 GB"
481+
]
482+
},
483+
"execution_count": 11,
484+
"metadata": {},
485+
"output_type": "execute_result"
486+
}
487+
],
488+
"source": [
489+
"import datetime\n",
490+
"\n",
491+
" # Read the PyPI downloads dataset\n",
492+
"pypi_df = bpd.read_gbq(\"bigquery-public-data.pypi.file_downloads\")\n",
493+
"\n",
494+
"# Filter for the last 7 days to reduce the data size for this example\n",
495+
"seven_days_ago = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=7)\n",
496+
"pypi_df_recent = pypi_df[pypi_df[\"timestamp\"] > seven_days_ago]\n",
497+
" \n",
498+
"# Create a multi-index by grouping by date and project\n",
499+
"pypi_df_recent['date'] = pypi_df_recent['timestamp'].dt.date\n",
500+
"multi_index_df = pypi_df_recent.groupby([\"date\", \"project\"]).size().to_frame(\"downloads\")\n",
501+
" \n",
502+
"# Display the DataFrame with the multi-index\n",
503+
"multi_index_df"
504+
]
427505
}
428506
],
429507
"metadata": {

tests/system/small/test_anywidget.py

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -229,20 +229,15 @@ def test_widget_navigation_should_display_correct_page(
229229
_assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df)
230230

231231

232-
def test_widget_navigation_should_clamp_to_zero_for_negative_input(
232+
def test_widget_navigation_should_raise_error_for_negative_input(
233233
table_widget, paginated_pandas_df: pd.DataFrame
234234
):
235235
"""
236236
Given a widget, when a negative page number is set,
237-
then the page number should be clamped to 0 and display the first page.
237+
then a ValueError should be raised.
238238
"""
239-
expected_slice = paginated_pandas_df.iloc[0:2]
240-
241-
table_widget.page = -1
242-
html = table_widget.table_html
243-
244-
assert table_widget.page == 0
245-
_assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df)
239+
with pytest.raises(ValueError, match="Page number cannot be negative."):
240+
table_widget.page = -1
246241

247242

248243
def test_widget_navigation_should_clamp_to_last_page_for_out_of_bounds_input(
@@ -500,20 +495,18 @@ def __next__(self):
500495
raise ValueError("Simulated read error")
501496

502497

503-
def test_widget_should_fallback_to_zero_rows_with_invalid_total_rows(
498+
def test_widget_should_show_error_on_batch_failure(
504499
paginated_bf_df: bf.dataframe.DataFrame,
505500
monkeypatch: pytest.MonkeyPatch,
506501
):
507502
"""
508-
Given an internal component fails to return valid execution data,
509-
when the TableWidget is created, its error_message should be set and displayed.
503+
Given that the internal call to `_to_pandas_batches` fails and returns None,
504+
when the TableWidget is created, its `error_message` should be set and displayed.
510505
"""
511-
# Patch the executor's 'execute' method to simulate an error.
506+
# Patch the DataFrame's batch creation method to simulate a failure.
512507
monkeypatch.setattr(
513-
"bigframes.session.bq_caching_executor.BigQueryCachingExecutor.execute",
514-
lambda self, *args, **kwargs: mock_execute_result_with_params(
515-
self, paginated_bf_df._block.expr.schema, None, [], *args, **kwargs
516-
),
508+
"bigframes.dataframe.DataFrame._to_pandas_batches",
509+
lambda self, *args, **kwargs: None,
517510
)
518511

519512
# Create the TableWidget under the error condition.
@@ -526,7 +519,7 @@ def test_widget_should_fallback_to_zero_rows_with_invalid_total_rows(
526519
# The widget should have an error message and display it in the HTML.
527520
assert widget.row_count is None
528521
assert widget._error_message is not None
529-
assert "Could not determine total row count" in widget._error_message
522+
assert "Could not retrieve data batches" in widget._error_message
530523
assert widget._error_message in widget.table_html
531524

532525

0 commit comments

Comments
 (0)