Update how we handle negative page numbers

shuoweil · shuoweil · commit cdd232c5497b · 2025-11-17T19:58:56.000Z
diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py
@@ -98,13 +98,16 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         # SELECT COUNT(*) query. It is a must have however.
         # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
         # before we get here so that the count might already be cached.
-        # TODO(b/452747934): Allow row_count to be None and check to see if
-        # there are multiple pages and show "page 1 of many" in this case
         self._reset_batches_for_new_page_size()
-        if self._batches is None or self._batches.total_rows is None:
-            # TODO(b/428238610): We could still end up with a None here if the
-            # underlying execution doesn't produce a total row count.
-            self._error_message = "Could not determine total row count. Data might be unavailable or an error occurred."
+
+        if self._batches is None:
+            self._error_message = "Could not retrieve data batches. Data might be unavailable or an error occurred."
+            self.row_count = None
+        elif self._batches.total_rows is None:
+            # Total rows is unknown; this is an expected state.
+            # TODO(b/461536343): Cheaply discover if we have exactly 1 page.
+            # There are cases where total rows is not set, but there are no additional
+            # pages. We could disable the "next" button in these cases.
             self.row_count = None
         else:
             self.row_count = self._batches.total_rows
@@ -139,14 +142,20 @@ def _validate_page(self, proposal: Dict[str, Any]) -> int:
         """
         value = proposal["value"]
 
-        # If row count is unknown, allow any non-negative page
-        if self.row_count is None:
-            return max(0, value)
+        if value < 0:
+            raise ValueError("Page number cannot be negative.")
 
-        # If truly empty or invalid page size, stay on page 0
+        # If truly empty or invalid page size, stay on page 0.
+        # This covers row_count == 0 and page_size == 0 (which would otherwise divide
+        # by zero below), and it runs first so page_size == 0 wins even if row_count is None.
         if self.row_count == 0 or self.page_size == 0:
             return 0
 
+        # If row count is unknown, allow any non-negative page. The previous check
+        # ensures that invalid page_size (0) is already handled.
+        if self.row_count is None:
+            return value
+
         # Calculate the zero-indexed maximum page number.
         max_page = max(0, math.ceil(self.row_count / self.page_size) - 1)
 
diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb
@@ -142,7 +142,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "710b4e9c06df4faf82011dd676e9d9a5",
+       "model_id": "8fcad7b7e408422cae71d519cd2d4980",
        "version_major": 2,
        "version_minor": 1
       },
@@ -166,7 +166,7 @@
     }
    ],
    "source": [
-    "df"
+    "df.set_index(\"name\")"
    ]
   },
   {
@@ -205,7 +205,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "62661c02a1a742d79e53dcb047343e86",
+       "model_id": "06cb98c577514d5c9654a7792d93f8e6",
        "version_major": 2,
        "version_minor": 1
       },
@@ -305,7 +305,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b92000a35d0c4b8a9a8aaef4784e94dd",
+       "model_id": "1672f826f7a347e38539dbb5fb72cd43",
        "version_major": 2,
        "version_minor": 1
       },
@@ -326,15 +326,6 @@
     "small_widget"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "a9d5d13b",
-   "metadata": {},
-   "source": [
-    "### Handling of Invalid Row Count\n",
-    "In cases where the total number of rows cannot be determined, `row_count` will be `None`. The widget will display 'Page X of many', and you can navigate forward until you reach the end of the available data. If you navigate beyond the last page, the widget will automatically return to the last valid page."
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "added-cell-2",
@@ -354,7 +345,7 @@
      "data": {
       "text/html": [
        "✅ Completed. \n",
-       "    Query processed 85.9 kB in 13 seconds of slot time.\n",
+       "    Query processed 85.9 kB in 12 seconds of slot time.\n",
        "    "
       ],
       "text/plain": [
@@ -389,7 +380,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "2620318ca07e405fa7ce950a2d931e6a",
+       "model_id": "127a2e356b834c18b6f07c58ee2c4228",
        "version_major": 2,
        "version_minor": 1
       },
@@ -424,6 +415,93 @@
     "  LIMIT 5;\n",
     "\"\"\")"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "multi-index-display-markdown",
+   "metadata": {},
+   "source": [
+    "## Display Multi-Index DataFrame in anywidget mode\n",
+    "This section demonstrates how BigFrames can display a DataFrame with multiple levels of indexing (a \"multi-index\") when using the `anywidget` display mode."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "ad7482aa",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "✅ Completed. \n",
+       "    Query processed 483.3 GB in 51 minutes of slot time. [<a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:3eace7c0-7776-48d6-925c-965be33d8738&page=queryresults\">Job bigframes-dev:US.3eace7c0-7776-48d6-925c-965be33d8738 details</a>]\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "✅ Completed. \n",
+       "    Query processed 124.4 MB in 7 seconds of slot time. [<a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:job_UJ5cx4R1jW5cNxq_1H1x-9-ATfqS&page=queryresults\">Job bigframes-dev:US.job_UJ5cx4R1jW5cNxq_1H1x-9-ATfqS details</a>]\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3f9652b5fdc0441eac2b05ab36d571d0",
+       "version_major": 2,
+       "version_minor": 1
+      },
+      "text/plain": [
+       "TableWidget(page_size=10, row_count=3967869, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [],
+      "text/plain": [
+       "Computation deferred. Computation will process 513.5 GB"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import datetime\n",
+    "\n",
+    "# Read the PyPI downloads dataset\n",
+    "pypi_df = bpd.read_gbq(\"bigquery-public-data.pypi.file_downloads\")\n",
+    "\n",
+    "# Filter for the last 7 days to reduce the data size for this example\n",
+    "seven_days_ago = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=7)\n",
+    "pypi_df_recent = pypi_df[pypi_df[\"timestamp\"] > seven_days_ago]\n",
+    " \n",
+    "# Create a multi-index by grouping by date and project\n",
+    "pypi_df_recent['date'] = pypi_df_recent['timestamp'].dt.date\n",
+    "multi_index_df = pypi_df_recent.groupby([\"date\", \"project\"]).size().to_frame(\"downloads\")\n",
+    " \n",
+    "# Display the DataFrame with the multi-index\n",
+    "multi_index_df"
+   ]
   }
  ],
  "metadata": {
diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py
@@ -229,20 +229,15 @@ def test_widget_navigation_should_display_correct_page(
     _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df)
 
 
-def test_widget_navigation_should_clamp_to_zero_for_negative_input(
+def test_widget_navigation_should_raise_error_for_negative_input(
     table_widget, paginated_pandas_df: pd.DataFrame
 ):
     """
     Given a widget, when a negative page number is set,
-    then the page number should be clamped to 0 and display the first page.
+    then a ValueError should be raised.
     """
-    expected_slice = paginated_pandas_df.iloc[0:2]
-
-    table_widget.page = -1
-    html = table_widget.table_html
-
-    assert table_widget.page == 0
-    _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df)
+    with pytest.raises(ValueError, match="Page number cannot be negative."):
+        table_widget.page = -1
 
 
 def test_widget_navigation_should_clamp_to_last_page_for_out_of_bounds_input(
@@ -500,20 +495,18 @@ def __next__(self):
         raise ValueError("Simulated read error")
 
 
-def test_widget_should_fallback_to_zero_rows_with_invalid_total_rows(
+def test_widget_should_show_error_on_batch_failure(
     paginated_bf_df: bf.dataframe.DataFrame,
     monkeypatch: pytest.MonkeyPatch,
 ):
     """
-    Given an internal component fails to return valid execution data,
-    when the TableWidget is created, its error_message should be set and displayed.
+    Given that the internal call to `_to_pandas_batches` fails and returns None,
+    when the TableWidget is created, its `error_message` should be set and displayed.
     """
-    # Patch the executor's 'execute' method to simulate an error.
+    # Patch the DataFrame's batch creation method to simulate a failure.
     monkeypatch.setattr(
-        "bigframes.session.bq_caching_executor.BigQueryCachingExecutor.execute",
-        lambda self, *args, **kwargs: mock_execute_result_with_params(
-            self, paginated_bf_df._block.expr.schema, None, [], *args, **kwargs
-        ),
+        "bigframes.dataframe.DataFrame._to_pandas_batches",
+        lambda self, *args, **kwargs: None,
     )
 
     # Create the TableWidget under the error condition.
@@ -526,7 +519,7 @@ def test_widget_should_fallback_to_zero_rows_with_invalid_total_rows(
     # The widget should have an error message and display it in the HTML.
     assert widget.row_count is None
     assert widget._error_message is not None
-    assert "Could not determine total row count" in widget._error_message
+    assert "Could not retrieve data batches" in widget._error_message
     assert widget._error_message in widget.table_html