merge change

shuoweil · shuoweil · commit 42e0580c487a · 2025-10-22T00:48:28.000Z
diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
@@ -124,9 +124,6 @@ def total_bytes_processed(self) -> Optional[int]:
     def __next__(self) -> pd.DataFrame:
         return next(self._dataframes)
 
-    def __iter__(self) -> Iterator[pd.DataFrame]:
-        return self
-
 
 @dataclasses.dataclass()
 class MaterializationOptions:
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
@@ -1882,7 +1882,7 @@ def to_pandas_batches(
         max_results: Optional[int] = None,
         *,
         allow_large_results: Optional[bool] = None,
-    ) -> blocks.PandasBatches:
+    ) -> Iterable[pandas.DataFrame]:
         """Stream DataFrame results to an iterable of pandas DataFrame.
 
         page_size and max_results determine the size and number of batches,
@@ -1925,7 +1925,7 @@ def to_pandas_batches(
                 over the default size limit of 10 GB.
 
         Returns:
-            blocks.PandasBatches:
+            Iterable[pandas.DataFrame]:
                 An iterable of smaller dataframes which combine to
                 form the original dataframe. Results stream from bigquery,
                 see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.table.RowIterator#google_cloud_bigquery_table_RowIterator_to_arrow_iterable
diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py
@@ -23,6 +23,8 @@
 import pandas as pd
 
 import bigframes
+from bigframes.core.blocks import PandasBatches
+import bigframes.dataframe
 import bigframes.display.html
 
 # anywidget and traitlets are optional dependencies. We don't want the import of
@@ -56,6 +58,7 @@ class TableWidget(WIDGET_BASE):
     row_count = traitlets.Int(0).tag(sync=True)
     table_html = traitlets.Unicode().tag(sync=True)
     _initial_load_complete = traitlets.Bool(False).tag(sync=True)
+    _batches: PandasBatches
 
     def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         """Initialize the TableWidget.
@@ -65,8 +68,7 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         """
         if not ANYWIDGET_INSTALLED:
             raise ImportError(
-                "Please `pip install anywidget traitlets` or "
-                "`pip install 'bigframes[anywidget]'` to use TableWidget."
+                "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
             )
 
         self._dataframe = dataframe
@@ -85,16 +87,22 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         self._batch_iter: Optional[Iterator[pd.DataFrame]] = None
         self._cached_batches: List[pd.DataFrame] = []
 
-        # Respect display options for initial page size
-        self.page_size = bigframes.options.display.max_rows
+        # respect display options for initial page size
+        initial_page_size = bigframes.options.display.max_rows
 
-        # The query issued by `to_pandas_batches()` already contains
-        # metadata about how many results there were. Use that to avoid
-        # doing an extra COUNT(*) query that `len(...)` would do.
-        self._batches = self._dataframe.to_pandas_batches(page_size=self.page_size)
-        # TODO (shuowei): total_rows=None Incorrectly Defaults to 0. b/452747934
+        # set traitlets properties that trigger observers
+        self.page_size = initial_page_size
+
+        # len(dataframe) is expensive, since it will trigger a
+        # SELECT COUNT(*) query. It is required, however.
+        # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
+        # before we get here so that the count might already be cached.
+        # TODO(b/452747934): Allow row_count to be None and check to see if
+        # there are multiple pages and show "page 1 of many" in this case
+        self._reset_batches_for_new_page_size()
         self.row_count = self._batches.total_rows or 0
 
+        # get the initial page
         self._set_table_html()
         self._initial_load_complete = True
         self._initializing = False
@@ -187,7 +195,7 @@ def _cached_data(self) -> pd.DataFrame:
 
     def _reset_batches_for_new_page_size(self) -> None:
         """Reset the batch iterator when page size changes."""
-        self._batches = self._dataframe.to_pandas_batches(page_size=self.page_size)
+        self._batches = self._dataframe._to_pandas_batches(page_size=self.page_size)
 
         self._cached_batches = []
         self._batch_iter = None
diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb
@@ -128,34 +128,24 @@
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/html": [
-       "Query job 6d85c081-49c7-408a-ab96-e0e9e5102419 is DONE. 171.4 MB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:6d85c081-49c7-408a-ab96-e0e9e5102419&page=queryresults\">Open Job</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "31ba8e41e4ca4579b85409237cb7a566",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dataframe.py:868: UserWarning: Anywidget mode is not available. Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. Falling back to deferred mode. Error: Traceback (most recent call last):\n",
+      "  File \"/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dataframe.py\", line 861, in _repr_html_\n",
+      "    widget = display.TableWidget(df.copy())\n",
+      "  File \"/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/display/anywidget.py\", line 100, in __init__\n",
+      "    self.row_count = self._batches.total_rows or 0\n",
+      "AttributeError: 'TableWidget' object has no attribute '_batches'\n",
+      "\n",
+      "  warnings.warn(\n"
+     ]
     },
     {
      "data": {
-      "text/html": [],
+      "text/html": [
+       "Computation deferred. Computation will process 171.4 MB"
+      ],
       "text/plain": [
        "Computation deferred. Computation will process 171.4 MB"
       ]
@@ -184,37 +174,16 @@
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/html": [
-       "Query job 48cb4908-a59a-420f-8fcb-200d0d9187ef is DONE. 171.4 MB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:48cb4908-a59a-420f-8fcb-200d0d9187ef&page=queryresults\">Open Job</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Total pages: 555246\n"
+     "ename": "AttributeError",
+     "evalue": "'TableWidget' object has no attribute '_batches'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-7-49e9807f0359>\u001b[0m in \u001b[0;36m<cell line: 5>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;31m# Create widget programmatically\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mwidget\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTableWidget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Total pages: {math.ceil(widget.row_count / widget.page_size)}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/src/github.com/googleapis/python-bigquery-dataframes/bigframes/display/anywidget.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, dataframe)\u001b[0m\n\u001b[1;32m     98\u001b[0m         \u001b[0;31m# TODO(b/452747934): Allow row_count to be None and check to see if\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     99\u001b[0m         \u001b[0;31m# there are multiple pages and show \"page 1 of many\" in this case\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 100\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrow_count\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_batches\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtotal_rows\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    101\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    102\u001b[0m         \u001b[0;31m# get the initial page\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mAttributeError\u001b[0m: 'TableWidget' object has no attribute '_batches'"
      ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5d22f3f19e4140b0ba51869e97c3f690",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
     }
    ],
    "source": [
@@ -239,20 +208,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "id": "12b68f15",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Current page: 0\n",
-      "After next: 1\n",
-      "After prev: 0\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Simulate button clicks programmatically\n",
     "print(\"Current page:\", widget.page)\n",