
Commit eedd929

get actual row_counts
1 parent 124f021 commit eedd929

7 files changed: +148 −77 lines changed

bigframes/display/anywidget.py
Lines changed: 19 additions & 10 deletions

@@ -17,8 +17,7 @@
 from importlib import resources
 import functools
 import math
-import typing
-from typing import Any, cast, Dict, Iterator, List, Optional, Type
+from typing import Any, Dict, Iterator, List, Optional, Type
 import uuid
 
 import pandas as pd
@@ -76,17 +75,19 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         # Respect display options for initial page size
         initial_page_size = bigframes.options.display.max_rows
 
-        batches = dataframe.to_pandas_batches(
-            page_size=initial_page_size,
-        )
-        self._batches: bigframes.core.blocks.PandasBatches = cast(
-            bigframes.core.blocks.PandasBatches, batches
+        execute_result = dataframe._block.session._executor.execute(
+            dataframe._block.expr,
+            ordered=True,
+            use_explicit_destination=True,
         )
 
         # The query issued by `to_pandas_batches()` already contains metadata
         # about how many results there were. Use that to avoid doing an extra
         # COUNT(*) query that `len(...)` would do.
-        self.row_count = self._batches.total_rows or 0
+        self.row_count = execute_result.total_rows or 0
+
+        # Create pandas batches from the ExecuteResult
+        self._batches = execute_result.to_pandas_batches(page_size=initial_page_size)
 
         # Set page_size after _batches is available since traitlets observers
         # may depend on _batches being initialized when the change trigger happens
@@ -189,8 +190,16 @@ def _cached_data(self) -> pd.DataFrame:
 
     def _reset_batches_for_new_page_size(self):
         """Reset the batch iterator when page size changes."""
-        batches = self._dataframe.to_pandas_batches(page_size=self.page_size)
-        self._batches = typing.cast(bigframes.core.blocks.PandasBatches, batches)
+        # Execute with explicit destination for consistency with __init__
+        execute_result = self._dataframe._block.session._executor.execute(
+            self._dataframe._block.expr,
+            ordered=True,
+            use_explicit_destination=True,
+        )
+
+        # Create pandas batches from the ExecuteResult
+        self._batches = execute_result.to_pandas_batches(page_size=self.page_size)
+
         self._cached_batches = []
         self._batch_iter = None
         self._all_data_loaded = False
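
The diff above replaces the public `to_pandas_batches()` call (which needed a `typing.cast` to `PandasBatches` just to reach `total_rows`) with a direct call to the session's executor. As a minimal sketch of the new flow, using the internal `_block`, `_executor.execute(...)`, and `ExecuteResult.to_pandas_batches(...)` APIs exactly as they appear in this diff:

def load_widget_data(dataframe, page_size):
    # Run the query once, writing to an explicit destination table so that
    # the BigQuery job metadata reports the total row count.
    execute_result = dataframe._block.session._executor.execute(
        dataframe._block.expr,
        ordered=True,
        use_explicit_destination=True,
    )

    # total_rows comes from the job metadata, avoiding the extra COUNT(*)
    # query that len(dataframe) would issue.
    row_count = execute_result.total_rows or 0

    # Page through the already-materialized result.
    batches = execute_result.to_pandas_batches(page_size=page_size)
    return row_count, batches

Note that `load_widget_data` is an illustrative name, not a function in this commit; TableWidget performs these steps inline in `__init__` and `_reset_batches_for_new_page_size`.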

notebooks/dataframes/anywidget_mode.ipynb
Lines changed: 67 additions & 25 deletions

@@ -73,18 +73,6 @@
    "id": "f289d250",
    "metadata": {},
    "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "Query job 1ea2b594-2bd7-46de-a3c8-6aeee5884ba2 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:1ea2b594-2bd7-46de-a3c8-6aeee5884ba2&page=queryresults\">Open Job</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
     {
      "name": "stdout",
      "output_type": "stream",
@@ -142,7 +130,19 @@
     {
      "data": {
       "text/html": [
-       "Query job 67e679e9-94da-47f7-8be1-8b4a496fbfbd is DONE. 171.4 MB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:67e679e9-94da-47f7-8be1-8b4a496fbfbd&page=queryresults\">Open Job</a>"
+       "Query job 3245c62b-5969-4b78-b1f2-4330592d3c65 is DONE. 171.4 MB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:3245c62b-5969-4b78-b1f2-4330592d3c65&page=queryresults\">Open Job</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Query job 1a5cec48-7128-4986-86a6-369a8f366974 is DONE. 171.4 MB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:1a5cec48-7128-4986-86a6-369a8f366974&page=queryresults\">Open Job</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -154,7 +154,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e74c3920b93644a0b2afdaa3841cad31",
+       "model_id": "d59362abcff6445ea879b5f43e0ca9b3",
        "version_major": 2,
        "version_minor": 1
       },
@@ -195,6 +195,30 @@
    "id": "6920d49b",
    "metadata": {},
    "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "Query job 356f561b-5017-413f-950b-2bc4c7798a24 is DONE. 171.4 MB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:356f561b-5017-413f-950b-2bc4c7798a24&page=queryresults\">Open Job</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Query job 72162728-56a3-47ce-bdb1-61b038cc2146 is DONE. 171.4 MB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:72162728-56a3-47ce-bdb1-61b038cc2146&page=queryresults\">Open Job</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
     {
      "name": "stdout",
      "output_type": "stream",
@@ -205,12 +229,13 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b4f7a3f86ef54e07b24ef10061088391",
+       "model_id": "8fac39e9b92e42d283883137f155526f",
        "version_major": 2,
        "version_minor": 1
       },
      "text/plain": [
       "TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
+      "TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
      ]
     },
     "execution_count": 6,
@@ -285,10 +310,34 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/usr/local/google/home/swast/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/array_value.py:231: AmbiguousWindowWarning: Window ordering may be ambiguous, this can cause unstable results.\n",
+      "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/array_value.py:230: AmbiguousWindowWarning: Window ordering may be ambiguous, this can cause unstable results.\n",
       " warnings.warn(msg, bfe.AmbiguousWindowWarning)\n"
      ]
     },
+    {
+     "data": {
+      "text/html": [
+       "Query job 77f0582b-b68c-46a7-bf25-463837a4ef3f is DONE. 171.4 MB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:77f0582b-b68c-46a7-bf25-463837a4ef3f&page=queryresults\">Open Job</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Query job ec2bcbc2-0f5a-45e9-affc-485183cb245e is DONE. 171.4 MB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:ec2bcbc2-0f5a-45e9-affc-485183cb245e&page=queryresults\">Open Job</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
     {
      "name": "stdout",
      "output_type": "stream",
@@ -299,12 +348,13 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "44a829aca2f24cfdba4b61afd1a259fe",
+       "model_id": "fe6358fd83d6431198944e601ea00372",
        "version_major": 2,
        "version_minor": 1
       },
      "text/plain": [
       "TableWidget(page_size=10, row_count=5, table_html='<table border=\"1\" class=\"dataframe table table-striped tabl…"
+      "TableWidget(page_size=10, row_count=5, table_html='<table border=\"1\" class=\"dataframe table table-striped tabl…"
      ]
     },
     "execution_count": 8,
@@ -319,14 +369,6 @@
     "print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n",
     "small_widget"
    ]
-   },
-   {
-    "cell_type": "code",
-    "execution_count": null,
-    "id": "c4e5836b-c872-4a9c-b9ec-14f6f338176d",
-    "metadata": {},
-    "outputs": [],
-    "source": []
   }
  ],
  "metadata": {

tests/benchmark/read_gbq_colab/aggregate_output.py
Lines changed: 19 additions & 5 deletions

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import pathlib
-import typing
 
 import benchmark.utils as utils
 
@@ -27,8 +26,13 @@ def aggregate_output(*, project_id, dataset_id, table_id):
     df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}")
 
     # Simulate getting the first page, since we'll always do that first in the UI.
-    batches = df.to_pandas_batches(page_size=PAGE_SIZE)
-    assert typing.cast(typing.Any, batches).total_rows >= 0
+    execute_result = df._block.session._executor.execute(
+        df._block.expr,
+        ordered=True,
+        use_explicit_destination=True,
+    )
+    assert execute_result.total_rows is not None and execute_result.total_rows >= 0
+    batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
     next(iter(batches))
 
     # To simulate very small rows that can only fit a boolean,
@@ -44,8 +48,18 @@ def aggregate_output(*, project_id, dataset_id, table_id):
         .groupby("rounded")
         .sum(numeric_only=True)
     )
-
-    batches_aggregated = df_aggregated.to_pandas_batches(page_size=PAGE_SIZE)
+    execute_result_aggregated = df_aggregated._block.session._executor.execute(
+        df_aggregated._block.expr,
+        ordered=True,
+        use_explicit_destination=True,
+    )
+    assert (
+        execute_result_aggregated.total_rows is not None
+        and execute_result_aggregated.total_rows >= 0
+    )
+    batches_aggregated = execute_result_aggregated.to_pandas_batches(
+        page_size=PAGE_SIZE
+    )
     next(iter(batches_aggregated))

tests/benchmark/read_gbq_colab/filter_output.py
Lines changed: 17 additions & 7 deletions

@@ -12,11 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import pathlib
-import typing
 
 import benchmark.utils as utils
 
-import bigframes.core.blocks
 import bigframes.pandas as bpd
 
 PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE
@@ -33,17 +31,29 @@ def filter_output(
     df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}")
 
     # Simulate getting the first page, since we'll always do that first in the UI.
-    batches = df.to_pandas_batches(page_size=PAGE_SIZE)
+    # Force BigQuery execution to get total_rows metadata
+    execute_result = df._block.session._executor.execute(
+        df._block.expr,
+        ordered=True,
+        use_explicit_destination=True,
+    )
+    batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
     next(iter(batches))
 
     # Simulate the user filtering by a column and visualizing those results
     df_filtered = df[df["col_bool_0"]]
-    batches_filtered = df_filtered.to_pandas_batches(page_size=PAGE_SIZE)
-    batches_filtered = typing.cast(
-        bigframes.core.blocks.PandasBatches, batches_filtered
+    # Force BigQuery execution for filtered DataFrame to get total_rows metadata
+    execute_result_filtered = df_filtered._block.session._executor.execute(
+        df_filtered._block.expr,
+        ordered=True,
+        use_explicit_destination=True,
     )
-    rows = batches_filtered.total_rows
+
+    rows = execute_result_filtered.total_rows or 0
     assert rows >= 0
+
+    batches_filtered = execute_result_filtered.to_pandas_batches(page_size=PAGE_SIZE)
+
     # It's possible we don't have any pages at all, since we filtered out all
     # matching rows.
     first_page = next(iter(batches_filtered))

tests/benchmark/read_gbq_colab/first_page.py
Lines changed: 7 additions & 3 deletions

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import pathlib
-import typing
 
 import benchmark.utils as utils
 
@@ -29,8 +28,13 @@ def first_page(*, project_id, dataset_id, table_id):
     )
 
     # Get number of rows (to calculate number of pages) and the first page.
-    batches = df.to_pandas_batches(page_size=PAGE_SIZE)
-    assert typing.cast(typing.Any, batches).total_rows >= 0
+    execute_result = df._block.session._executor.execute(
+        df._block.expr,
+        ordered=True,
+        use_explicit_destination=True,
+    )
+    assert execute_result.total_rows is not None and execute_result.total_rows >= 0
+    batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
     first_page = next(iter(batches))
     assert first_page is not None

tests/benchmark/read_gbq_colab/sort_output.py
Lines changed: 17 additions & 5 deletions

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import pathlib
-import typing
 
 import benchmark.utils as utils
 
@@ -29,8 +28,13 @@ def sort_output(*, project_id, dataset_id, table_id):
     )
 
     # Simulate getting the first page, since we'll always do that first in the UI.
-    batches = df.to_pandas_batches(page_size=PAGE_SIZE)
-    assert typing.cast(typing.Any, batches).total_rows >= 0
+    execute_result = df._block.session._executor.execute(
+        df._block.expr,
+        ordered=True,
+        use_explicit_destination=True,
+    )
+    assert execute_result.total_rows is not None and execute_result.total_rows >= 0
+    batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
     next(iter(batches))
 
     # Simulate the user sorting by a column and visualizing those results
@@ -39,8 +43,16 @@ def sort_output(*, project_id, dataset_id, table_id):
     sort_column = "col_bool_0"
 
     df_sorted = df.sort_values(sort_column)
-    batches_sorted = df_sorted.to_pandas_batches(page_size=PAGE_SIZE)
-    assert typing.cast(typing.Any, batches_sorted).total_rows >= 0
+    execute_result_sorted = df_sorted._block.session._executor.execute(
+        df_sorted._block.expr,
+        ordered=True,
+        use_explicit_destination=True,
+    )
+    assert (
+        execute_result_sorted.total_rows is not None
+        and execute_result_sorted.total_rows >= 0
+    )
+    batches_sorted = execute_result_sorted.to_pandas_batches(page_size=PAGE_SIZE)
     next(iter(batches_sorted))
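
All four benchmark scripts above now repeat the same execute/assert/paginate sequence. A hypothetical shared helper could factor it out; `execute_with_row_count` is an invented name for illustration, not a function in this commit, and it relies on the same internal `_block` and `_executor` APIs the scripts already use:

def execute_with_row_count(df, page_size):
    """Execute `df` once and return (total_rows, pandas batch iterator)."""
    result = df._block.session._executor.execute(
        df._block.expr,
        ordered=True,
        use_explicit_destination=True,
    )
    # With an explicit destination table, total_rows is populated from the
    # BigQuery job metadata rather than requiring a separate COUNT(*) query.
    assert result.total_rows is not None and result.total_rows >= 0
    return result.total_rows, result.to_pandas_batches(page_size=page_size)

For example, the second block of sort_output.py would reduce to `rows, batches_sorted = execute_with_row_count(df_sorted, PAGE_SIZE)` followed by `next(iter(batches_sorted))`.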
