Skip to content

Commit e41b3ba

Browse files
committed
minor updates
1 parent 5989a76 commit e41b3ba

File tree

6 files changed

+24
-15
lines changed

6 files changed

+24
-15
lines changed

bigframes/display/anywidget.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,15 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
6969
)
7070

7171
self._dataframe = dataframe
72-
# This flag is used to prevent observers from firing during initialization.
73-
self._initializing = True
72+
7473
super().__init__()
7574

75+
# This flag prevents observers from firing during initialization.
76+
# When traitlets like `page` and `page_size` are set in `__init__`, we
77+
# don't want their corresponding `_..._changed` methods to execute
78+
# until the widget is fully constructed.
79+
self._initializing = True
80+
7681
# Initialize attributes that might be needed by observers first
7782
self._table_id = str(uuid.uuid4())
7883
self._all_data_loaded = False
@@ -170,7 +175,8 @@ def _get_next_batch(self) -> bool:
170175
@property
171176
def _batch_iterator(self) -> Iterator[pd.DataFrame]:
172177
"""Lazily initializes and returns the batch iterator."""
173-
self._batch_iter = iter(self._batches)
178+
if self._batch_iter is None:
179+
self._batch_iter = iter(self._batches)
174180
return self._batch_iter
175181

176182
@property

tests/benchmark/read_gbq_colab/aggregate_output.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,13 @@ def aggregate_output(*, project_id, dataset_id, table_id):
2525
# e.g. "{local_inline}" or "{local_large}"
2626
df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}")
2727

28-
# Simulate getting the first page, since we'll always do that first in the UI.
28+
# Call the executor directly to isolate the query execution time
29+
# from other DataFrame overhead for this benchmark.
2930
execute_result = df._block.session._executor.execute(
3031
df._block.expr,
3132
ordered=True,
3233
use_explicit_destination=True,
33-
)
34+
) # type: ignore[call-arg]
3435
assert execute_result.total_rows is not None and execute_result.total_rows >= 0
3536
batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
3637
next(iter(batches))
@@ -52,7 +53,7 @@ def aggregate_output(*, project_id, dataset_id, table_id):
5253
df_aggregated._block.expr,
5354
ordered=True,
5455
use_explicit_destination=True,
55-
)
56+
) # type: ignore[call-arg]
5657
assert (
5758
execute_result_aggregated.total_rows is not None
5859
and execute_result_aggregated.total_rows >= 0

tests/benchmark/read_gbq_colab/filter_output.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,13 @@ def filter_output(
3030
# e.g. "{local_inline}" or "{local_large}"
3131
df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}")
3232

33-
# Simulate getting the first page, since we'll always do that first in the UI.
34-
# Force BigQuery execution to get total_rows metadata
33+
# Call the executor directly to isolate the query execution time
34+
# from other DataFrame overhead for this benchmark.
3535
execute_result = df._block.session._executor.execute(
3636
df._block.expr,
3737
ordered=True,
3838
use_explicit_destination=True,
39-
)
39+
) # type: ignore[call-arg]
4040
batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
4141
next(iter(batches))
4242

tests/benchmark/read_gbq_colab/first_page.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,13 @@ def first_page(*, project_id, dataset_id, table_id):
2727
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
2828
)
2929

30-
# Get number of rows (to calculate number of pages) and the first page.
30+
# Call the executor directly to isolate the query execution time
31+
# from other DataFrame overhead for this benchmark.
3132
execute_result = df._block.session._executor.execute(
3233
df._block.expr,
3334
ordered=True,
3435
use_explicit_destination=True,
35-
)
36+
) # type: ignore[call-arg]
3637
assert execute_result.total_rows is not None and execute_result.total_rows >= 0
3738
batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
3839
first_page = next(iter(batches))

tests/benchmark/read_gbq_colab/sort_output.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,13 @@ def sort_output(*, project_id, dataset_id, table_id):
2727
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
2828
)
2929

30-
# Simulate getting the first page, since we'll always do that first in the UI.
30+
# Call the executor directly to isolate the query execution time
31+
# from other DataFrame overhead for this benchmark.
3132
execute_result = df._block.session._executor.execute(
3233
df._block.expr,
3334
ordered=True,
3435
use_explicit_destination=True,
35-
)
36+
) # type: ignore[call-arg]
3637
assert execute_result.total_rows is not None and execute_result.total_rows >= 0
3738
batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
3839
next(iter(batches))
@@ -47,7 +48,7 @@ def sort_output(*, project_id, dataset_id, table_id):
4748
df_sorted._block.expr,
4849
ordered=True,
4950
use_explicit_destination=True,
50-
)
51+
) # type: ignore[call-arg]
5152
assert (
5253
execute_result_sorted.total_rows is not None
5354
and execute_result_sorted.total_rows >= 0

tests/system/small/test_anywidget.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,7 @@ def __next__(self):
484484
raise ValueError("Simulated read error")
485485

486486

487-
def test_widget_should_fallback_to_zero_rows_with_invlid_total_rows(
487+
def test_widget_should_fallback_to_zero_rows_with_invalid_total_rows(
488488
paginated_bf_df: bf.dataframe.DataFrame,
489489
monkeypatch: pytest.MonkeyPatch,
490490
):

0 commit comments

Comments (0)