googleapis
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
Lines changed: 39 additions & 16 deletions
diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py
Lines changed: 41 additions & 31 deletions
@@ -39,6 +39,7 @@
     Union,
 )
 import warnings
+import weakref
 
 import bigframes_vendored.constants as constants
 import bigframes_vendored.pandas.core.frame as vendored_pandas_frame
@@ -87,6 +88,7 @@
 if typing.TYPE_CHECKING:
     from _typeshed import SupportsRichComparison
 
+    from bigframes.display.anywidget import TableWidget
     import bigframes.session
 
     SingleItemValue = Union[bigframes.series.Series, int, float, str, Callable]
@@ -111,6 +113,9 @@ class DataFrame(vendored_pandas_frame.DataFrame):
     # Must be above 5000 for pandas to delegate to bigframes for binops
     __pandas_priority__ = 15000
 
+    # Weak reference to the TableWidget created for this frame; None until one exists
+    _anywidget_instance: Optional[weakref.ReferenceType["TableWidget"]] = None
+
     def __init__(
         self,
         data=None,
@@ -776,21 +781,7 @@ def _repr_html_(self) -> str:
         if opts.repr_mode == "deferred":
             return formatter.repr_query_job(self._compute_dry_run())
 
-        if opts.repr_mode == "anywidget":
-            try:
-                from bigframes import display
-
-                # Store the widget for _repr_mimebundle_ to use
-                self._anywidget_instance = display.TableWidget(self)
-                # Return a fallback HTML string
-                return "Interactive table widget (anywidget mode)"
-            except (AttributeError, ValueError):
-                # Fallback if anywidget is not available
-                warnings.warn(
-                    "Anywidget mode is not available, falling back to deferred mode."
-                )
-                return formatter.repr_query_job(self._compute_dry_run())
-
+        # Process blob columns first, regardless of display mode
         self._cached()
         df = self.copy()
         if bigframes.options.display.blob_display:
@@ -802,7 +793,40 @@ def _repr_html_(self) -> str:
             for col in blob_cols:
                 # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data.
                 df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True)
+        else:
+            blob_cols = []
 
+        if opts.repr_mode == "anywidget":
+            try:
+                from IPython.display import display as ipython_display
+
+                from bigframes import display
+
+                # Check if widget instance already exists and reuse it
+                widget = None
+                if (
+                    hasattr(self, "_anywidget_instance")
+                    and self._anywidget_instance is not None
+                ):
+                    widget = self._anywidget_instance()
+
+                # If widget doesn't exist or was garbage collected, create a new one
+                if widget is None:
+                    # Pass the processed dataframe (with blob URLs) to the widget
+                    widget = display.TableWidget(df)
+                    self._anywidget_instance = weakref.ref(widget)
+
+                ipython_display(widget)
+                return ""  # Return empty string since we used display()
+
+            except (AttributeError, ValueError, ImportError):
+                # Fallback if anywidget is not available
+                warnings.warn(
+                    "Anywidget mode is not available. Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. Falling back to deferred mode."
+                )
+                return formatter.repr_query_job(self._compute_dry_run())
+
+        # Continue with regular HTML rendering for non-anywidget modes
         # TODO(swast): pass max_columns and get the true column count back. Maybe
         # get 1 more column than we have requested so that pandas can add the
         # ... for us?
@@ -811,7 +835,6 @@ def _repr_html_(self) -> str:
         )
 
         self._set_internal_query_job(query_job)
-
         column_count = len(pandas_df.columns)
 
         with display_options.pandas_repr(opts):
 
@@ -17,23 +17,21 @@
 from importlib import resources
 import functools
 import math
-from typing import Any, Dict, Iterator, Type, TYPE_CHECKING
+from typing import Any, Dict, Iterator, List, Optional, Type
 import uuid
 
 import pandas as pd
 
 import bigframes
 
-ANYWIDGET_INSTALLED = True
-if TYPE_CHECKING:
+# anywidget and traitlets may not be installed; record whether they can be imported
+try:
     import anywidget
     import traitlets
-else:
-    try:
-        import anywidget
-        import traitlets
-    except Exception:
-        ANYWIDGET_INSTALLED = False
+
+    ANYWIDGET_INSTALLED = True
+except Exception:
+    ANYWIDGET_INSTALLED = False
 
 WIDGET_BASE: Type[Any]
 if ANYWIDGET_INSTALLED:
@@ -48,14 +46,15 @@ class TableWidget(WIDGET_BASE):
     """
 
     def __init__(self, dataframe: bigframes.dataframe.DataFrame):
-        """
-        Initialize the TableWidget.
+        """Initialize the TableWidget.
 
         Args:
             dataframe: The Bigframes Dataframe to display in the widget.
         """
         if not ANYWIDGET_INSTALLED:
-            raise ImportError("Anywidget is not installed, cannot create TableWidget.")
+            raise ImportError(
+                "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
+            )
 
         super().__init__()
         self._dataframe = dataframe
@@ -65,13 +64,20 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
 
         # Initialize data fetching attributes.
         self._batches = dataframe.to_pandas_batches(page_size=self.page_size)
-        self._cached_data = pd.DataFrame(columns=self._dataframe.columns)
+
+        # Fetched batches are appended here as-is; _get_cached_data() concatenates them on demand
+        self._cached_batches: List[pd.DataFrame] = []
+
+        # Unique identifier for HTML table element
         self._table_id = str(uuid.uuid4())
         self._all_data_loaded = False
-        self._batch_iterator: Iterator[pd.DataFrame] | None = None
+        # Iterator over self._batches; created lazily by _get_batch_iterator()
+        self._batch_iter: Optional[Iterator[pd.DataFrame]] = None
 
         # len(dataframe) is expensive, since it will trigger a
         # SELECT COUNT(*) query. It is a must have however.
+        # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
+        # before we get here so that the count might already be cached.
         self.row_count = len(dataframe)
 
         # get the initial page
@@ -89,14 +95,13 @@ def _esm(self):
 
     @traitlets.validate("page")
     def _validate_page(self, proposal: Dict[str, Any]):
-        """
-        Validate and clamp the page number to a valid range.
+        """Validate and clamp the page number to a valid range.
 
         Args:
-            proposal:
-                A dictionary from the traitlets library containing the proposed
-                change. The new value is in proposal["value"].
+            proposal: A dictionary from the traitlets library containing the
+                proposed change. The new value is in proposal["value"].
         """
+
         value = proposal["value"]
         if self.row_count == 0 or self.page_size == 0:
             return 0
@@ -120,34 +125,39 @@ def _get_next_batch(self) -> bool:
         try:
             iterator = self._get_batch_iterator()
             batch = next(iterator)
-            self._cached_data = pd.concat([self._cached_data, batch], ignore_index=True)
+            self._cached_batches.append(batch)
             return True
         except StopIteration:
             self._all_data_loaded = True
-            # update row count if we loaded all data
-            if self.row_count == 0:
-                self.row_count = len(self._cached_data)
             return False
-        except Exception as e:
-            raise RuntimeError(f"Error during batch processing: {str(e)}") from e
 
     def _get_batch_iterator(self) -> Iterator[pd.DataFrame]:
         """Lazily initializes and returns the batch iterator."""
-        if self._batch_iterator is None:
-            self._batch_iterator = iter(self._batches)
-        return self._batch_iterator
+        if self._batch_iter is None:
+            self._batch_iter = iter(self._batches)
+        return self._batch_iter
+
+    def _get_cached_data(self) -> pd.DataFrame:
+        """Combine all cached batches into a single DataFrame."""
+        if not self._cached_batches:
+            return pd.DataFrame(columns=self._dataframe.columns)
+        return pd.concat(self._cached_batches, ignore_index=True)
 
     def _set_table_html(self):
         """Sets the current html data based on the current page and page size."""
         start = self.page * self.page_size
         end = start + self.page_size
 
         # fetch more data if the requested page is outside our cache
-        while len(self._cached_data) < end and not self._all_data_loaded:
-            self._get_next_batch()
+        cached_data = self._get_cached_data()
+        while len(cached_data) < end and not self._all_data_loaded:
+            if self._get_next_batch():
+                cached_data = self._get_cached_data()
+            else:
+                break
 
         # Get the data for the current page
-        page_data = self._cached_data.iloc[start:end]
+        page_data = cached_data.iloc[start:end]
 
         # Generate HTML table
         self.table_html = page_data.to_html(