1717from importlib import resources
1818import functools
1919import math
20- from typing import Any , Dict , Iterator , Type , TYPE_CHECKING
20+ from typing import Any , Dict , Iterator , List , Optional , Type
2121import uuid
2222
2323import pandas as pd
2424
2525import bigframes
2626
# anywidget and traitlets are optional dependencies. Record whether both could
# be imported so the rest of the module can branch on ANYWIDGET_INSTALLED
# instead of failing at import time.
try:
    import anywidget
    import traitlets
except Exception:
    # Any failure while importing either package disables the widget.
    ANYWIDGET_INSTALLED = False
else:
    ANYWIDGET_INSTALLED = True
3735
3836WIDGET_BASE : Type [Any ]
3937if ANYWIDGET_INSTALLED :
@@ -48,14 +46,15 @@ class TableWidget(WIDGET_BASE):
4846 """
4947
5048 def __init__ (self , dataframe : bigframes .dataframe .DataFrame ):
51- """
52- Initialize the TableWidget.
49+ """Initialize the TableWidget.
5350
5451 Args:
5552 dataframe: The Bigframes Dataframe to display in the widget.
5653 """
5754 if not ANYWIDGET_INSTALLED :
58- raise ImportError ("Anywidget is not installed, cannot create TableWidget." )
55+ raise ImportError (
56+ "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
57+ )
5958
6059 super ().__init__ ()
6160 self ._dataframe = dataframe
@@ -65,13 +64,20 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
6564
6665 # Initialize data fetching attributes.
6766 self ._batches = dataframe .to_pandas_batches (page_size = self .page_size )
68- self ._cached_data = pd .DataFrame (columns = self ._dataframe .columns )
67+
68+ # Use list of DataFrames to avoid memory copies from concatenation
69+ self ._cached_batches : List [pd .DataFrame ] = []
70+
71+ # Unique identifier for HTML table element
6972 self ._table_id = str (uuid .uuid4 ())
7073 self ._all_data_loaded = False
71- self ._batch_iterator : Iterator [pd .DataFrame ] | None = None
74+ # Renamed from _batch_iterator to _batch_iter to avoid naming conflict
75+ self ._batch_iter : Optional [Iterator [pd .DataFrame ]] = None
7276
7377 # len(dataframe) is expensive, since it will trigger a
7478 # SELECT COUNT(*) query. It is a must have however.
79+ # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
80+ # before we get here so that the count might already be cached.
7581 self .row_count = len (dataframe )
7682
7783 # get the initial page
@@ -89,14 +95,13 @@ def _esm(self):
8995
9096 @traitlets .validate ("page" )
9197 def _validate_page (self , proposal : Dict [str , Any ]):
92- """
93- Validate and clamp the page number to a valid range.
98+ """Validate and clamp the page number to a valid range.
9499
95100 Args:
96- proposal:
97- A dictionary from the traitlets library containing the proposed
98- change. The new value is in proposal["value"].
101+ proposal: A dictionary from the traitlets library containing the
102+ proposed change. The new value is in proposal["value"].
99103 """
104+
100105 value = proposal ["value" ]
101106 if self .row_count == 0 or self .page_size == 0 :
102107 return 0
@@ -120,34 +125,39 @@ def _get_next_batch(self) -> bool:
120125 try :
121126 iterator = self ._get_batch_iterator ()
122127 batch = next (iterator )
123- self ._cached_data = pd . concat ([ self . _cached_data , batch ], ignore_index = True )
128+ self ._cached_batches . append ( batch )
124129 return True
125130 except StopIteration :
126131 self ._all_data_loaded = True
127- # update row count if we loaded all data
128- if self .row_count == 0 :
129- self .row_count = len (self ._cached_data )
130132 return False
131- except Exception as e :
132- raise RuntimeError (f"Error during batch processing: { str (e )} " ) from e
133133
134134 def _get_batch_iterator (self ) -> Iterator [pd .DataFrame ]:
135135 """Lazily initializes and returns the batch iterator."""
136- if self ._batch_iterator is None :
137- self ._batch_iterator = iter (self ._batches )
138- return self ._batch_iterator
136+ if self ._batch_iter is None :
137+ self ._batch_iter = iter (self ._batches )
138+ return self ._batch_iter
139+
140+ def _get_cached_data (self ) -> pd .DataFrame :
141+ """Combine all cached batches into a single DataFrame."""
142+ if not self ._cached_batches :
143+ return pd .DataFrame (columns = self ._dataframe .columns )
144+ return pd .concat (self ._cached_batches , ignore_index = True )
139145
140146 def _set_table_html (self ):
141147 """Sets the current html data based on the current page and page size."""
142148 start = self .page * self .page_size
143149 end = start + self .page_size
144150
145151 # fetch more data if the requested page is outside our cache
146- while len (self ._cached_data ) < end and not self ._all_data_loaded :
147- self ._get_next_batch ()
152+ cached_data = self ._get_cached_data ()
153+ while len (cached_data ) < end and not self ._all_data_loaded :
154+ if self ._get_next_batch ():
155+ cached_data = self ._get_cached_data ()
156+ else :
157+ break
148158
149159 # Get the data for the current page
150- page_data = self . _cached_data .iloc [start :end ]
160+ page_data = cached_data .iloc [start :end ]
151161
152162 # Generate HTML table
153163 self .table_html = page_data .to_html (
0 commit comments