1616
1717from __future__ import annotations
1818
19+ import dataclasses
1920from importlib import resources
2021import functools
2122import math
2829from bigframes .core import blocks
2930import bigframes .dataframe
3031import bigframes .display .html
32+ import bigframes .dtypes as dtypes
3133
3234# anywidget and traitlets are optional dependencies. We don't want the import of
3335# this module to fail if they aren't installed, though. Instead, we try to
4850 WIDGET_BASE = object
4951
5052
53+ @dataclasses .dataclass (frozen = True )
54+ class _SortState :
55+ column : str
56+ ascending : bool
57+
58+
5159class TableWidget (WIDGET_BASE ):
5260 """An interactive, paginated table widget for BigFrames DataFrames.
5361
@@ -63,6 +71,9 @@ class TableWidget(WIDGET_BASE):
6371 allow_none = True ,
6472 ).tag (sync = True )
6573 table_html = traitlets .Unicode ().tag (sync = True )
74+ sort_column = traitlets .Unicode ("" ).tag (sync = True )
75+ sort_ascending = traitlets .Bool (True ).tag (sync = True )
76+ orderable_columns = traitlets .List (traitlets .Unicode (), []).tag (sync = True )
6677 _initial_load_complete = traitlets .Bool (False ).tag (sync = True )
6778 _batches : Optional [blocks .PandasBatches ] = None
6879 _error_message = traitlets .Unicode (allow_none = True , default_value = None ).tag (
@@ -89,15 +100,25 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
89100 self ._all_data_loaded = False
90101 self ._batch_iter : Optional [Iterator [pd .DataFrame ]] = None
91102 self ._cached_batches : List [pd .DataFrame ] = []
103+ self ._last_sort_state : Optional [_SortState ] = None
92104
93105 # respect display options for initial page size
94106 initial_page_size = bigframes .options .display .max_rows
95107
96108 # set traitlets properties that trigger observers
109+ # TODO(b/462525985): Investigate and improve TableWidget UX for DataFrames with a large number of columns.
97110 self .page_size = initial_page_size
111+ # TODO(b/463754889): Support non-string column labels for sorting.
112+ if all (isinstance (col , str ) for col in dataframe .columns ):
113+ self .orderable_columns = [
114+ str (col_name )
115+ for col_name , dtype in dataframe .dtypes .items ()
116+ if dtypes .is_orderable (dtype )
117+ ]
118+ else :
119+ self .orderable_columns = []
98120
99- # len(dataframe) is expensive, since it will trigger a
100- # SELECT COUNT(*) query. It is a must have however.
121+ # obtain the row counts
101122 # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
102123 # before we get here so that the count might already be cached.
103124 self ._reset_batches_for_new_page_size ()
@@ -121,6 +142,11 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
121142 # Also used as a guard to prevent observers from firing during initialization.
122143 self ._initial_load_complete = True
123144
@traitlets.observe("_initial_load_complete")
def _on_initial_load_complete(self, change: Dict[str, Any]) -> None:
    """Render the table once the initial load is flagged as complete.

    Args:
        change: The traitlets change notification; only its ``"new"`` value
            is read.
    """
    # Render only when the flag becomes truthy; a reset to False is ignored.
    if change["new"]:
        self._set_table_html()
149+
124150 @functools .cached_property
125151 def _esm (self ):
126152 """Load JavaScript code from external file."""
@@ -221,13 +247,17 @@ def _cached_data(self) -> pd.DataFrame:
221247 return pd .DataFrame (columns = self ._dataframe .columns )
222248 return pd .concat (self ._cached_batches , ignore_index = True )
223249
250+ def _reset_batch_cache (self ) -> None :
251+ """Resets batch caching attributes."""
252+ self ._cached_batches = []
253+ self ._batch_iter = None
254+ self ._all_data_loaded = False
255+
def _reset_batches_for_new_page_size(self) -> None:
    """Reset the batch iterator when page size changes."""
    # Request a fresh batches object that uses the current page size.
    self._batches = self._dataframe._to_pandas_batches(page_size=self.page_size)

    # Clear the cached batches and iterator state through the shared helper
    # rather than repeating the three attribute resets inline.
    self._reset_batch_cache()
231261
232262 def _set_table_html (self ) -> None :
233263 """Sets the current html data based on the current page and page size."""
@@ -237,6 +267,21 @@ def _set_table_html(self) -> None:
237267 )
238268 return
239269
270+ # Apply sorting if a column is selected
271+ df_to_display = self ._dataframe
272+ if self .sort_column :
273+ # TODO(b/463715504): Support sorting by index columns.
274+ df_to_display = df_to_display .sort_values (
275+ by = self .sort_column , ascending = self .sort_ascending
276+ )
277+
278+ # Reset batches when sorting changes
279+ if self ._last_sort_state != _SortState (self .sort_column , self .sort_ascending ):
280+ self ._batches = df_to_display ._to_pandas_batches (page_size = self .page_size )
281+ self ._reset_batch_cache ()
282+ self ._last_sort_state = _SortState (self .sort_column , self .sort_ascending )
283+ self .page = 0 # Reset to first page
284+
240285 start = self .page * self .page_size
241286 end = start + self .page_size
242287
@@ -272,8 +317,14 @@ def _set_table_html(self) -> None:
272317 self .table_html = bigframes .display .html .render_html (
273318 dataframe = page_data ,
274319 table_id = f"table-{ self ._table_id } " ,
320+ orderable_columns = self .orderable_columns ,
275321 )
276322
@traitlets.observe("sort_column", "sort_ascending")
def _sort_changed(self, _change: Dict[str, Any]) -> None:
    """Handler for when sorting parameters change from the frontend."""
    # _initial_load_complete doubles as the guard that keeps observers from
    # acting during initialization; the table is rendered once that flag
    # becomes True, so nothing is lost by skipping here.
    if not self._initial_load_complete:
        return
    self._set_table_html()
327+
277328 @traitlets .observe ("page" )
278329 def _page_changed (self , _change : Dict [str , Any ]) -> None :
279330 """Handler for when the page number is changed from the frontend."""
@@ -288,6 +339,9 @@ def _page_size_changed(self, _change: Dict[str, Any]) -> None:
288339 return
289340 # Reset the page to 0 when page size changes to avoid invalid page states
290341 self .page = 0
342+ # Reset the sort state to default (no sort)
343+ self .sort_column = ""
344+ self .sort_ascending = True
291345
292346 # Reset batches to use new page size for future data fetching
293347 self ._reset_batches_for_new_page_size ()
0 commit comments