Skip to content

Commit 700f96c

Browse files
committed
feat: display custom multi index column in anywidget mode
1 parent 5b4fdf0 commit 700f96c

File tree

8 files changed

+357
-96
lines changed

8 files changed

+357
-96
lines changed

bigframes/dataframe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1969,7 +1969,7 @@ def to_pandas_batches(
19691969
max_results: Optional[int] = None,
19701970
*,
19711971
allow_large_results: Optional[bool] = None,
1972-
) -> Iterable[pandas.DataFrame]:
1972+
) -> blocks.PandasBatches:
19731973
"""Stream DataFrame results to an iterable of pandas DataFrames.
19741974
19751975
page_size and max_results determine the size and number of batches,

bigframes/display/anywidget.py

Lines changed: 83 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,10 @@ def _get_next_batch(self) -> bool:
229229
except StopIteration:
230230
self._all_data_loaded = True
231231
return False
232+
except Exception as e:
233+
# Any other failure: record it in _error_message and report that no batch was loaded
234+
self._error_message = f"Error loading data: {str(e)}"
235+
return False
232236

233237
@property
234238
def _batch_iterator(self) -> Iterator[pd.DataFrame]:
@@ -255,80 +259,94 @@ def _reset_batch_cache(self) -> None:
255259

256260
def _reset_batches_for_new_page_size(self) -> None:
257261
"""Reset the batch iterator when page size changes."""
258-
self._batches = self._dataframe._to_pandas_batches(page_size=self.page_size)
262+
self._batches = self._dataframe.to_pandas_batches(page_size=self.page_size)
259263

260264
self._reset_batch_cache()
261265

262266
def _set_table_html(self) -> None:
263267
"""Sets the current html data based on the current page and page size."""
264-
if self._error_message:
265-
self.table_html = (
266-
f"<div class='bigframes-error-message'>{self._error_message}</div>"
267-
)
268+
if hasattr(self, "_setting_html"):
268269
return
270+
self._setting_html = True
269271

270-
# Apply sorting if a column is selected
271-
df_to_display = self._dataframe
272-
if self.sort_column:
273-
# TODO(b/463715504): Support sorting by index columns.
274-
df_to_display = df_to_display.sort_values(
275-
by=self.sort_column, ascending=self.sort_ascending
276-
)
277-
278-
# Reset batches when sorting changes
279-
if self._last_sort_state != _SortState(self.sort_column, self.sort_ascending):
280-
self._batches = df_to_display._to_pandas_batches(page_size=self.page_size)
281-
self._reset_batch_cache()
282-
self._last_sort_state = _SortState(self.sort_column, self.sort_ascending)
283-
self.page = 0 # Reset to first page
284-
285-
start = self.page * self.page_size
286-
end = start + self.page_size
287-
288-
# fetch more data if the requested page is outside our cache
289-
cached_data = self._cached_data
290-
while len(cached_data) < end and not self._all_data_loaded:
291-
if self._get_next_batch():
292-
cached_data = self._cached_data
272+
try:
273+
if self._error_message:
274+
self.table_html = (
275+
f"<div class='bigframes-error-message'>{self._error_message}</div>"
276+
)
277+
return
278+
279+
# Apply sorting if a column is selected
280+
df_to_display = self._dataframe
281+
if self.sort_column:
282+
# TODO(b/463715504): Support sorting by index columns.
283+
df_to_display = df_to_display.sort_values(
284+
by=self.sort_column, ascending=self.sort_ascending
285+
)
286+
287+
# Reset batches when sorting changes
288+
if self._last_sort_state != _SortState(
289+
self.sort_column, self.sort_ascending
290+
):
291+
self._batches = df_to_display.to_pandas_batches(
292+
page_size=self.page_size
293+
)
294+
self._reset_batch_cache()
295+
self._last_sort_state = _SortState(
296+
self.sort_column, self.sort_ascending
297+
)
298+
self.page = 0 # Reset to first page
299+
300+
start = self.page * self.page_size
301+
end = start + self.page_size
302+
303+
# fetch more data if the requested page is outside our cache
304+
cached_data = self._cached_data
305+
while len(cached_data) < end and not self._all_data_loaded:
306+
if self._get_next_batch():
307+
cached_data = self._cached_data
308+
else:
309+
break
310+
311+
# Get the data for the current page
312+
page_data = cached_data.iloc[start:end].copy()
313+
314+
# Handle index display
315+
# TODO(b/438181139): Add tests for custom multiindex
316+
if self._dataframe._block.has_index:
317+
index_name = page_data.index.name
318+
page_data.insert(
319+
0, index_name if index_name is not None else "", page_data.index
320+
)
293321
else:
294-
break
295-
296-
# Get the data for the current page
297-
page_data = cached_data.iloc[start:end].copy()
298-
299-
# Handle index display
300-
# TODO(b/438181139): Add tests for custom multiindex
301-
if self._dataframe._block.has_index:
302-
index_name = page_data.index.name
303-
page_data.insert(
304-
0, index_name if index_name is not None else "", page_data.index
322+
# Default index - include as "Row" column
323+
page_data.insert(0, "Row", range(start + 1, start + len(page_data) + 1))
324+
# Handle case where user navigated beyond available data with unknown row count
325+
is_unknown_count = self.row_count is None
326+
is_beyond_data = (
327+
self._all_data_loaded and len(page_data) == 0 and self.page > 0
305328
)
306-
else:
307-
# Default index - include as "Row" column
308-
page_data.insert(0, "Row", range(start + 1, start + len(page_data) + 1))
309-
# Handle case where user navigated beyond available data with unknown row count
310-
is_unknown_count = self.row_count is None
311-
is_beyond_data = self._all_data_loaded and len(page_data) == 0 and self.page > 0
312-
if is_unknown_count and is_beyond_data:
313-
# Calculate the last valid page (zero-indexed)
314-
total_rows = len(cached_data)
315-
if total_rows > 0:
316-
last_valid_page = max(0, math.ceil(total_rows / self.page_size) - 1)
317-
# Navigate back to the last valid page
318-
self.page = last_valid_page
319-
# Recursively call to display the correct page
320-
return self._set_table_html()
321-
else:
322-
# If no data at all, stay on page 0 with empty display
323-
self.page = 0
324-
return self._set_table_html()
325-
326-
# Generate HTML table
327-
self.table_html = bigframes.display.html.render_html(
328-
dataframe=page_data,
329-
table_id=f"table-{self._table_id}",
330-
orderable_columns=self.orderable_columns,
331-
)
329+
if is_unknown_count and is_beyond_data:
330+
# Calculate the last valid page (zero-indexed)
331+
total_rows = len(cached_data)
332+
if total_rows > 0:
333+
last_valid_page = max(0, math.ceil(total_rows / self.page_size) - 1)
334+
# Navigate back to the last valid page
335+
self.page = last_valid_page
336+
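# Recursively call to display the correct page
# NOTE(review): `_setting_html` is still set at this point (it is only removed in the `finally` below), so this nested call appears to hit the `hasattr` guard and return immediately without re-rendering — confirm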
337+
return self._set_table_html()
338+
else:
339+
# If no data at all, stay on page 0 with empty display
340+
self.page = 0
341+
return self._set_table_html()
342+
343+
# Generate HTML table
344+
self.table_html = bigframes.display.html.render_html(
345+
dataframe=page_data,
346+
table_id=f"table-{self._table_id}",
347+
)
348+
finally:
349+
delattr(self, "_setting_html")
332350

333351
@traitlets.observe("sort_column", "sort_ascending")
334352
def _sort_changed(self, _change: Dict[str, Any]):

0 commit comments

Comments
 (0)