Skip to content

Commit 65107b2

Browse files
Merge remote-tracking branch 'github/main' into series_contains
2 parents 65c003f + 1aa7950 commit 65107b2

File tree

16 files changed

+976
-25
lines changed

16 files changed

+976
-25
lines changed

.pre-commit-config.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,8 @@ repos:
4242
additional_dependencies: [types-requests, types-tabulate, types-PyYAML, pandas-stubs<=2.2.3.241126]
4343
exclude: "^third_party"
4444
args: ["--check-untyped-defs", "--explicit-package-bases", "--ignore-missing-imports"]
45+
- repo: https://github.com/biomejs/pre-commit
46+
rev: v2.0.2
47+
hooks:
48+
- id: biome-check
49+
files: '\.js$'

MANIFEST.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
# Generated by synthtool. DO NOT EDIT!
1818
include README.rst LICENSE
1919
recursive-include third_party/bigframes_vendored *
20-
recursive-include bigframes *.json *.proto py.typed
20+
recursive-include bigframes *.json *.proto *.js py.typed
2121
recursive-include tests *
2222
global-exclude *.py[co]
2323
global-exclude __pycache__

bigframes/core/compile/polars/compiler.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -487,8 +487,14 @@ def compile_offsets(self, node: nodes.PromoteOffsetsNode):
487487
def compile_join(self, node: nodes.JoinNode):
488488
left = self.compile_node(node.left_child)
489489
right = self.compile_node(node.right_child)
490-
left_on = [l_name.id.sql for l_name, _ in node.conditions]
491-
right_on = [r_name.id.sql for _, r_name in node.conditions]
490+
491+
left_on = []
492+
right_on = []
493+
for left_ex, right_ex in node.conditions:
494+
left_ex, right_ex = lowering._coerce_comparables(left_ex, right_ex)
495+
left_on.append(self.expr_compiler.compile_expression(left_ex))
496+
right_on.append(self.expr_compiler.compile_expression(right_ex))
497+
492498
if node.type == "right":
493499
return self._ordered_join(
494500
right, left, "left", right_on, left_on, node.joins_nulls
@@ -502,8 +508,8 @@ def _ordered_join(
502508
left_frame: pl.LazyFrame,
503509
right_frame: pl.LazyFrame,
504510
how: Literal["inner", "outer", "left", "cross"],
505-
left_on: Sequence[str],
506-
right_on: Sequence[str],
511+
left_on: Sequence[pl.Expr],
512+
right_on: Sequence[pl.Expr],
507513
join_nulls: bool,
508514
):
509515
if how == "right":

bigframes/dataframe.py

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -782,22 +782,7 @@ def _repr_html_(self) -> str:
782782
if opts.repr_mode == "deferred":
783783
return formatter.repr_query_job(self._compute_dry_run())
784784

785-
if opts.repr_mode == "anywidget":
786-
import anywidget # type: ignore
787-
788-
# create an iterator for the data batches
789-
batches = self.to_pandas_batches()
790-
791-
# get the first page result
792-
try:
793-
first_page = next(iter(batches))
794-
except StopIteration:
795-
first_page = pandas.DataFrame(columns=self.columns)
796-
797-
# Instantiate and return the widget. The widget's frontend will
798-
# handle the display of the table and pagination
799-
return anywidget.AnyWidget(dataframe=first_page)
800-
785+
# Process blob columns first, regardless of display mode
801786
self._cached()
802787
df = self.copy()
803788
if bigframes.options.display.blob_display:
@@ -809,7 +794,31 @@ def _repr_html_(self) -> str:
809794
for col in blob_cols:
810795
# TODO(garrettwu): Not necessary to get access urls for all the rows. Update when we have a way to get URLs from local data.
811796
df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True)
797+
else:
798+
blob_cols = []
799+
800+
if opts.repr_mode == "anywidget":
801+
try:
802+
from IPython.display import display as ipython_display
803+
804+
from bigframes import display
805+
806+
# Always create a new widget instance for each display call
807+
# This ensures that each cell gets its own widget and prevents
808+
# unintended sharing between cells
809+
widget = display.TableWidget(df.copy())
812810

811+
ipython_display(widget)
812+
return "" # Return empty string since we used display()
813+
814+
except (AttributeError, ValueError, ImportError):
815+
# Fallback if anywidget is not available
816+
warnings.warn(
817+
"Anywidget mode is not available. Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. Falling back to deferred mode."
818+
)
819+
return formatter.repr_query_job(self._compute_dry_run())
820+
821+
# Continue with regular HTML rendering for non-anywidget modes
813822
# TODO(swast): pass max_columns and get the true column count back. Maybe
814823
# get 1 more column than we have requested so that pandas can add the
815824
# ... for us?
@@ -818,7 +827,6 @@ def _repr_html_(self) -> str:
818827
)
819828

820829
self._set_internal_query_job(query_job)
821-
822830
column_count = len(pandas_df.columns)
823831

824832
with display_options.pandas_repr(opts):

bigframes/display/__init__.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
try:
18+
import anywidget # noqa
19+
20+
from bigframes.display.anywidget import TableWidget
21+
22+
__all__ = ["TableWidget"]
23+
except Exception:
24+
pass

bigframes/display/anywidget.py

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
from importlib import resources
18+
import functools
19+
import math
20+
from typing import Any, Dict, Iterator, List, Optional, Type
21+
import uuid
22+
23+
import pandas as pd
24+
25+
import bigframes
26+
27+
# anywidget and traitlets are optional dependencies. We don't want the import of this
28+
# module to fail if they aren't installed, though. Instead, we try to limit the surface that
29+
# these packages could affect. This makes unit testing easier and ensures we don't
30+
# accidentally make these required packages.
31+
try:
32+
import anywidget
33+
import traitlets
34+
35+
ANYWIDGET_INSTALLED = True
36+
except Exception:
37+
ANYWIDGET_INSTALLED = False
38+
39+
WIDGET_BASE: Type[Any]
40+
if ANYWIDGET_INSTALLED:
41+
WIDGET_BASE = anywidget.AnyWidget
42+
else:
43+
WIDGET_BASE = object
44+
45+
46+
class TableWidget(WIDGET_BASE):
    """An interactive, paginated table widget for BigFrames DataFrames.

    Rows are pulled lazily from ``DataFrame.to_pandas_batches`` and kept as a
    list of pandas DataFrames. The rows of the current page are rendered to
    HTML and stored in the ``table_html`` trait, which is tagged ``sync=True``.
    """

    def __init__(self, dataframe: bigframes.dataframe.DataFrame):
        """Initialize the TableWidget.

        Args:
            dataframe: The BigFrames DataFrame to display in the widget.

        Raises:
            ImportError: If anywidget or traitlets could not be imported.
        """
        if not ANYWIDGET_INSTALLED:
            raise ImportError(
                "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
            )

        super().__init__()
        self._dataframe = dataframe

        # Respect the global display options for the page size.
        self.page_size = bigframes.options.display.max_rows

        # Source of pandas batches; it is only turned into an iterator on
        # first use (see ``_batch_iterator``).
        self._batches = dataframe.to_pandas_batches(page_size=self.page_size)

        # Keep the fetched batches as a list so that appending a batch does
        # not copy the rows that are already cached.
        self._cached_batches: List[pd.DataFrame] = []

        # Unique identifier for the HTML table element.
        self._table_id = str(uuid.uuid4())
        self._all_data_loaded = False
        # Backing field for the ``_batch_iterator`` property; named
        # differently to avoid clashing with it.
        self._batch_iter: Optional[Iterator[pd.DataFrame]] = None

        # len(dataframe) is expensive, since it will trigger a
        # SELECT COUNT(*) query. It is a must have however.
        # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
        # before we get here so that the count might already be cached.
        self.row_count = len(dataframe)

        # Render the initial page.
        self._set_table_html()

    @functools.cached_property
    def _esm(self):
        """Load the widget's JavaScript code from the packaged file."""
        return resources.read_text(bigframes.display, "table_widget.js")

    # NOTE(review): these trait declarations and the decorators below are
    # evaluated when the class body runs, so importing this module still fails
    # with NameError if ``traitlets`` could not be imported, even though
    # ANYWIDGET_INSTALLED guards the constructor.
    page = traitlets.Int(0).tag(sync=True)
    page_size = traitlets.Int(25).tag(sync=True)
    row_count = traitlets.Int(0).tag(sync=True)
    table_html = traitlets.Unicode().tag(sync=True)

    @traitlets.validate("page")
    def _validate_page(self, proposal: Dict[str, Any]):
        """Validate and clamp the page number to a valid range.

        Args:
            proposal: A dictionary from the traitlets library containing the
                proposed change. The new value is in proposal["value"].

        Returns:
            The proposed page clamped to ``[0, max_page]``, or 0 when there
            are no rows or the page size is 0.
        """
        value = proposal["value"]
        if self.row_count == 0 or self.page_size == 0:
            return 0

        # Calculate the zero-indexed maximum page number.
        max_page = max(0, math.ceil(self.row_count / self.page_size) - 1)

        # Clamp the proposed value to the valid range [0, max_page].
        return max(0, min(value, max_page))

    def _get_next_batch(self) -> bool:
        """Fetch the next batch from the iterator and append it to the cache.

        Returns:
            True if a batch was successfully loaded, False otherwise.
        """
        if self._all_data_loaded:
            return False

        try:
            batch = next(self._batch_iterator)
        except StopIteration:
            # The source is exhausted; remember that so we never poll again.
            self._all_data_loaded = True
            return False

        self._cached_batches.append(batch)
        return True

    @property
    def _batch_iterator(self) -> Iterator[pd.DataFrame]:
        """Lazily initialize and return the batch iterator."""
        if self._batch_iter is None:
            self._batch_iter = iter(self._batches)
        return self._batch_iter

    @property
    def _cached_data(self) -> pd.DataFrame:
        """Combine all cached batches into a single DataFrame.

        Every access concatenates the cached batches again, so callers
        should avoid reading this repeatedly in a loop.
        """
        if not self._cached_batches:
            return pd.DataFrame(columns=self._dataframe.columns)
        return pd.concat(self._cached_batches, ignore_index=True)

    def _set_table_html(self):
        """Set ``table_html`` from the current page and page size."""
        start = self.page * self.page_size
        end = start + self.page_size

        # Fetch more data if the requested page is outside our cache. Track
        # the cached row count from the batch lengths rather than
        # re-concatenating every cached batch after each fetch.
        rows_cached = sum(len(batch) for batch in self._cached_batches)
        while rows_cached < end and self._get_next_batch():
            rows_cached += len(self._cached_batches[-1])

        # Concatenate once, then slice out the rows of the current page.
        page_data = self._cached_data.iloc[start:end]

        # Generate the HTML table.
        # NOTE(review): escape=False emits cell values as raw HTML; confirm
        # this is intended for data that may contain markup.
        self.table_html = page_data.to_html(
            index=False,
            max_rows=None,
            table_id=f"table-{self._table_id}",
            classes="table table-striped table-hover",
            escape=False,
        )

    @traitlets.observe("page")
    def _page_changed(self, change):
        """Handler for when the page number is changed from the frontend."""
        self._set_table_html()

0 commit comments

Comments
 (0)