Skip to content

Commit cbae5d8

Browse files
committed
Merge branch 'main' into shuowei-anywidget-html-repr
2 parents 0a1df42 + 2dcf6ae commit cbae5d8

File tree

19 files changed

+7872
-106
lines changed

19 files changed

+7872
-106
lines changed

.github/workflows/js-tests.yml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
name: js-tests
2+
on:
3+
pull_request:
4+
branches:
5+
- main
6+
push:
7+
branches:
8+
- main
9+
jobs:
10+
build:
11+
runs-on: ubuntu-latest
12+
steps:
13+
- name: Checkout
14+
uses: actions/checkout@v4
15+
- name: Install modules
16+
working-directory: ./tests/js
17+
run: npm install
18+
- name: Run tests
19+
working-directory: ./tests/js
20+
run: npm test

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ coverage.xml
5858

5959
# System test environment variables.
6060
system_tests/local_test_setup
61+
tests/js/node_modules/
6162

6263
# Make sure a generated file isn't accidentally committed.
6364
pylintrc

bigframes/core/compile/sqlglot/expressions/constants.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import math
16+
1517
import sqlglot.expressions as sge
1618

1719
_ZERO = sge.Cast(this=sge.convert(0), to="INT64")
@@ -23,3 +25,13 @@
2325
# FLOAT64 has 11 exponent bits, so the max value is about 2**(2**10)
2426
# ln(2**(2**10)) == (2**10)*ln(2) ~= 709.78, so EXP(x) for x>709.78 will overflow.
2527
_FLOAT64_EXP_BOUND = sge.convert(709.78)
28+
29+
# The natural logarithm of the maximum value for a signed 64-bit integer.
30+
# This is used to check for potential overflows in power operations involving integers
31+
# by checking if `exponent * log(base)` exceeds this value.
32+
_INT64_LOG_BOUND = math.log(2**63 - 1)
33+
34+
# Represents the largest integer N where all integers from -N to N can be
35+
# represented exactly as a float64. Float64 types have a 53-bit significand precision,
36+
# so integers beyond this value may lose precision.
37+
_FLOAT64_MAX_INT_PRECISION = 2**53

bigframes/core/compile/sqlglot/expressions/numeric_ops.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,141 @@ def _(expr: TypedExpr) -> sge.Expression:
210210
return expr.expr
211211

212212

213+
@register_binary_op(ops.pow_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
    """Compile ``ops.pow_op``, choosing the integer or floating-point lowering.

    Both operands are passed through ``_coerce_bool_to_int`` first. The integer
    lowering is used only when the dtypes of both operands equal
    ``dtypes.INT_DTYPE``; every other combination uses ``_float_pow_op``.
    """
    base = _coerce_bool_to_int(left)
    exponent = _coerce_bool_to_int(right)
    both_int = left.dtype == dtypes.INT_DTYPE and right.dtype == dtypes.INT_DTYPE
    build = _int_pow_op if both_int else _float_pow_op
    return build(base, exponent)
221+
222+
223+
def _int_pow_op(
    left_expr: sge.Expression, right_expr: sge.Expression
) -> sge.Expression:
    """Build the SQL expression for an integer ``left ** right``.

    The result is a CASE expression. It yields NULL when the base is non-zero
    and ``right * LN(ABS(left))`` exceeds ``constants._INT64_LOG_BOUND``.
    Otherwise the base is cast to DECIMAL, raised to the exponent with POW, and
    the result is cast to INT64.
    """
    # Overflow guard: estimate the magnitude of the result in log space.
    base_is_nonzero = sge.NEQ(this=left_expr, expression=sge.convert(0))
    log_magnitude = sge.Mul(
        this=right_expr, expression=sge.Ln(this=sge.Abs(this=left_expr))
    )
    exceeds_int64 = sge.GT(
        this=log_magnitude, expression=sge.convert(constants._INT64_LOG_BOUND)
    )
    overflow_cond = sge.and_(base_is_nonzero, exceeds_int64)

    # Non-overflowing path: POW on a DECIMAL base, then back to INT64.
    decimal_base = sge.Cast(
        this=left_expr, to=sge.DataType(this=sge.DataType.Type.DECIMAL)
    )
    powered = sge.Cast(
        this=sge.Pow(this=decimal_base, expression=right_expr), to="INT64"
    )

    return sge.Case(
        ifs=[sge.If(this=overflow_cond, true=sge.Null())],
        default=powered,
    )
253+
254+
255+
def _float_pow_op(
    left_expr: sge.Expression, right_expr: sge.Expression
) -> sge.Expression:
    """Build the SQL expression for a floating-point ``left ** right``.

    The result is a CASE expression whose branches return a fixed value
    (1, +/-INF or NaN) for inputs that BigQuery's POW would reject, and fall
    through to POW otherwise.

    Args:
        left_expr: The base expression.
        right_expr: The exponent expression.

    Returns:
        A ``sge.Case`` expression computing the power.
    """
    # Most conditions here seek to prevent calling BQ POW with inputs that would generate errors.
    # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/mathematical_functions#pow
    overflow_cond = sge.and_(
        sge.NEQ(this=left_expr, expression=constants._ZERO),
        sge.GT(
            this=sge.Mul(
                this=right_expr, expression=sge.Ln(this=sge.Abs(this=left_expr))
            ),
            expression=constants._FLOAT64_EXP_BOUND,
        ),
    )

    # FLOAT64 loses integer precision beyond 2**53; past that there is not
    # enough precision to determine the exponent's parity.
    exp_too_big = sge.GT(
        this=sge.Abs(this=right_expr),
        expression=sge.convert(constants._FLOAT64_MAX_INT_PRECISION),
    )
    # Treat very large exponents as +/-INF.
    norm_exp = sge.Case(
        ifs=[
            sge.If(
                this=exp_too_big,
                true=sge.Mul(this=constants._INF, expression=sge.Sign(this=right_expr)),
            )
        ],
        default=right_expr,
    )

    pow_result = sge.Pow(this=left_expr, expression=norm_exp)

    # This cast is dangerous; it must only be executed where the exponent has
    # been bounds-checked.
    # Ibis needs try_cast binding to bq safe_cast
    exponent_is_whole = sge.EQ(
        this=sge.Cast(this=right_expr, to="INT64"), expression=right_expr
    )
    # BigQuery's MOD returns a remainder with the sign of the dividend, so a
    # negative odd exponent yields -1. Compare against 0 rather than 1 so that
    # negative odd exponents are recognised as odd too.
    odd_exponent = sge.and_(
        sge.LT(this=left_expr, expression=constants._ZERO),
        sge.NEQ(
            this=sge.Mod(
                this=sge.Cast(this=right_expr, to="INT64"), expression=sge.convert(2)
            ),
            expression=sge.convert(0),
        ),
    )
    infinite_base = sge.EQ(this=sge.Abs(this=left_expr), expression=constants._INF)

    return sge.Case(
        ifs=[
            # Might be able to do something more clever with the base == 0 case
            sge.If(
                this=sge.EQ(this=right_expr, expression=constants._ZERO),
                true=sge.convert(1),
            ),
            sge.If(
                this=sge.EQ(this=left_expr, expression=sge.convert(1)),
                true=sge.convert(1),
            ),  # Need to ignore exponent, even if it is NA
            sge.If(
                this=sge.and_(
                    sge.EQ(this=left_expr, expression=constants._ZERO),
                    sge.LT(this=right_expr, expression=constants._ZERO),
                ),
                true=constants._INF,
            ),  # This case would error POW function in BQ
            sge.If(this=infinite_base, true=pow_result),
            sge.If(
                this=exp_too_big, true=pow_result
            ),  # BigQuery can actually handle the +/-INF cases gracefully
            sge.If(
                this=sge.and_(
                    sge.LT(this=left_expr, expression=constants._ZERO),
                    sge.Not(this=exponent_is_whole),
                ),
                true=constants._NAN,
            ),
            sge.If(
                this=overflow_cond,
                # Sign of the overflow: negative only for a negative base
                # raised to an odd exponent.
                true=sge.Mul(
                    this=constants._INF,
                    expression=sge.Case(
                        ifs=[sge.If(this=odd_exponent, true=sge.convert(-1))],
                        default=sge.convert(1),
                    ),
                ),
            ),  # finite overflows would cause bq to error
        ],
        default=pow_result,
    )
346+
347+
213348
@register_unary_op(ops.sqrt_op)
214349
def _(expr: TypedExpr) -> sge.Expression:
215350
return sge.Case(

bigframes/display/anywidget.py

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
from __future__ import annotations
1818

19+
import dataclasses
1920
from importlib import resources
2021
import functools
2122
import math
@@ -28,6 +29,7 @@
2829
from bigframes.core import blocks
2930
import bigframes.dataframe
3031
import bigframes.display.html
32+
import bigframes.dtypes as dtypes
3133

3234
# anywidget and traitlets are optional dependencies. We don't want the import of
3335
# this module to fail if they aren't installed, though. Instead, we try to
@@ -48,6 +50,12 @@
4850
WIDGET_BASE = object
4951

5052

53+
@dataclasses.dataclass(frozen=True)
class _SortState:
    """Immutable record of a sort column and direction, comparable by value."""

    # Label of the column to sort by.
    column: str
    # True for ascending order, False for descending.
    ascending: bool
57+
58+
5159
class TableWidget(WIDGET_BASE):
5260
"""An interactive, paginated table widget for BigFrames DataFrames.
5361
@@ -63,6 +71,9 @@ class TableWidget(WIDGET_BASE):
6371
allow_none=True,
6472
).tag(sync=True)
6573
table_html = traitlets.Unicode().tag(sync=True)
74+
sort_column = traitlets.Unicode("").tag(sync=True)
75+
sort_ascending = traitlets.Bool(True).tag(sync=True)
76+
orderable_columns = traitlets.List(traitlets.Unicode(), []).tag(sync=True)
6677
_initial_load_complete = traitlets.Bool(False).tag(sync=True)
6778
_batches: Optional[blocks.PandasBatches] = None
6879
_error_message = traitlets.Unicode(allow_none=True, default_value=None).tag(
@@ -89,15 +100,25 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
89100
self._all_data_loaded = False
90101
self._batch_iter: Optional[Iterator[pd.DataFrame]] = None
91102
self._cached_batches: List[pd.DataFrame] = []
103+
self._last_sort_state: Optional[_SortState] = None
92104

93105
# respect display options for initial page size
94106
initial_page_size = bigframes.options.display.max_rows
95107

96108
# set traitlets properties that trigger observers
109+
# TODO(b/462525985): Investigate and improve TableWidget UX for DataFrames with a large number of columns.
97110
self.page_size = initial_page_size
111+
# TODO(b/463754889): Support non-string column labels for sorting.
112+
if all(isinstance(col, str) for col in dataframe.columns):
113+
self.orderable_columns = [
114+
str(col_name)
115+
for col_name, dtype in dataframe.dtypes.items()
116+
if dtypes.is_orderable(dtype)
117+
]
118+
else:
119+
self.orderable_columns = []
98120

99-
# len(dataframe) is expensive, since it will trigger a
100-
# SELECT COUNT(*) query. It is a must have however.
121+
# obtain the row counts
101122
# TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
102123
# before we get here so that the count might already be cached.
103124
self._reset_batches_for_new_page_size()
@@ -121,6 +142,11 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
121142
# Also used as a guard to prevent observers from firing during initialization.
122143
self._initial_load_complete = True
123144

145+
@traitlets.observe("_initial_load_complete")
def _on_initial_load_complete(self, change: Dict[str, Any]):
    """Render the table HTML when ``_initial_load_complete`` changes to a truthy value."""
    if not change["new"]:
        return
    self._set_table_html()
149+
124150
@functools.cached_property
125151
def _esm(self):
126152
"""Load JavaScript code from external file."""
@@ -221,13 +247,17 @@ def _cached_data(self) -> pd.DataFrame:
221247
return pd.DataFrame(columns=self._dataframe.columns)
222248
return pd.concat(self._cached_batches, ignore_index=True)
223249

250+
def _reset_batch_cache(self) -> None:
    """Clear the cached batches and batch iterator, and unset the all-data-loaded flag."""
    self._all_data_loaded = False
    self._batch_iter = None
    self._cached_batches = []
255+
224256
def _reset_batches_for_new_page_size(self) -> None:
225257
"""Reset the batch iterator when page size changes."""
226258
self._batches = self._dataframe._to_pandas_batches(page_size=self.page_size)
227259

228-
self._cached_batches = []
229-
self._batch_iter = None
230-
self._all_data_loaded = False
260+
self._reset_batch_cache()
231261

232262
def _set_table_html(self) -> None:
233263
"""Sets the current html data based on the current page and page size."""
@@ -237,6 +267,21 @@ def _set_table_html(self) -> None:
237267
)
238268
return
239269

270+
# Apply sorting if a column is selected
271+
df_to_display = self._dataframe
272+
if self.sort_column:
273+
# TODO(b/463715504): Support sorting by index columns.
274+
df_to_display = df_to_display.sort_values(
275+
by=self.sort_column, ascending=self.sort_ascending
276+
)
277+
278+
# Reset batches when sorting changes
279+
if self._last_sort_state != _SortState(self.sort_column, self.sort_ascending):
280+
self._batches = df_to_display._to_pandas_batches(page_size=self.page_size)
281+
self._reset_batch_cache()
282+
self._last_sort_state = _SortState(self.sort_column, self.sort_ascending)
283+
self.page = 0 # Reset to first page
284+
240285
start = self.page * self.page_size
241286
end = start + self.page_size
242287

@@ -272,8 +317,14 @@ def _set_table_html(self) -> None:
272317
self.table_html = bigframes.display.html.render_html(
273318
dataframe=page_data,
274319
table_id=f"table-{self._table_id}",
320+
orderable_columns=self.orderable_columns,
275321
)
276322

323+
@traitlets.observe("sort_column", "sort_ascending")
def _sort_changed(self, _change: Dict[str, Any]):
    """Re-render the table HTML whenever ``sort_column`` or ``sort_ascending`` changes."""
    self._set_table_html()
327+
277328
@traitlets.observe("page")
278329
def _page_changed(self, _change: Dict[str, Any]) -> None:
279330
"""Handler for when the page number is changed from the frontend."""
@@ -288,6 +339,9 @@ def _page_size_changed(self, _change: Dict[str, Any]) -> None:
288339
return
289340
# Reset the page to 0 when page size changes to avoid invalid page states
290341
self.page = 0
342+
# Reset the sort state to default (no sort)
343+
self.sort_column = ""
344+
self.sort_ascending = True
291345

292346
# Reset batches to use new page size for future data fetching
293347
self._reset_batches_for_new_page_size()

bigframes/display/html.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,15 @@
1717
from __future__ import annotations
1818

1919
import html
20+
from typing import Any
2021

2122
import pandas as pd
2223
import pandas.api.types
2324

2425
from bigframes._config import options
2526

2627

27-
def _is_dtype_numeric(dtype) -> bool:
28+
def _is_dtype_numeric(dtype: Any) -> bool:
2829
"""Check if a dtype is numeric for alignment purposes."""
2930
return pandas.api.types.is_numeric_dtype(dtype)
3031

@@ -33,18 +34,31 @@ def render_html(
3334
*,
3435
dataframe: pd.DataFrame,
3536
table_id: str,
37+
orderable_columns: list[str] | None = None,
3638
) -> str:
3739
"""Render a pandas DataFrame to HTML with specific styling."""
3840
classes = "dataframe table table-striped table-hover"
3941
table_html = [f'<table border="1" class="{classes}" id="{table_id}">']
4042
precision = options.display.precision
43+
orderable_columns = orderable_columns or []
4144

4245
# Render table head
4346
table_html.append(" <thead>")
4447
table_html.append(' <tr style="text-align: left;">')
4548
for col in dataframe.columns:
49+
th_classes = []
50+
if col in orderable_columns:
51+
th_classes.append("sortable")
52+
class_str = f'class="{" ".join(th_classes)}"' if th_classes else ""
53+
header_div = (
54+
'<div style="resize: horizontal; overflow: auto; '
55+
"box-sizing: border-box; width: 100%; height: 100%; "
56+
'padding: 0.5em;">'
57+
f"{html.escape(str(col))}"
58+
"</div>"
59+
)
4660
table_html.append(
47-
f' <th style="text-align: left;"><div style="resize: horizontal; overflow: auto; box-sizing: border-box; width: 100%; height: 100%; padding: 0.5em;">{html.escape(str(col))}</div></th>'
61+
f' <th style="text-align: left;" {class_str}>{header_div}</th>'
4862
)
4963
table_html.append(" </tr>")
5064
table_html.append(" </thead>")

0 commit comments

Comments
 (0)