Skip to content

Commit 9d17286

Browse files
Merge branch 'main' into polars_semi
2 parents 683f349 + 0fffc49 commit 9d17286

File tree

14 files changed

+225
-90
lines changed

14 files changed

+225
-90
lines changed

bigframes/_config/display_options.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ class DisplayOptions:
2929
max_columns: int = 20
3030
max_rows: int = 25
3131
progress_bar: Optional[str] = "auto"
32-
repr_mode: Literal["head", "deferred"] = "head"
32+
repr_mode: Literal["head", "deferred", "anywidget"] = "head"
3333

3434
max_info_columns: int = 100
3535
max_info_rows: Optional[int] = 200000

bigframes/core/compile/googlesql/query.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,13 @@ def _select_field(self, field) -> SelectExpression:
8383
return SelectExpression(expression=expr.ColumnExpression(name=field))
8484

8585
else:
86-
alias = field[1] if (field[0] != field[1]) else None
86+
alias = (
87+
expr.AliasExpression(field[1])
88+
if isinstance(field[1], str)
89+
else field[1]
90+
if (field[0] != field[1])
91+
else None
92+
)
8793
return SelectExpression(
8894
expression=expr.ColumnExpression(name=field[0]), alias=alias
8995
)

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,7 @@ def _compile_result_node(self, root: nodes.ResultNode) -> str:
125125
(name, scalar_compiler.compile_scalar_expression(ref))
126126
for ref, name in root.output_cols
127127
)
128-
# Skip squashing selections to ensure the right ordering and limit keys
129-
sqlglot_ir = self.compile_node(root.child).select(
130-
selected_cols, squash_selections=False
131-
)
128+
sqlglot_ir = self.compile_node(root.child).select(selected_cols)
132129

133130
if root.order_by is not None:
134131
ordering_cols = tuple(

bigframes/core/compile/sqlglot/sqlglot_ir.py

Lines changed: 0 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,6 @@ def from_union(
203203
def select(
204204
self,
205205
selected_cols: tuple[tuple[str, sge.Expression], ...],
206-
squash_selections: bool = True,
207206
) -> SQLGlotIR:
208207
selections = [
209208
sge.Alias(
@@ -213,15 +212,6 @@ def select(
213212
for id, expr in selected_cols
214213
]
215214

216-
# If squashing is enabled, we try to simplify the selections
217-
# by checking if the new selections are simply aliases of the
218-
# original columns.
219-
if squash_selections:
220-
new_selections = _squash_selections(self.expr.expressions, selections)
221-
if new_selections != []:
222-
new_expr = self.expr.select(*new_selections, append=False)
223-
return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen)
224-
225215
new_expr = self._encapsulate_as_cte().select(*selections, append=False)
226216
return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen)
227217

@@ -361,63 +351,3 @@ def _table(table: bigquery.TableReference) -> sge.Table:
361351
db=sg.to_identifier(table.dataset_id, quoted=True),
362352
catalog=sg.to_identifier(table.project, quoted=True),
363353
)
364-
365-
366-
def _squash_selections(
367-
old_expr: list[sge.Expression], new_expr: list[sge.Alias]
368-
) -> list[sge.Alias]:
369-
"""
370-
TODO: Re-enable this function to optimize the SQL.
371-
Simplifies the select column expressions if existing (old_expr) and
372-
new (new_expr) selected columns are both simple aliases of column definitions.
373-
374-
Example:
375-
old_expr: [A AS X, B AS Y]
376-
new_expr: [X AS P, Y AS Q]
377-
Result: [A AS P, B AS Q]
378-
"""
379-
old_alias_map: typing.Dict[str, str] = {}
380-
for selected in old_expr:
381-
column_alias_pair = _get_column_alias_pair(selected)
382-
if column_alias_pair is None:
383-
return []
384-
else:
385-
old_alias_map[column_alias_pair[1]] = column_alias_pair[0]
386-
387-
new_selected_cols: typing.List[sge.Alias] = []
388-
for selected in new_expr:
389-
column_alias_pair = _get_column_alias_pair(selected)
390-
if column_alias_pair is None or column_alias_pair[0] not in old_alias_map:
391-
return []
392-
else:
393-
new_alias_expr = sge.Alias(
394-
this=sge.ColumnDef(
395-
this=sge.to_identifier(
396-
old_alias_map[column_alias_pair[0]], quoted=True
397-
)
398-
),
399-
alias=sg.to_identifier(column_alias_pair[1], quoted=True),
400-
)
401-
new_selected_cols.append(new_alias_expr)
402-
return new_selected_cols
403-
404-
405-
def _get_column_alias_pair(
406-
expr: sge.Expression,
407-
) -> typing.Optional[typing.Tuple[str, str]]:
408-
"""Checks if an expression is a simple alias of a column definition
409-
(e.g., "column_name AS alias_name").
410-
If it is, returns a tuple containing the original column name and the alias name.
411-
Returns `None` otherwise.
412-
"""
413-
if not isinstance(expr, sge.Alias):
414-
return None
415-
if not isinstance(expr.this, sge.ColumnDef):
416-
return None
417-
418-
column_def_expr: sge.ColumnDef = expr.this
419-
if not isinstance(column_def_expr.this, sge.Identifier):
420-
return None
421-
422-
original_identifier: sge.Identifier = column_def_expr.this
423-
return (original_identifier.this, expr.alias)

bigframes/core/indexes/base.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,9 @@ def __repr__(self) -> str:
251251
# metadata, like we do with DataFrame.
252252
opts = bigframes.options.display
253253
max_results = opts.max_rows
254-
if opts.repr_mode == "deferred":
254+
# anywidget mode uses the same display logic as the "deferred" mode
255+
# for faster execution
256+
if opts.repr_mode in ("deferred", "anywidget"):
255257
_, dry_run_query_job = self._block._compute_dry_run()
256258
return formatter.repr_query_job(dry_run_query_job)
257259

bigframes/core/rewrite/pruning.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414
import dataclasses
1515
import functools
16-
from typing import AbstractSet
16+
import typing
1717

1818
from bigframes.core import identifiers, nodes
1919

@@ -143,7 +143,7 @@ def prune_selection_child(
143143

144144
def prune_node(
145145
node: nodes.BigFrameNode,
146-
ids: AbstractSet[identifiers.ColumnId],
146+
ids: typing.AbstractSet[identifiers.ColumnId],
147147
):
148148
# This clause is important, ensures idempotency, so can reach fixed point
149149
if not (set(node.ids) - ids):
@@ -157,7 +157,7 @@ def prune_node(
157157

158158
def prune_aggregate(
159159
node: nodes.AggregateNode,
160-
used_cols: AbstractSet[identifiers.ColumnId],
160+
used_cols: typing.AbstractSet[identifiers.ColumnId],
161161
) -> nodes.AggregateNode:
162162
pruned_aggs = (
163163
tuple(agg for agg in node.aggregations if agg[1] in used_cols)
@@ -169,15 +169,15 @@ def prune_aggregate(
169169
@functools.singledispatch
170170
def prune_leaf(
171171
node: nodes.BigFrameNode,
172-
used_cols: AbstractSet[identifiers.ColumnId],
172+
used_cols: typing.AbstractSet[identifiers.ColumnId],
173173
):
174174
...
175175

176176

177177
@prune_leaf.register
178178
def prune_readlocal(
179179
node: nodes.ReadLocalNode,
180-
selection: AbstractSet[identifiers.ColumnId],
180+
selection: typing.AbstractSet[identifiers.ColumnId],
181181
) -> nodes.ReadLocalNode:
182182
new_scan_list = node.scan_list.filter_cols(selection)
183183
return dataclasses.replace(
@@ -190,7 +190,7 @@ def prune_readlocal(
190190
@prune_leaf.register
191191
def prune_readtable(
192192
node: nodes.ReadTableNode,
193-
selection: AbstractSet[identifiers.ColumnId],
193+
selection: typing.AbstractSet[identifiers.ColumnId],
194194
) -> nodes.ReadTableNode:
195195
new_scan_list = node.scan_list.filter_cols(selection)
196196
return dataclasses.replace(node, scan_list=new_scan_list)

bigframes/dataframe.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -725,7 +725,9 @@ def __repr__(self) -> str:
725725

726726
opts = bigframes.options.display
727727
max_results = opts.max_rows
728-
if opts.repr_mode == "deferred":
728+
# anywidget mode uses the same display logic as the "deferred" mode
729+
# for faster execution
730+
if opts.repr_mode in ("deferred", "anywidget"):
729731
return formatter.repr_query_job(self._compute_dry_run())
730732

731733
# TODO(swast): pass max_columns and get the true column count back. Maybe
@@ -774,6 +776,23 @@ def _repr_html_(self) -> str:
774776
if opts.repr_mode == "deferred":
775777
return formatter.repr_query_job(self._compute_dry_run())
776778

779+
if opts.repr_mode == "anywidget":
780+
import anywidget # type: ignore
781+
782+
# create an iterator for the data batches
783+
batches = self.to_pandas_batches()
784+
785+
# get the first page result
786+
try:
787+
first_page = next(iter(batches))
788+
except StopIteration:
789+
first_page = pandas.DataFrame(columns=self.columns)
790+
791+
# Instantiate and return the widget. The widget's frontend will
792+
# handle the display of the table and pagination
793+
return anywidget.AnyWidget(dataframe=first_page)
794+
795+
self._cached()
777796
df = self.copy()
778797
if bigframes.options.display.blob_display:
779798
blob_cols = [

bigframes/series.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,9 @@ def __repr__(self) -> str:
430430
# metadata, like we do with DataFrame.
431431
opts = bigframes.options.display
432432
max_results = opts.max_rows
433-
if opts.repr_mode == "deferred":
433+
# anywidget mode uses the same display logic as the "deferred" mode
434+
# for faster execution
435+
if opts.repr_mode in ("deferred", "anywidget"):
434436
return formatter.repr_query_job(self._compute_dry_run())
435437

436438
self._cached()

mypy.ini

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,6 @@ ignore_missing_imports = True
4141

4242
[mypy-google.cloud.bigtable]
4343
ignore_missing_imports = True
44+
45+
[mypy-anywidget]
46+
ignore_missing_imports = True
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "d10bfca4",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"# Copyright 2025 Google LLC\n",
11+
"#\n",
12+
"# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
13+
"# you may not use this file except in compliance with the License.\n",
14+
"# You may obtain a copy of the License at\n",
15+
"#\n",
16+
"# https://www.apache.org/licenses/LICENSE-2.0\n",
17+
"#\n",
18+
"# Unless required by applicable law or agreed to in writing, software\n",
19+
"# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
20+
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
21+
"# See the License for the specific language governing permissions and\n",
22+
"# limitations under the License."
23+
]
24+
},
25+
{
26+
"cell_type": "markdown",
27+
"id": "acca43ae",
28+
"metadata": {},
29+
"source": [
30+
"# Demo to Show Anywidget mode"
31+
]
32+
},
33+
{
34+
"cell_type": "code",
35+
"execution_count": 2,
36+
"id": "ca22f059",
37+
"metadata": {},
38+
"outputs": [],
39+
"source": [
40+
"import bigframes.pandas as bpd"
41+
]
42+
},
43+
{
44+
"cell_type": "markdown",
45+
"id": "04406a4d",
46+
"metadata": {},
47+
"source": [
48+
"Set the display option to use anywidget"
49+
]
50+
},
51+
{
52+
"cell_type": "code",
53+
"execution_count": 3,
54+
"id": "1bc5aaf3",
55+
"metadata": {},
56+
"outputs": [],
57+
"source": [
58+
"bpd.options.display.repr_mode = \"anywidget\""
59+
]
60+
},
61+
{
62+
"cell_type": "markdown",
63+
"id": "0a354c69",
64+
"metadata": {},
65+
"source": [
66+
"Display the dataframe in anywidget mode"
67+
]
68+
},
69+
{
70+
"cell_type": "code",
71+
"execution_count": 4,
72+
"id": "f289d250",
73+
"metadata": {},
74+
"outputs": [
75+
{
76+
"data": {
77+
"text/html": [
78+
"Query job 91997f19-1768-4360-afa7-4a431b3e2d22 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:91997f19-1768-4360-afa7-4a431b3e2d22&page=queryresults\">Open Job</a>"
79+
],
80+
"text/plain": [
81+
"<IPython.core.display.HTML object>"
82+
]
83+
},
84+
"metadata": {},
85+
"output_type": "display_data"
86+
},
87+
{
88+
"name": "stdout",
89+
"output_type": "stream",
90+
"text": [
91+
"Computation deferred. Computation will process 171.4 MB\n"
92+
]
93+
}
94+
],
95+
"source": [
96+
"df = bpd.read_gbq(\"bigquery-public-data.usa_names.usa_1910_2013\")\n",
97+
"print(df)"
98+
]
99+
},
100+
{
101+
"cell_type": "markdown",
102+
"id": "3a73e472",
103+
"metadata": {},
104+
"source": [
105+
"Display Series in anywidget mode"
106+
]
107+
},
108+
{
109+
"cell_type": "code",
110+
"execution_count": 5,
111+
"id": "42bb02ab",
112+
"metadata": {},
113+
"outputs": [
114+
{
115+
"name": "stdout",
116+
"output_type": "stream",
117+
"text": [
118+
"Computation deferred. Computation will process 171.4 MB\n"
119+
]
120+
}
121+
],
122+
"source": [
123+
"test_series = df[\"year\"]\n",
124+
"print(test_series)"
125+
]
126+
}
127+
],
128+
"metadata": {
129+
"kernelspec": {
130+
"display_name": "venv",
131+
"language": "python",
132+
"name": "python3"
133+
},
134+
"language_info": {
135+
"codemirror_mode": {
136+
"name": "ipython",
137+
"version": 3
138+
},
139+
"file_extension": ".py",
140+
"mimetype": "text/x-python",
141+
"name": "python",
142+
"nbconvert_exporter": "python",
143+
"pygments_lexer": "ipython3",
144+
"version": "3.10.15"
145+
}
146+
},
147+
"nbformat": 4,
148+
"nbformat_minor": 5
149+
}

0 commit comments

Comments
 (0)