Skip to content

Commit d97cfac

Browse files
committed
Merge branch 'main' into shuowei-anywidget-col
2 parents b75cb8c + 196f6df commit d97cfac

File tree

47 files changed

+864
-252
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+864
-252
lines changed

CHANGELOG.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,29 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.28.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.27.0...v2.28.0) (2025-11-03)
8+
9+
10+
### Features
11+
12+
* Add bigframes.bigquery.st_simplify ([#2210](https://github.com/googleapis/python-bigquery-dataframes/issues/2210)) ([ecee2bc](https://github.com/googleapis/python-bigquery-dataframes/commit/ecee2bc6ada0bc968fc56ed7194dc8c043547e93))
13+
* Add Series.dt.day_name ([#2218](https://github.com/googleapis/python-bigquery-dataframes/issues/2218)) ([5e006e4](https://github.com/googleapis/python-bigquery-dataframes/commit/5e006e404b65c32e5b1d342ebfcfce59ee592c8c))
14+
* Polars engine supports std, var ([#2215](https://github.com/googleapis/python-bigquery-dataframes/issues/2215)) ([ef5e83a](https://github.com/googleapis/python-bigquery-dataframes/commit/ef5e83acedf005cbe1e6ad174bec523ac50517d7))
15+
* Support INFORMATION_SCHEMA views in `read_gbq` ([#1895](https://github.com/googleapis/python-bigquery-dataframes/issues/1895)) ([d97cafc](https://github.com/googleapis/python-bigquery-dataframes/commit/d97cafcb5921fca2351b18011b0e54e2631cc53d))
16+
* Support some python standard lib callables in apply/combine ([#2187](https://github.com/googleapis/python-bigquery-dataframes/issues/2187)) ([86a2756](https://github.com/googleapis/python-bigquery-dataframes/commit/86a27564b48b854a32b3d11cd2105aa0fa496279))
17+
18+
19+
### Bug Fixes
20+
21+
* Correct connection normalization in blob system tests ([#2222](https://github.com/googleapis/python-bigquery-dataframes/issues/2222)) ([a0e1e50](https://github.com/googleapis/python-bigquery-dataframes/commit/a0e1e50e47c758bdceb54d04180ed36b35cf2e35))
22+
* Improve error handling in blob operations ([#2194](https://github.com/googleapis/python-bigquery-dataframes/issues/2194)) ([d410046](https://github.com/googleapis/python-bigquery-dataframes/commit/d4100466612df0523d01ed01ca1e115dabd6ef45))
23+
* Resolve AttributeError in TableWidget and improve initialization ([#1937](https://github.com/googleapis/python-bigquery-dataframes/issues/1937)) ([4c4c9b1](https://github.com/googleapis/python-bigquery-dataframes/commit/4c4c9b14657b7cda1940ef39e7d4db20a9ff5308))
24+
25+
26+
### Documentation
27+
28+
* Update bq_dataframes_llm_output_schema.ipynb ([#2004](https://github.com/googleapis/python-bigquery-dataframes/issues/2004)) ([316ba9f](https://github.com/googleapis/python-bigquery-dataframes/commit/316ba9f557d792117d5a7845d7567498f78dd513))
29+
730
## [2.27.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.26.0...v2.27.0) (2025-10-24)
831

932

bigframes/core/blocks.py

Lines changed: 35 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
import bigframes.operations.aggregations as agg_ops
6969
from bigframes.session import dry_runs, execution_spec
7070
from bigframes.session import executor as executors
71+
from bigframes.session._io import pandas as io_pandas
7172

7273
# Type constraint for wherever column labels are used
7374
Label = typing.Hashable
@@ -711,40 +712,15 @@ def to_pandas_batches(
711712
# To reduce the number of edge cases to consider when working with the
712713
# results of this, always return at least one DataFrame. See:
713714
# b/428918844.
714-
empty_val = pd.DataFrame(
715-
{
716-
col: pd.Series([], dtype=self.expr.get_column_type(col))
717-
for col in itertools.chain(self.value_columns, self.index_columns)
718-
}
719-
)
720-
series_map = {}
721-
for col in itertools.chain(self.value_columns, self.index_columns):
722-
dtype = self.expr.get_column_type(col)
723-
if bigframes.dtypes.contains_db_dtypes_json_dtype(dtype):
724-
# Due to a limitation in Apache Arrow (#45262), JSON columns are not
725-
# natively supported by the to_pandas_batches() method, which is
726-
# used by the anywidget backend.
727-
# Workaround for https://github.com/googleapis/python-bigquery-dataframes/issues/1273
728-
# PyArrow doesn't support creating an empty array with db_dtypes.JSONArrowType,
729-
# especially when nested.
730-
# Create with string type and then cast.
731-
732-
# MyPy doesn't automatically narrow the type of 'dtype' here,
733-
# so we add an explicit check.
734-
if isinstance(dtype, pd.ArrowDtype):
735-
safe_pa_type = bigframes.dtypes._replace_json_arrow_with_string(
736-
dtype.pyarrow_dtype
737-
)
738-
safe_dtype = pd.ArrowDtype(safe_pa_type)
739-
series_map[col] = pd.Series([], dtype=safe_dtype).astype(dtype)
740-
else:
741-
# This branch should ideally not be reached if
742-
# contains_db_dtypes_json_dtype is accurate,
743-
# but it's here for MyPy's sake.
744-
series_map[col] = pd.Series([], dtype=dtype)
745-
else:
746-
series_map[col] = pd.Series([], dtype=dtype)
747-
empty_val = pd.DataFrame(series_map)
715+
try:
716+
empty_arrow_table = self.expr.schema.to_pyarrow().empty_table()
717+
except pa.ArrowNotImplementedError:
718+
# Bug with some pyarrow versions(https://github.com/apache/arrow/issues/45262),
719+
# empty_table only supports base storage types, not extension types.
720+
empty_arrow_table = self.expr.schema.to_pyarrow(
721+
use_storage_types=True
722+
).empty_table()
723+
empty_val = io_pandas.arrow_to_pandas(empty_arrow_table, self.expr.schema)
748724
dfs = map(
749725
lambda a: a[0],
750726
itertools.zip_longest(
@@ -2020,6 +1996,31 @@ def _generate_resample_label(
20201996
Literal["epoch", "start", "start_day", "end", "end_day"],
20211997
] = "start_day",
20221998
) -> Block:
1999+
if not isinstance(rule, str):
2000+
raise NotImplementedError(
2001+
f"Only offset strings are currently supported for rule, but got {repr(rule)}. {constants.FEEDBACK_LINK}"
2002+
)
2003+
2004+
if rule in ("ME", "YE", "QE", "BME", "BA", "BQE", "W"):
2005+
raise NotImplementedError(
2006+
f"Offset strings 'ME', 'YE', 'QE', 'BME', 'BA', 'BQE', 'W' are not currently supported for rule, but got {repr(rule)}. {constants.FEEDBACK_LINK}"
2007+
)
2008+
2009+
if closed == "right":
2010+
raise NotImplementedError(
2011+
f"Only closed='left' is currently supported. {constants.FEEDBACK_LINK}",
2012+
)
2013+
2014+
if label == "right":
2015+
raise NotImplementedError(
2016+
f"Only label='left' is currently supported. {constants.FEEDBACK_LINK}",
2017+
)
2018+
2019+
if origin not in ("epoch", "start", "start_day"):
2020+
raise NotImplementedError(
2021+
f"Only origin='epoch', 'start', 'start_day' are currently supported, but got {repr(origin)}. {constants.FEEDBACK_LINK}"
2022+
)
2023+
20232024
# Validate and resolve the index or column to use for grouping
20242025
if on is None:
20252026
if len(self.index_columns) == 0:

bigframes/core/compile/polars/compiler.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -535,9 +535,11 @@ def compile_agg_op(
535535
if isinstance(op, agg_ops.StdOp):
536536
return pl.std(inputs[0])
537537
if isinstance(op, agg_ops.VarOp):
538-
return pl.var(inputs[0])
538+
# polars var doesn't support decimal, so use std instead
539+
return pl.std(inputs[0]).pow(2)
539540
if isinstance(op, agg_ops.PopVarOp):
540-
return pl.var(inputs[0], ddof=0)
541+
# polars var doesn't support decimal, so use std instead
542+
return pl.std(inputs[0], ddof=0).pow(2)
541543
if isinstance(op, agg_ops.FirstNonNullOp):
542544
return pl.col(*inputs).drop_nulls().first()
543545
if isinstance(op, agg_ops.LastNonNullOp):

bigframes/core/compile/sqlglot/aggregations/op_registration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,5 +52,5 @@ def arg_checker(*args, **kwargs):
5252
def __getitem__(self, op: str | agg_ops.WindowOp) -> CompilationFunc:
5353
key = op if isinstance(op, type) else type(op)
5454
if str(key) not in self._registered_ops:
55-
raise ValueError(f"{key} is already not registered")
55+
raise ValueError(f"{key} is not registered")
5656
return self._registered_ops[str(key)]

bigframes/core/compile/sqlglot/aggregations/unary_compiler.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,20 @@ def _(
239239
return apply_window_if_present(sge.func("MIN", column.expr), window)
240240

241241

242+
@UNARY_OP_REGISTRATION.register(agg_ops.PopVarOp)
243+
def _(
244+
op: agg_ops.PopVarOp,
245+
column: typed_expr.TypedExpr,
246+
window: typing.Optional[window_spec.WindowSpec] = None,
247+
) -> sge.Expression:
248+
expr = column.expr
249+
if column.dtype == dtypes.BOOL_DTYPE:
250+
expr = sge.Cast(this=expr, to="INT64")
251+
252+
expr = sge.func("VAR_POP", expr)
253+
return apply_window_if_present(expr, window)
254+
255+
242256
@UNARY_OP_REGISTRATION.register(agg_ops.QuantileOp)
243257
def _(
244258
op: agg_ops.QuantileOp,
@@ -278,6 +292,22 @@ def _(
278292
return apply_window_if_present(sge.func("COUNT", sge.convert(1)), window)
279293

280294

295+
@UNARY_OP_REGISTRATION.register(agg_ops.StdOp)
296+
def _(
297+
op: agg_ops.StdOp,
298+
column: typed_expr.TypedExpr,
299+
window: typing.Optional[window_spec.WindowSpec] = None,
300+
) -> sge.Expression:
301+
expr = column.expr
302+
if column.dtype == dtypes.BOOL_DTYPE:
303+
expr = sge.Cast(this=expr, to="INT64")
304+
305+
expr = sge.func("STDDEV", expr)
306+
if op.should_floor_result or column.dtype == dtypes.TIMEDELTA_DTYPE:
307+
expr = sge.Cast(this=sge.func("FLOOR", expr), to="INT64")
308+
return apply_window_if_present(expr, window)
309+
310+
281311
@UNARY_OP_REGISTRATION.register(agg_ops.ShiftOp)
282312
def _(
283313
op: agg_ops.ShiftOp,
@@ -331,3 +361,17 @@ def _(
331361
expression=shifted,
332362
unit=sge.Identifier(this="MICROSECOND"),
333363
)
364+
365+
366+
@UNARY_OP_REGISTRATION.register(agg_ops.VarOp)
367+
def _(
368+
op: agg_ops.VarOp,
369+
column: typed_expr.TypedExpr,
370+
window: typing.Optional[window_spec.WindowSpec] = None,
371+
) -> sge.Expression:
372+
expr = column.expr
373+
if column.dtype == dtypes.BOOL_DTYPE:
374+
expr = sge.Cast(this=expr, to="INT64")
375+
376+
expr = sge.func("VAR_SAMP", expr)
377+
return apply_window_if_present(expr, window)

bigframes/core/compile/sqlglot/expressions/comparison_ops.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
109109
return sge.LTE(this=left_expr, expression=right_expr)
110110

111111

112+
@register_binary_op(ops.maximum_op)
113+
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
114+
return sge.Greatest(expressions=[left.expr, right.expr])
115+
116+
112117
@register_binary_op(ops.minimum_op)
113118
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
114119
return sge.Least(this=left.expr, expressions=right.expr)

bigframes/core/compile/sqlglot/expressions/generic_ops.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
2525

2626
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
27+
register_binary_op = scalar_compiler.scalar_op_compiler.register_binary_op
2728
register_nary_op = scalar_compiler.scalar_op_compiler.register_nary_op
2829
register_ternary_op = scalar_compiler.scalar_op_compiler.register_ternary_op
2930

@@ -159,6 +160,13 @@ def _(*cases_and_outputs: TypedExpr) -> sge.Expression:
159160
)
160161

161162

163+
@register_binary_op(ops.coalesce_op)
164+
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
165+
if left.expr == right.expr:
166+
return left.expr
167+
return sge.Coalesce(this=left.expr, expressions=[right.expr])
168+
169+
162170
@register_nary_op(ops.RowKey)
163171
def _(*values: TypedExpr) -> sge.Expression:
164172
# All inputs into hash must be non-null or resulting hash will be null

bigframes/core/nodes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1627,7 +1627,7 @@ class ResultNode(UnaryNode):
16271627
# TODO: CTE definitions
16281628

16291629
def _validate(self):
1630-
for ref, name in self.output_cols:
1630+
for ref, _ in self.output_cols:
16311631
assert ref.id in self.child.ids
16321632

16331633
@property

bigframes/core/rewrite/identifiers.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,10 @@ def remap_variables(
5757
new_root = root.transform_children(lambda node: remapped_children[node])
5858

5959
# Step 3: Transform the current node using the mappings from its children.
60+
# "reversed" is required for InNode so that in case of a duplicate column ID,
61+
# the left child's mapping is the one that's kept.
6062
downstream_mappings: dict[identifiers.ColumnId, identifiers.ColumnId] = {
61-
k: v for mapping in new_child_mappings for k, v in mapping.items()
63+
k: v for mapping in reversed(new_child_mappings) for k, v in mapping.items()
6264
}
6365
if isinstance(new_root, nodes.InNode):
6466
new_root = typing.cast(nodes.InNode, new_root)

bigframes/dataframe.py

Lines changed: 5 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -4182,10 +4182,12 @@ def _split(
41824182
return [DataFrame(block) for block in blocks]
41834183

41844184
@validations.requires_ordering()
4185-
def _resample(
4185+
def resample(
41864186
self,
41874187
rule: str,
41884188
*,
4189+
closed: Optional[Literal["right", "left"]] = None,
4190+
label: Optional[Literal["right", "left"]] = None,
41894191
on: blocks.Label = None,
41904192
level: Optional[LevelsType] = None,
41914193
origin: Union[
@@ -4195,64 +4197,10 @@ def _resample(
41954197
Literal["epoch", "start", "start_day", "end", "end_day"],
41964198
] = "start_day",
41974199
) -> bigframes.core.groupby.DataFrameGroupBy:
4198-
"""Internal function to support resample. Resample time-series data.
4199-
4200-
**Examples:**
4201-
4202-
>>> import bigframes.pandas as bpd
4203-
>>> data = {
4204-
... "timestamp_col": pd.date_range(
4205-
... start="2021-01-01 13:00:00", periods=30, freq="1s"
4206-
... ),
4207-
... "int64_col": range(30),
4208-
... "int64_too": range(10, 40),
4209-
... }
4210-
4211-
Resample on a DataFrame with index:
4212-
4213-
>>> df = bpd.DataFrame(data).set_index("timestamp_col")
4214-
>>> df._resample(rule="7s").min()
4215-
int64_col int64_too
4216-
2021-01-01 12:59:55 0 10
4217-
2021-01-01 13:00:02 2 12
4218-
2021-01-01 13:00:09 9 19
4219-
2021-01-01 13:00:16 16 26
4220-
2021-01-01 13:00:23 23 33
4221-
<BLANKLINE>
4222-
[5 rows x 2 columns]
4223-
4224-
Resample with column and origin set to 'start':
4225-
4226-
>>> df = bpd.DataFrame(data)
4227-
>>> df._resample(rule="7s", on = "timestamp_col", origin="start").min()
4228-
int64_col int64_too
4229-
2021-01-01 13:00:00 0 10
4230-
2021-01-01 13:00:07 7 17
4231-
2021-01-01 13:00:14 14 24
4232-
2021-01-01 13:00:21 21 31
4233-
2021-01-01 13:00:28 28 38
4234-
<BLANKLINE>
4235-
[5 rows x 2 columns]
4236-
4237-
Args:
4238-
rule (str):
4239-
The offset string representing target conversion.
4240-
on (str, default None):
4241-
For a DataFrame, column to use instead of index for resampling. Column
4242-
must be datetime-like.
4243-
level (str or int, default None):
4244-
For a MultiIndex, level (name or number) to use for resampling.
4245-
level must be datetime-like.
4246-
origin(str, default 'start_day'):
4247-
The timestamp on which to adjust the grouping. Must be one of the following:
4248-
'epoch': origin is 1970-01-01
4249-
'start': origin is the first value of the timeseries
4250-
'start_day': origin is the first day at midnight of the timeseries
4251-
Returns:
4252-
DataFrameGroupBy: DataFrameGroupBy object.
4253-
"""
42544200
block = self._block._generate_resample_label(
42554201
rule=rule,
4202+
closed=closed,
4203+
label=label,
42564204
on=on,
42574205
level=level,
42584206
origin=origin,

0 commit comments

Comments
 (0)