Skip to content

Commit ed48bf6

Browse files
authored
Merge branch 'main' into sycai_ai_gen_double
2 parents 11ec67e + 1fc563c commit ed48bf6

File tree

35 files changed

+684
-57
lines changed

35 files changed

+684
-57
lines changed

CHANGELOG.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,27 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.22.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.21.0...v2.22.0) (2025-09-25)
8+
9+
10+
### Features
11+
12+
* Add `GroupBy.__iter__` ([#1394](https://github.com/googleapis/python-bigquery-dataframes/issues/1394)) ([c56a78c](https://github.com/googleapis/python-bigquery-dataframes/commit/c56a78cd509a535d4998d5b9a99ec3ecd334b883))
13+
* Add ai.generate_int to bigframes.bigquery package ([#2109](https://github.com/googleapis/python-bigquery-dataframes/issues/2109)) ([af6b862](https://github.com/googleapis/python-bigquery-dataframes/commit/af6b862de5c3921684210ec169338815f45b19dd))
14+
* Add `GroupBy.describe()` ([#2088](https://github.com/googleapis/python-bigquery-dataframes/issues/2088)) ([328a765](https://github.com/googleapis/python-bigquery-dataframes/commit/328a765e746138806a021bea22475e8c03512aeb))
15+
* Implement `Index.to_list()` ([#2106](https://github.com/googleapis/python-bigquery-dataframes/issues/2106)) ([60056ca](https://github.com/googleapis/python-bigquery-dataframes/commit/60056ca06511f99092647fe55fc02eeab486b4ca))
16+
* Implement inplace parameter for `DataFrame.drop` ([#2105](https://github.com/googleapis/python-bigquery-dataframes/issues/2105)) ([3487f13](https://github.com/googleapis/python-bigquery-dataframes/commit/3487f13d12e34999b385c2e11551b5e27bfbf4ff))
17+
* Support callable for series map method ([#2100](https://github.com/googleapis/python-bigquery-dataframes/issues/2100)) ([ac25618](https://github.com/googleapis/python-bigquery-dataframes/commit/ac25618feed2da11fe4fb85058d498d262c085c0))
18+
* Support df.info() with null index ([#2094](https://github.com/googleapis/python-bigquery-dataframes/issues/2094)) ([fb81eea](https://github.com/googleapis/python-bigquery-dataframes/commit/fb81eeaf13af059f32cb38e7f117fb3504243d51))
19+
20+
21+
### Bug Fixes
22+
23+
* Avoid ibis fillna warning in compiler ([#2113](https://github.com/googleapis/python-bigquery-dataframes/issues/2113)) ([7ef667b](https://github.com/googleapis/python-bigquery-dataframes/commit/7ef667b0f46f13bcc8ad4f2ed8f81278132b5aec))
24+
* Negative start and stop parameter values in Series.str.slice() ([#2104](https://github.com/googleapis/python-bigquery-dataframes/issues/2104)) ([f57a348](https://github.com/googleapis/python-bigquery-dataframes/commit/f57a348f1935a4e2bb14c501bb4c47cd552d102a))
25+
* Throw type error for incomparable join keys ([#2098](https://github.com/googleapis/python-bigquery-dataframes/issues/2098)) ([9dc9695](https://github.com/googleapis/python-bigquery-dataframes/commit/9dc96959a84b751d18b290129c2926df6e50b3f5))
26+
* Transformers with non-standard column names throw errors ([#2089](https://github.com/googleapis/python-bigquery-dataframes/issues/2089)) ([a2daa3f](https://github.com/googleapis/python-bigquery-dataframes/commit/a2daa3fffe6743327edb9f4c74db93198bd12f8e))
27+
728
## [2.21.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.20.0...v2.21.0) (2025-09-17)
829

930

bigframes/core/compile/default_ordering.py renamed to bigframes/core/compile/ibis_compiler/default_ordering.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,7 @@ def _convert_to_nonnull_string(column: ibis_types.Value) -> ibis_types.StringVal
4747
result = ibis_ops.ToJsonString(column).to_expr() # type: ignore
4848
# Escape backslashes and use backslash as delineator
4949
escaped = cast(
50-
ibis_types.StringColumn,
51-
result.fill_null(ibis_types.literal(""))
52-
if hasattr(result, "fill_null")
53-
else result.fillna(""),
50+
ibis_types.StringColumn, result.fill_null(ibis_types.literal(""))
5451
).replace(
5552
"\\", # type: ignore
5653
"\\\\", # type: ignore

bigframes/core/compile/ibis_compiler/scalar_op_registry.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
import pandas as pd
2929

3030
from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS
31-
import bigframes.core.compile.default_ordering
31+
import bigframes.core.compile.ibis_compiler.default_ordering
3232
from bigframes.core.compile.ibis_compiler.scalar_op_compiler import (
3333
scalar_op_compiler, # TODO(tswast): avoid import of variables
3434
)
@@ -1064,7 +1064,7 @@ def isin_op_impl(x: ibis_types.Value, op: ops.IsInOp):
10641064
if op.match_nulls and contains_nulls:
10651065
return x.isnull() | x.isin(matchable_ibis_values)
10661066
else:
1067-
return x.isin(matchable_ibis_values).fillna(False)
1067+
return x.isin(matchable_ibis_values).fill_null(ibis.literal(False))
10681068

10691069

10701070
@scalar_op_compiler.register_unary_op(ops.ToDatetimeOp, pass_op=True)
@@ -1383,8 +1383,8 @@ def eq_nulls_match_op(
13831383
left = x.cast(ibis_dtypes.str).fill_null(literal)
13841384
right = y.cast(ibis_dtypes.str).fill_null(literal)
13851385
else:
1386-
left = x.cast(ibis_dtypes.str).fillna(literal)
1387-
right = y.cast(ibis_dtypes.str).fillna(literal)
1386+
left = x.cast(ibis_dtypes.str).fill_null(literal)
1387+
right = y.cast(ibis_dtypes.str).fill_null(literal)
13881388

13891389
return left == right
13901390

@@ -1813,7 +1813,7 @@ def fillna_op(
18131813
if hasattr(x, "fill_null"):
18141814
return x.fill_null(typing.cast(ibis_types.Scalar, y))
18151815
else:
1816-
return x.fillna(typing.cast(ibis_types.Scalar, y))
1816+
return x.fill_null(typing.cast(ibis_types.Scalar, y))
18171817

18181818

18191819
@scalar_op_compiler.register_binary_op(ops.round_op)
@@ -2030,7 +2030,7 @@ def _construct_prompt(
20302030

20312031
@scalar_op_compiler.register_nary_op(ops.RowKey, pass_op=True)
20322032
def rowkey_op_impl(*values: ibis_types.Value, op: ops.RowKey) -> ibis_types.Value:
2033-
return bigframes.core.compile.default_ordering.gen_row_key(values)
2033+
return bigframes.core.compile.ibis_compiler.default_ordering.gen_row_key(values)
20342034

20352035

20362036
# Helpers

bigframes/core/compile/sqlglot/aggregate_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def compile_analytic(
6363
window: window_spec.WindowSpec,
6464
) -> sge.Expression:
6565
if isinstance(aggregate, agg_expressions.NullaryAggregation):
66-
return nullary_compiler.compile(aggregate.op)
66+
return nullary_compiler.compile(aggregate.op, window)
6767
if isinstance(aggregate, agg_expressions.UnaryAggregation):
6868
column = typed_expr.TypedExpr(
6969
scalar_compiler.scalar_op_compiler.compile_expression(aggregate.arg),

bigframes/core/compile/sqlglot/aggregations/binary_compiler.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
from bigframes.core import window_spec
2222
import bigframes.core.compile.sqlglot.aggregations.op_registration as reg
23+
from bigframes.core.compile.sqlglot.aggregations.windows import apply_window_if_present
2324
import bigframes.core.compile.sqlglot.expressions.typed_expr as typed_expr
2425
from bigframes.operations import aggregations as agg_ops
2526

@@ -33,3 +34,25 @@ def compile(
3334
window: typing.Optional[window_spec.WindowSpec] = None,
3435
) -> sge.Expression:
3536
return BINARY_OP_REGISTRATION[op](op, left, right, window=window)
37+
38+
39+
@BINARY_OP_REGISTRATION.register(agg_ops.CorrOp)
def _(
    op: agg_ops.CorrOp,
    left: typed_expr.TypedExpr,
    right: typed_expr.TypedExpr,
    window: typing.Optional[window_spec.WindowSpec] = None,
) -> sge.Expression:
    """Compile ``CorrOp`` into a SQL ``CORR(left, right)`` call.

    The call is handed to ``apply_window_if_present`` together with
    ``window`` and the resulting expression is returned.
    """
    return apply_window_if_present(
        sge.func("CORR", left.expr, right.expr),
        window,
    )
48+
49+
50+
@BINARY_OP_REGISTRATION.register(agg_ops.CovOp)
def _(
    op: agg_ops.CovOp,
    left: typed_expr.TypedExpr,
    right: typed_expr.TypedExpr,
    window: typing.Optional[window_spec.WindowSpec] = None,
) -> sge.Expression:
    """Compile ``CovOp`` into a SQL ``COVAR_SAMP(left, right)`` call.

    The call is handed to ``apply_window_if_present`` together with
    ``window`` and the resulting expression is returned.
    """
    covariance_call = sge.func("COVAR_SAMP", left.expr, right.expr)
    windowed_call = apply_window_if_present(covariance_call, window)
    return windowed_call

bigframes/core/compile/sqlglot/aggregations/nullary_compiler.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,15 @@ def _(
3939
window: typing.Optional[window_spec.WindowSpec] = None,
4040
) -> sge.Expression:
4141
return apply_window_if_present(sge.func("COUNT", sge.convert(1)), window)
42+
43+
44+
@NULLARY_OP_REGISTRATION.register(agg_ops.RowNumberOp)
def _(
    op: agg_ops.RowNumberOp,
    window: typing.Optional[window_spec.WindowSpec] = None,
) -> sge.Expression:
    """Compile ``RowNumberOp`` into a windowed ``ROW_NUMBER()`` call.

    When ``window`` is given it is applied via ``apply_window_if_present``;
    otherwise the call is wrapped in a ``sge.Window`` with no further
    arguments.
    """
    row_number_call: sge.Expression = sge.func("ROW_NUMBER")
    if window is not None:
        return apply_window_if_present(row_number_call, window)
    # ROW_NUMBER always needs an OVER clause, even without a window spec.
    return sge.Window(this=row_number_call)

bigframes/core/compile/sqlglot/aggregations/op_registration.py

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -41,22 +41,16 @@ def arg_checker(*args, **kwargs):
4141
)
4242
return item(*args, **kwargs)
4343

44-
if hasattr(op, "name"):
45-
key = typing.cast(str, op.name)
46-
if key in self._registered_ops:
47-
raise ValueError(f"{key} is already registered")
48-
else:
49-
raise ValueError(f"The operator must have a 'name' attribute. Got {op}")
44+
key = str(op)
45+
if key in self._registered_ops:
46+
raise ValueError(f"{key} is already registered")
5047
self._registered_ops[key] = item
5148
return arg_checker
5249

5350
return decorator
5451

5552
def __getitem__(self, op: str | agg_ops.WindowOp) -> CompilationFunc:
    """Return the compilation function registered for ``op``.

    ``op`` may be an operator class or an operator instance; an instance is
    looked up through its class. The registry is keyed by ``str()`` of that
    class, matching the key built when an operator is registered.

    Raises:
        ValueError: If nothing is registered under the resulting key.
    """
    op_type = op if isinstance(op, type) else type(op)
    key = str(op_type)
    if key not in self._registered_ops:
        # The previous wording ("is already not registered") was a leftover
        # from the duplicate-registration error and misdescribed this failure.
        raise ValueError(f"{op_type} is not registered")
    return self._registered_ops[key]

bigframes/core/compile/sqlglot/aggregations/unary_compiler.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,37 @@ def compile(
3838
return UNARY_OP_REGISTRATION[op](op, column, window=window)
3939

4040

41+
@UNARY_OP_REGISTRATION.register(agg_ops.ApproxQuartilesOp)
def _(
    op: agg_ops.ApproxQuartilesOp,
    column: typed_expr.TypedExpr,
    window: typing.Optional[window_spec.WindowSpec] = None,
) -> sge.Expression:
    """Compile ``ApproxQuartilesOp`` into an indexed ``APPROX_QUANTILES`` call.

    The generated expression has the shape
    ``APPROX_QUANTILES(column, 4)[OFFSET(op.quartile)]``.

    Raises:
        NotImplementedError: If ``window`` is provided.
    """
    if window is not None:
        raise NotImplementedError("Approx Quartiles with windowing is not supported.")

    # APPROX_QUANTILES(x, 4) returns an array of five boundaries, Q0 through Q4.
    # op.quartile is 1, 2, or 3, and OFFSET indexing is zero-based, so
    # OFFSET(op.quartile) lands directly on the requested quartile.
    quantiles_array = sge.func("APPROX_QUANTILES", column.expr, sge.convert(4))
    quartile_offset = sge.func("OFFSET", sge.convert(op.quartile))
    return sge.Bracket(this=quantiles_array, expressions=[quartile_offset])
59+
60+
61+
@UNARY_OP_REGISTRATION.register(agg_ops.ApproxTopCountOp)
def _(
    op: agg_ops.ApproxTopCountOp,
    column: typed_expr.TypedExpr,
    window: typing.Optional[window_spec.WindowSpec] = None,
) -> sge.Expression:
    """Compile ``ApproxTopCountOp`` into ``APPROX_TOP_COUNT(column, op.number)``.

    Raises:
        NotImplementedError: If ``window`` is provided.
    """
    if window is None:
        top_n = sge.convert(op.number)
        return sge.func("APPROX_TOP_COUNT", column.expr, top_n)
    raise NotImplementedError("Approx top count with windowing is not supported.")
70+
71+
4172
@UNARY_OP_REGISTRATION.register(agg_ops.CountOp)
4273
def _(
4374
op: agg_ops.CountOp,
@@ -47,6 +78,15 @@ def _(
4778
return apply_window_if_present(sge.func("COUNT", column.expr), window)
4879

4980

81+
@UNARY_OP_REGISTRATION.register(agg_ops.DenseRankOp)
def _(
    op: agg_ops.DenseRankOp,
    column: typed_expr.TypedExpr,
    window: typing.Optional[window_spec.WindowSpec] = None,
) -> sge.Expression:
    """Compile ``DenseRankOp`` into a ``DENSE_RANK()`` call.

    ``DENSE_RANK`` is emitted without arguments, so ``column`` is not used
    here; the call is passed to ``apply_window_if_present`` with ``window``.
    """
    dense_rank_call = sge.func("DENSE_RANK")
    return apply_window_if_present(dense_rank_call, window)
88+
89+
5090
@UNARY_OP_REGISTRATION.register(agg_ops.MaxOp)
5191
def _(
5292
op: agg_ops.MaxOp,
@@ -56,6 +96,26 @@ def _(
5696
return apply_window_if_present(sge.func("MAX", column.expr), window)
5797

5898

99+
@UNARY_OP_REGISTRATION.register(agg_ops.MeanOp)
def _(
    op: agg_ops.MeanOp,
    column: typed_expr.TypedExpr,
    window: typing.Optional[window_spec.WindowSpec] = None,
) -> sge.Expression:
    """Compile ``MeanOp`` into a SQL ``AVG`` call, optionally windowed.

    Boolean columns are cast to ``INT64`` before averaging. The result is
    floored and cast to ``INT64`` when ``op.should_floor_result`` is set or
    the column has the timedelta dtype.

    Args:
        op: The mean operator being compiled.
        column: The typed input expression to average.
        window: Optional window spec, applied through
            ``apply_window_if_present``.

    Returns:
        The compiled expression.
    """
    expr = column.expr
    if column.dtype == dtypes.BOOL_DTYPE:
        # Booleans are averaged as integers.
        expr = sge.Cast(this=expr, to="INT64")

    # Attach the window to the AVG call itself. Wrapping the later
    # CAST(FLOOR(...)) in the window would put OVER on the CAST, leaving AVG
    # without its window.
    expr = apply_window_if_present(sge.func("AVG", expr), window)

    should_floor_result = (
        op.should_floor_result or column.dtype == dtypes.TIMEDELTA_DTYPE
    )
    if should_floor_result:
        expr = sge.Cast(this=sge.func("FLOOR", expr), to="INT64")
    return expr
117+
118+
59119
@UNARY_OP_REGISTRATION.register(agg_ops.MedianOp)
60120
def _(
61121
op: agg_ops.MedianOp,
@@ -77,6 +137,34 @@ def _(
77137
return apply_window_if_present(sge.func("MIN", column.expr), window)
78138

79139

140+
@UNARY_OP_REGISTRATION.register(agg_ops.QuantileOp)
def _(
    op: agg_ops.QuantileOp,
    column: typed_expr.TypedExpr,
    window: typing.Optional[window_spec.WindowSpec] = None,
) -> sge.Expression:
    """Compile ``QuantileOp`` into a windowed ``PERCENTILE_CONT(column, op.q)``.

    The call always ends up inside a window expression: the given ``window``
    when present, otherwise a ``sge.Window`` with no further arguments. When
    ``op.should_floor_result`` is set, the windowed result is floored and
    cast to ``INT64``.
    """
    # TODO: Support interpolation argument
    # TODO: Support percentile_disc
    percentile_call = sge.func("PERCENTILE_CONT", column.expr, sge.convert(op.q))

    # PERCENTILE_CONT is a navigation function, not an aggregate function, so
    # it always needs an OVER clause.
    windowed: sge.Expression = (
        apply_window_if_present(percentile_call, window)
        if window is not None
        else sge.Window(this=percentile_call)
    )

    if not op.should_floor_result:
        return windowed
    return sge.Cast(this=sge.func("FLOOR", windowed), to="INT64")
157+
158+
159+
@UNARY_OP_REGISTRATION.register(agg_ops.RankOp)
def _(
    op: agg_ops.RankOp,
    column: typed_expr.TypedExpr,
    window: typing.Optional[window_spec.WindowSpec] = None,
) -> sge.Expression:
    """Compile ``RankOp`` into a ``RANK()`` call.

    ``RANK`` is emitted without arguments, so ``column`` is not used here;
    the call is passed to ``apply_window_if_present`` with ``window``.
    """
    rank_call = sge.func("RANK")
    return apply_window_if_present(rank_call, window)
166+
167+
80168
@UNARY_OP_REGISTRATION.register(agg_ops.SizeUnaryOp)
81169
def _(
82170
op: agg_ops.SizeUnaryOp,

bigframes/core/compile/sqlglot/aggregations/windows.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ def apply_window_if_present(
6464
if not window.bounds and not order:
6565
return sge.Window(this=value, partition_by=group_by)
6666

67+
if not window.bounds:
68+
return sge.Window(this=value, partition_by=group_by, order=order)
69+
6770
kind = (
6871
"ROWS" if isinstance(window.bounds, window_spec.RowsWindowBounds) else "RANGE"
6972
)

bigframes/dataframe.py

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2006,6 +2006,7 @@ def insert(
20062006

20072007
self._set_block(block)
20082008

2009+
@overload
20092010
def drop(
20102011
self,
20112012
labels: typing.Any = None,
@@ -2014,7 +2015,33 @@ def drop(
20142015
index: typing.Any = None,
20152016
columns: Union[blocks.Label, Sequence[blocks.Label]] = None,
20162017
level: typing.Optional[LevelType] = None,
2018+
inplace: Literal[False] = False,
20172019
) -> DataFrame:
2020+
...
2021+
2022+
@overload
2023+
def drop(
2024+
self,
2025+
labels: typing.Any = None,
2026+
*,
2027+
axis: typing.Union[int, str] = 0,
2028+
index: typing.Any = None,
2029+
columns: Union[blocks.Label, Sequence[blocks.Label]] = None,
2030+
level: typing.Optional[LevelType] = None,
2031+
inplace: Literal[True],
2032+
) -> None:
2033+
...
2034+
2035+
def drop(
2036+
self,
2037+
labels: typing.Any = None,
2038+
*,
2039+
axis: typing.Union[int, str] = 0,
2040+
index: typing.Any = None,
2041+
columns: Union[blocks.Label, Sequence[blocks.Label]] = None,
2042+
level: typing.Optional[LevelType] = None,
2043+
inplace: bool = False,
2044+
) -> Optional[DataFrame]:
20182045
if labels:
20192046
if index or columns:
20202047
raise ValueError("Cannot specify both 'labels' and 'index'/'columns")
@@ -2056,7 +2083,11 @@ def drop(
20562083
inverse_condition_id, ops.invert_op
20572084
)
20582085
elif isinstance(index, indexes.Index):
2059-
return self._drop_by_index(index)
2086+
dropped_block = self._drop_by_index(index)._get_block()
2087+
if inplace:
2088+
self._set_block(dropped_block)
2089+
return None
2090+
return DataFrame(dropped_block)
20602091
else:
20612092
block, condition_id = block.project_expr(
20622093
ops.ne_op.as_expr(level_id, ex.const(index))
@@ -2068,7 +2099,12 @@ def drop(
20682099
block = block.drop_columns(self._sql_names(columns))
20692100
if index is None and not columns:
20702101
raise ValueError("Must specify 'labels' or 'index'/'columns")
2071-
return DataFrame(block)
2102+
2103+
if inplace:
2104+
self._set_block(block)
2105+
return None
2106+
else:
2107+
return DataFrame(block)
20722108

20732109
def _drop_by_index(self, index: indexes.Index) -> DataFrame:
20742110
block = index._block

0 commit comments

Comments
 (0)