Skip to content

Commit 31cb193

Browse files
Merge remote-tracking branch 'github/main' into python_fn_mappings
2 parents 40bf622 + dc46b3c commit 31cb193

File tree

26 files changed

+644
-36
lines changed

26 files changed

+644
-36
lines changed

CHANGELOG.md

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,24 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.27.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.26.0...v2.27.0) (2025-10-24)
8+
9+
10+
### Features
11+
12+
* Add `__abs__` to dataframe ([#2186](https://github.com/googleapis/python-bigquery-dataframes/issues/2186)) ([c331dfe](https://github.com/googleapis/python-bigquery-dataframes/commit/c331dfed59174962fbdc8ace175dd00fcc3d5d50))
13+
* Add df.groupby().corr()/cov() support ([#2190](https://github.com/googleapis/python-bigquery-dataframes/issues/2190)) ([ccd7c07](https://github.com/googleapis/python-bigquery-dataframes/commit/ccd7c0774a65d09e6cf31d2b62d0bc64bd7c4248))
14+
* Add str accessor to index ([#2179](https://github.com/googleapis/python-bigquery-dataframes/issues/2179)) ([cd87ce0](https://github.com/googleapis/python-bigquery-dataframes/commit/cd87ce0d504747f44d1b5a55f869a2e0fca6df17))
15+
* Add support for `np.isnan` and `np.isfinite` ufuncs ([#2188](https://github.com/googleapis/python-bigquery-dataframes/issues/2188)) ([68723bc](https://github.com/googleapis/python-bigquery-dataframes/commit/68723bc1f08013e43a8b11752f908bf8fd6d51f5))
16+
* Include local data bytes in the dry run report when available ([#2185](https://github.com/googleapis/python-bigquery-dataframes/issues/2185)) ([ee2c40c](https://github.com/googleapis/python-bigquery-dataframes/commit/ee2c40c6789535e259fb6a9774831d6913d16212))
17+
* Support len() on Groupby objects ([#2183](https://github.com/googleapis/python-bigquery-dataframes/issues/2183)) ([4191821](https://github.com/googleapis/python-bigquery-dataframes/commit/4191821b0976281a96c8965336ef51f061b0c481))
18+
* Support pa.json_(pa.string()) in struct/list if available ([#2180](https://github.com/googleapis/python-bigquery-dataframes/issues/2180)) ([5ec3cc0](https://github.com/googleapis/python-bigquery-dataframes/commit/5ec3cc0298c7a6195d5bd12a08d996e7df57fc5f))
19+
20+
21+
### Documentation
22+
23+
* Update AI operators deprecation notice ([#2182](https://github.com/googleapis/python-bigquery-dataframes/issues/2182)) ([2c50310](https://github.com/googleapis/python-bigquery-dataframes/commit/2c503107e17c59232b14b0d7bc40c350bb087d6f))
24+
725
## [2.26.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.25.0...v2.26.0) (2025-10-17)
826

927

bigframes/core/compile/ibis_compiler/scalar_op_compiler.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
from bigframes.core import agg_expressions, ordering
2727
import bigframes.core.compile.ibis_types
2828
import bigframes.core.expression as ex
29+
from bigframes.operations import numeric_ops
2930

3031
if TYPE_CHECKING:
3132
import bigframes.operations as ops
@@ -267,3 +268,13 @@ def _convert_range_ordering_to_table_value(
267268

268269
# Singleton compiler
269270
scalar_op_compiler = ExpressionCompiler()
271+
272+
273+
@scalar_op_compiler.register_unary_op(numeric_ops.isnan_op)
274+
def isnanornull(arg):
275+
return arg.isnan()
276+
277+
278+
@scalar_op_compiler.register_unary_op(numeric_ops.isfinite_op)
279+
def isfinite(arg):
280+
return arg.isinf().negate() & arg.isnan().negate()

bigframes/core/compile/polars/operations/numeric_ops.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -152,3 +152,21 @@ def sqrt_op_impl(
152152
import polars as pl
153153

154154
return pl.when(input < 0).then(float("nan")).otherwise(input.sqrt())
155+
156+
157+
@polars_compiler.register_op(numeric_ops.IsNanOp)
158+
def is_nan_op_impl(
159+
compiler: polars_compiler.PolarsExpressionCompiler,
160+
op: numeric_ops.IsNanOp, # type: ignore
161+
input: pl.Expr,
162+
) -> pl.Expr:
163+
return input.is_nan()
164+
165+
166+
@polars_compiler.register_op(numeric_ops.IsFiniteOp)
167+
def is_finite_op_impl(
168+
compiler: polars_compiler.PolarsExpressionCompiler,
169+
op: numeric_ops.IsFiniteOp, # type: ignore
170+
input: pl.Expr,
171+
) -> pl.Expr:
172+
return input.is_finite()

bigframes/core/compile/sqlglot/expressions/generic_ops.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -76,8 +76,8 @@ def _(expr: TypedExpr) -> sge.Expression:
7676
@register_unary_op(ops.invert_op)
7777
def _(expr: TypedExpr) -> sge.Expression:
7878
if expr.dtype == dtypes.BOOL_DTYPE:
79-
return sge.Not(this=expr.expr)
80-
return sge.BitwiseNot(this=expr.expr)
79+
return sge.Not(this=sge.paren(expr.expr))
80+
return sge.BitwiseNot(this=sge.paren(expr.expr))
8181

8282

8383
@register_unary_op(ops.isnull_op)

bigframes/core/compile/sqlglot/expressions/numeric_ops.py

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
import bigframes.core.compile.sqlglot.expressions.constants as constants
2323
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
2424
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
25+
from bigframes.operations import numeric_ops
2526

2627
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
2728
register_binary_op = scalar_compiler.scalar_op_compiler.register_binary_op
@@ -189,7 +190,7 @@ def _(expr: TypedExpr) -> sge.Expression:
189190

190191
@register_unary_op(ops.neg_op)
191192
def _(expr: TypedExpr) -> sge.Expression:
192-
return sge.Neg(this=expr.expr)
193+
return sge.Neg(this=sge.paren(expr.expr))
193194

194195

195196
@register_unary_op(ops.pos_op)
@@ -408,6 +409,21 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
408409
)
409410

410411

412+
@register_unary_op(numeric_ops.isnan_op)
413+
def isnan(arg: TypedExpr) -> sge.Expression:
414+
return sge.IsNan(this=arg.expr)
415+
416+
417+
@register_unary_op(numeric_ops.isfinite_op)
418+
def isfinite(arg: TypedExpr) -> sge.Expression:
419+
return sge.Not(
420+
this=sge.Or(
421+
this=sge.IsInf(this=arg.expr),
422+
right=sge.IsNan(this=arg.expr),
423+
),
424+
)
425+
426+
411427
def _coerce_bool_to_int(typed_expr: TypedExpr) -> sge.Expression:
412428
"""Coerce boolean expression to integer."""
413429
if typed_expr.dtype == dtypes.BOOL_DTYPE:

bigframes/core/groupby/dataframe_group_by.py

Lines changed: 73 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -177,6 +177,9 @@ def __iter__(self) -> Iterable[Tuple[blocks.Label, df.DataFrame]]:
177177
filtered_df = df.DataFrame(filtered_block)
178178
yield group_keys, filtered_df
179179

180+
def __len__(self) -> int:
181+
return len(self.agg([]))
182+
180183
def size(self) -> typing.Union[df.DataFrame, series.Series]:
181184
agg_block, _ = self._block.aggregate_size(
182185
by_column_ids=self._by_col_ids,
@@ -275,6 +278,76 @@ def var(
275278
self._raise_on_non_numeric("var")
276279
return self._aggregate_all(agg_ops.var_op, numeric_only=True)
277280

281+
def corr(
282+
self,
283+
*,
284+
numeric_only: bool = False,
285+
) -> df.DataFrame:
286+
if not numeric_only:
287+
self._raise_on_non_numeric("corr")
288+
if len(self._selected_cols) > 30:
289+
raise ValueError(
290+
f"Cannot calculate corr on >30 columns, dataframe has {len(self._selected_cols)} selected columns."
291+
)
292+
293+
labels = self._block._get_labels_for_columns(self._selected_cols)
294+
block = self._block
295+
aggregations = [
296+
agg_expressions.BinaryAggregation(
297+
agg_ops.CorrOp(), ex.deref(left_col), ex.deref(right_col)
298+
)
299+
for left_col in self._selected_cols
300+
for right_col in self._selected_cols
301+
]
302+
# unique columns stops
303+
uniq_orig_columns = utils.combine_indices(labels, pd.Index(range(len(labels))))
304+
result_labels = utils.cross_indices(uniq_orig_columns, uniq_orig_columns)
305+
306+
block, _ = block.aggregate(
307+
by_column_ids=self._by_col_ids,
308+
aggregations=aggregations,
309+
column_labels=result_labels,
310+
)
311+
312+
block = block.stack(levels=labels.nlevels + 1)
313+
# Drop the last level of each index, which was created to guarantee uniqueness
314+
return df.DataFrame(block).droplevel(-1, axis=0).droplevel(-1, axis=1)
315+
316+
def cov(
317+
self,
318+
*,
319+
numeric_only: bool = False,
320+
) -> df.DataFrame:
321+
if not numeric_only:
322+
self._raise_on_non_numeric("cov")
323+
if len(self._selected_cols) > 30:
324+
raise ValueError(
325+
f"Cannot calculate cov on >30 columns, dataframe has {len(self._selected_cols)} selected columns."
326+
)
327+
328+
labels = self._block._get_labels_for_columns(self._selected_cols)
329+
block = self._block
330+
aggregations = [
331+
agg_expressions.BinaryAggregation(
332+
agg_ops.CovOp(), ex.deref(left_col), ex.deref(right_col)
333+
)
334+
for left_col in self._selected_cols
335+
for right_col in self._selected_cols
336+
]
337+
# unique columns stops
338+
uniq_orig_columns = utils.combine_indices(labels, pd.Index(range(len(labels))))
339+
result_labels = utils.cross_indices(uniq_orig_columns, uniq_orig_columns)
340+
341+
block, _ = block.aggregate(
342+
by_column_ids=self._by_col_ids,
343+
aggregations=aggregations,
344+
column_labels=result_labels,
345+
)
346+
347+
block = block.stack(levels=labels.nlevels + 1)
348+
# Drop the last level of each index, which was created to guarantee uniqueness
349+
return df.DataFrame(block).droplevel(-1, axis=0).droplevel(-1, axis=1)
350+
278351
def skew(
279352
self,
280353
*,

bigframes/core/groupby/series_group_by.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,9 @@ def __iter__(self) -> Iterable[Tuple[blocks.Label, series.Series]]:
108108
filtered_series.name = self._value_name
109109
yield group_keys, filtered_series
110110

111+
def __len__(self) -> int:
112+
return len(self.agg([]))
113+
111114
def all(self) -> series.Series:
112115
return self._aggregate(agg_ops.all_op)
113116

@@ -275,9 +278,9 @@ def agg(self, func=None) -> typing.Union[df.DataFrame, series.Series]:
275278
if column_names:
276279
agg_block = agg_block.with_column_labels(column_names)
277280

278-
if len(aggregations) > 1:
279-
return df.DataFrame(agg_block)
280-
return series.Series(agg_block)
281+
if len(aggregations) == 1:
282+
return series.Series(agg_block)
283+
return df.DataFrame(agg_block)
281284

282285
aggregate = agg
283286

bigframes/dtypes.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -340,6 +340,12 @@ def is_struct_like(type_: ExpressionType) -> bool:
340340
)
341341

342342

343+
def is_json_arrow_type(type_: pa.DataType) -> bool:
344+
return isinstance(type_, db_dtypes.JSONArrowType) or (
345+
hasattr(pa, "JsonType") and isinstance(type_, pa.JsonType)
346+
)
347+
348+
343349
def is_json_like(type_: ExpressionType) -> bool:
344350
return type_ == JSON_DTYPE or type_ == STRING_DTYPE # Including JSON string
345351

@@ -510,6 +516,10 @@ def arrow_dtype_to_bigframes_dtype(
510516
if arrow_dtype == pa.null():
511517
return DEFAULT_DTYPE
512518

519+
# Allow both db_dtypes.JSONArrowType() and pa.json_(pa.string())
520+
if is_json_arrow_type(arrow_dtype):
521+
return JSON_DTYPE
522+
513523
# No other types matched.
514524
raise TypeError(
515525
f"Unexpected Arrow data type {arrow_dtype}. {constants.FEEDBACK_LINK}"

bigframes/operations/numeric_ops.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -348,3 +348,19 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
348348
name="unsafe_pow_op", type_signature=op_typing.BINARY_REAL_NUMERIC
349349
)
350350
unsafe_pow_op = UnsafePowOp()
351+
352+
IsNanOp = base_ops.create_unary_op(
353+
name="isnan",
354+
type_signature=op_typing.FixedOutputType(
355+
dtypes.is_numeric, dtypes.BOOL_DTYPE, "numeric"
356+
),
357+
)
358+
isnan_op = IsNanOp()
359+
360+
IsFiniteOp = base_ops.create_unary_op(
361+
name="isfinite",
362+
type_signature=op_typing.FixedOutputType(
363+
dtypes.is_numeric, dtypes.BOOL_DTYPE, "numeric"
364+
),
365+
)
366+
isfinite_op = IsFiniteOp()

bigframes/operations/numpy_op_maps.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,8 @@
4040
np.ceil: numeric_ops.ceil_op,
4141
np.log1p: numeric_ops.log1p_op,
4242
np.expm1: numeric_ops.expm1_op,
43+
np.isnan: numeric_ops.isnan_op,
44+
np.isfinite: numeric_ops.isfinite_op,
4345
}
4446

4547

0 commit comments

Comments
 (0)