Skip to content

Commit 0259a7b

Browse files
authored
Merge branch 'main' into routine-cleanup
2 parents e333a9b + 7ce0ac5 commit 0259a7b

File tree

62 files changed

+6783
-379
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+6783
-379
lines changed

bigframes/blob/_functions.py

Lines changed: 285 additions & 183 deletions
Large diffs are not rendered by default.

bigframes/core/compile/polars/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
# polars shouldn't be needed at import time, as register is a no-op if polars
2525
# isn't installed.
2626
import bigframes.core.compile.polars.operations.generic_ops # noqa: F401
27+
import bigframes.core.compile.polars.operations.numeric_ops # noqa: F401
28+
import bigframes.core.compile.polars.operations.struct_ops # noqa: F401
2729

2830
try:
2931
import bigframes._importing

bigframes/core/compile/polars/compiler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -493,9 +493,9 @@ def compile_agg_op(
493493
if isinstance(op, agg_ops.MedianOp):
494494
return pl.median(*inputs)
495495
if isinstance(op, agg_ops.AllOp):
496-
return pl.all(*inputs)
496+
return pl.col(inputs).cast(pl.Boolean).all()
497497
if isinstance(op, agg_ops.AnyOp):
498-
return pl.any(*inputs) # type: ignore
498+
return pl.col(inputs).cast(pl.Boolean).any()
499499
if isinstance(op, agg_ops.NuniqueOp):
500500
return pl.col(*inputs).drop_nulls().n_unique()
501501
if isinstance(op, agg_ops.MinOp):
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
BigFrames -> Polars compilation for the operations in bigframes.operations.numeric_ops.
17+
18+
Please keep implementations in sequential order by op name.
19+
"""
20+
21+
from __future__ import annotations
22+
23+
from typing import TYPE_CHECKING
24+
25+
import bigframes.core.compile.polars.compiler as polars_compiler
26+
from bigframes.operations import numeric_ops
27+
28+
if TYPE_CHECKING:
29+
import polars as pl
30+
31+
32+
@polars_compiler.register_op(numeric_ops.CosOp)
33+
def cos_op_impl(
34+
compiler: polars_compiler.PolarsExpressionCompiler,
35+
op: numeric_ops.CosOp, # type: ignore
36+
input: pl.Expr,
37+
) -> pl.Expr:
38+
return input.cos()
39+
40+
41+
@polars_compiler.register_op(numeric_ops.LnOp)
42+
def ln_op_impl(
43+
compiler: polars_compiler.PolarsExpressionCompiler,
44+
op: numeric_ops.LnOp, # type: ignore
45+
input: pl.Expr,
46+
) -> pl.Expr:
47+
import polars as pl
48+
49+
return pl.when(input <= 0).then(float("nan")).otherwise(input.log())
50+
51+
52+
@polars_compiler.register_op(numeric_ops.Log10Op)
53+
def log10_op_impl(
54+
compiler: polars_compiler.PolarsExpressionCompiler,
55+
op: numeric_ops.Log10Op, # type: ignore
56+
input: pl.Expr,
57+
) -> pl.Expr:
58+
import polars as pl
59+
60+
return pl.when(input <= 0).then(float("nan")).otherwise(input.log(base=10))
61+
62+
63+
@polars_compiler.register_op(numeric_ops.Log1pOp)
64+
def log1p_op_impl(
65+
compiler: polars_compiler.PolarsExpressionCompiler,
66+
op: numeric_ops.Log1pOp, # type: ignore
67+
input: pl.Expr,
68+
) -> pl.Expr:
69+
import polars as pl
70+
71+
return pl.when(input <= -1).then(float("nan")).otherwise((input + 1).log())
72+
73+
74+
@polars_compiler.register_op(numeric_ops.SinOp)
75+
def sin_op_impl(
76+
compiler: polars_compiler.PolarsExpressionCompiler,
77+
op: numeric_ops.SinOp, # type: ignore
78+
input: pl.Expr,
79+
) -> pl.Expr:
80+
return input.sin()
81+
82+
83+
@polars_compiler.register_op(numeric_ops.SqrtOp)
84+
def sqrt_op_impl(
85+
compiler: polars_compiler.PolarsExpressionCompiler,
86+
op: numeric_ops.SqrtOp, # type: ignore
87+
input: pl.Expr,
88+
) -> pl.Expr:
89+
import polars as pl
90+
91+
return pl.when(input < 0).then(float("nan")).otherwise(input.sqrt())
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
BigFrames -> Polars compilation for the operations in bigframes.operations.generic_ops.
17+
18+
Please keep implementations in sequential order by op name.
19+
"""
20+
21+
from __future__ import annotations
22+
23+
from typing import TYPE_CHECKING
24+
25+
import bigframes_vendored.constants
26+
27+
import bigframes.core.compile.polars.compiler as polars_compiler
28+
from bigframes.operations import struct_ops
29+
30+
if TYPE_CHECKING:
31+
import polars as pl
32+
33+
34+
@polars_compiler.register_op(struct_ops.StructFieldOp)
35+
def struct_field_op_impl(
36+
compiler: polars_compiler.PolarsExpressionCompiler,
37+
op: struct_ops.StructFieldOp, # type: ignore
38+
input: pl.Expr,
39+
) -> pl.Expr:
40+
if isinstance(op.name_or_index, str):
41+
name = op.name_or_index
42+
else:
43+
raise NotImplementedError(
44+
"Referencing a struct field by number not implemented in polars compiler. "
45+
f"{bigframes_vendored.constants.FEEDBACK_LINK}"
46+
)
47+
48+
return input.struct.field(name)

bigframes/core/compile/sqlglot/aggregations/windows.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,9 @@ def apply_window_if_present(
4040
# Unbound grouping window.
4141
order_by = None
4242
elif window.is_range_bounded:
43-
# Note that, when the window is range-bounded, we only need one ordering key.
44-
# There are two reasons:
45-
# 1. Manipulating null positions requires more than one ordering key, which
46-
# is forbidden by SQL window syntax for range rolling.
47-
# 2. Pandas does not allow range rolling on timeseries with nulls.
48-
order_by = get_window_order_by((window.ordering[0],), override_null_order=False)
43+
order_by = get_window_order_by((window.ordering[0],))
4944
else:
50-
order_by = get_window_order_by(window.ordering, override_null_order=True)
45+
order_by = get_window_order_by(window.ordering)
5146

5247
order = sge.Order(expressions=order_by) if order_by else None
5348

@@ -102,7 +97,15 @@ def get_window_order_by(
10297
ordering: typing.Tuple[ordering_spec.OrderingExpression, ...],
10398
override_null_order: bool = False,
10499
) -> typing.Optional[tuple[sge.Ordered, ...]]:
105-
"""Returns the SQL order by clause for a window specification."""
100+
"""Returns the SQL order by clause for a window specification.
101+
Args:
102+
ordering (Tuple[ordering_spec.OrderingExpression, ...]):
103+
A tuple of ordering specification objects.
104+
override_null_order (bool):
105+
If True, overrides BigQuery's default null ordering behavior, which
106+
is sometimes incompatible with ordered aggregations. The generated SQL
107+
will include extra expressions to correctly enforce NULL FIRST/LAST.
108+
"""
106109
if not ordering:
107110
return None
108111

@@ -115,8 +118,6 @@ def get_window_order_by(
115118
nulls_first = not ordering_spec_item.na_last
116119

117120
if override_null_order:
118-
# Bigquery SQL considers NULLS to be "smallest" values, but we need
119-
# to override in these cases.
120121
is_null_expr = sge.Is(this=expr, expression=sge.Null())
121122
if nulls_first and desc:
122123
order_by.append(

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,8 @@ def compile_random_sample(
299299
def compile_aggregate(
300300
self, node: nodes.AggregateNode, child: ir.SQLGlotIR
301301
) -> ir.SQLGlotIR:
302+
# The BigQuery ordered aggregation cannot support for NULL FIRST/LAST,
303+
# so we need to add extra expressions to enforce the null ordering.
302304
ordering_cols = windows.get_window_order_by(
303305
node.order_by, override_null_order=True
304306
)

bigframes/core/compile/sqlglot/expressions/generic_ops.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
2424

2525
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
26+
register_ternary_op = scalar_compiler.scalar_op_compiler.register_ternary_op
2627

2728

2829
@register_unary_op(ops.AsTypeOp, pass_op=True)
@@ -66,6 +67,18 @@ def _(expr: TypedExpr, op: ops.AsTypeOp) -> sge.Expression:
6667
return _cast(sg_expr, sg_to_type, op.safe)
6768

6869

70+
@register_ternary_op(ops.clip_op)
71+
def _(
72+
original: TypedExpr,
73+
lower: TypedExpr,
74+
upper: TypedExpr,
75+
) -> sge.Expression:
76+
return sge.Greatest(
77+
this=sge.Least(this=original.expr, expressions=[upper.expr]),
78+
expressions=[lower.expr],
79+
)
80+
81+
6982
@register_unary_op(ops.hash_op)
7083
def _(expr: TypedExpr) -> sge.Expression:
7184
return sge.func("FARM_FINGERPRINT", expr.expr)
@@ -94,6 +107,13 @@ def _(expr: TypedExpr) -> sge.Expression:
94107
return sge.Not(this=sge.Is(this=expr.expr, expression=sge.Null()))
95108

96109

110+
@register_ternary_op(ops.where_op)
111+
def _(
112+
original: TypedExpr, condition: TypedExpr, replacement: TypedExpr
113+
) -> sge.Expression:
114+
return sge.If(this=condition.expr, true=original.expr, false=replacement.expr)
115+
116+
97117
# Helper functions
98118
def _cast_to_json(expr: TypedExpr, op: ops.AsTypeOp) -> sge.Expression:
99119
from_type = expr.dtype

bigframes/core/compile/sqlglot/expressions/numeric_ops.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def _(expr: TypedExpr) -> sge.Expression:
158158
return sge.Case(
159159
ifs=[
160160
sge.If(
161-
this=expr.expr < sge.convert(0),
161+
this=expr.expr <= sge.convert(0),
162162
true=constants._NAN,
163163
)
164164
],
@@ -171,7 +171,7 @@ def _(expr: TypedExpr) -> sge.Expression:
171171
return sge.Case(
172172
ifs=[
173173
sge.If(
174-
this=expr.expr < sge.convert(0),
174+
this=expr.expr <= sge.convert(0),
175175
true=constants._NAN,
176176
)
177177
],
@@ -184,7 +184,7 @@ def _(expr: TypedExpr) -> sge.Expression:
184184
return sge.Case(
185185
ifs=[
186186
sge.If(
187-
this=expr.expr < sge.convert(-1),
187+
this=expr.expr <= sge.convert(-1),
188188
true=constants._NAN,
189189
)
190190
],

0 commit comments

Comments
 (0)