
Commit 4462425

Merge branch 'main' into tswast-log
2 parents: 2b97c2b + 62f7e9f

11 files changed: +293 −17 lines

bigframes/core/compile/polars/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -25,6 +25,7 @@
 # isn't installed.
 import bigframes.core.compile.polars.operations.generic_ops  # noqa: F401
 import bigframes.core.compile.polars.operations.numeric_ops  # noqa: F401
+import bigframes.core.compile.polars.operations.struct_ops  # noqa: F401

 try:
     import bigframes._importing

Lines changed: 48 additions & 0 deletions

@@ -0,0 +1,48 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+BigFrames -> Polars compilation for the operations in bigframes.operations.generic_ops.
+
+Please keep implementations in sequential order by op name.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import bigframes_vendored.constants
+
+import bigframes.core.compile.polars.compiler as polars_compiler
+from bigframes.operations import struct_ops
+
+if TYPE_CHECKING:
+    import polars as pl
+
+
+@polars_compiler.register_op(struct_ops.StructFieldOp)
+def struct_field_op_impl(
+    compiler: polars_compiler.PolarsExpressionCompiler,
+    op: struct_ops.StructFieldOp,  # type: ignore
+    input: pl.Expr,
+) -> pl.Expr:
+    if isinstance(op.name_or_index, str):
+        name = op.name_or_index
+    else:
+        raise NotImplementedError(
+            "Referencing a struct field by number not implemented in polars compiler. "
+            f"{bigframes_vendored.constants.FEEDBACK_LINK}"
+        )
+
+    return input.struct.field(name)
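
The registered rule ultimately delegates to polars' `Expr.struct.field`. A minimal, self-contained sketch of that behavior (synthetic data, not part of this commit; the column name "address" is a placeholder):

import polars as pl

# A toy frame whose "address" column is a struct, standing in for the
# expression the compiler receives as `input` above.
df = pl.DataFrame(
    {"address": [{"city": "Seattle", "zip": 98101}, {"city": "Austin", "zip": 73301}]}
)

# Select a struct field by name, as struct_field_op_impl does.
print(df.select(pl.col("address").struct.field("city")))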

bigframes/core/compile/sqlglot/expressions/generic_ops.py

Lines changed: 20 additions & 0 deletions

@@ -23,6 +23,7 @@
 import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler

 register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
+register_ternary_op = scalar_compiler.scalar_op_compiler.register_ternary_op


 @register_unary_op(ops.AsTypeOp, pass_op=True)
@@ -66,6 +67,18 @@ def _(expr: TypedExpr, op: ops.AsTypeOp) -> sge.Expression:
     return _cast(sg_expr, sg_to_type, op.safe)


+@register_ternary_op(ops.clip_op)
+def _(
+    original: TypedExpr,
+    lower: TypedExpr,
+    upper: TypedExpr,
+) -> sge.Expression:
+    return sge.Greatest(
+        this=sge.Least(this=original.expr, expressions=[upper.expr]),
+        expressions=[lower.expr],
+    )
+
+
 @register_unary_op(ops.hash_op)
 def _(expr: TypedExpr) -> sge.Expression:
     return sge.func("FARM_FINGERPRINT", expr.expr)
@@ -94,6 +107,13 @@ def _(expr: TypedExpr) -> sge.Expression:
     return sge.Not(this=sge.Is(this=expr.expr, expression=sge.Null()))


+@register_ternary_op(ops.where_op)
+def _(
+    original: TypedExpr, condition: TypedExpr, replacement: TypedExpr
+) -> sge.Expression:
+    return sge.If(this=condition.expr, true=original.expr, false=replacement.expr)
+
+
 # Helper functions
 def _cast_to_json(expr: TypedExpr, op: ops.AsTypeOp) -> sge.Expression:
     from_type = expr.dtype
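
As a rough cross-check on the expression trees registered above, the sketch below (not part of the commit) renders equivalent sqlglot nodes to BigQuery SQL; the column names `a`, `lo`, `hi`, and `cond` are placeholders:

import sqlglot.expressions as sge

a, lo, hi, cond = (sge.column(name) for name in ("a", "lo", "hi", "cond"))

# clip_op: bound `a` to [lo, hi], mirroring the registered implementation.
clip = sge.Greatest(this=sge.Least(this=a, expressions=[hi]), expressions=[lo])
print(clip.sql(dialect="bigquery"))  # GREATEST(LEAST(a, hi), lo)

# where_op: keep `a` where `cond` holds, otherwise use the replacement `lo`.
where = sge.If(this=cond, true=a, false=lo)
print(where.sql(dialect="bigquery"))  # IF(cond, a, lo)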

bigframes/operations/structs.py

Lines changed: 12 additions & 6 deletions

@@ -17,9 +17,8 @@
 import bigframes_vendored.pandas.core.arrays.arrow.accessors as vendoracessors
 import pandas as pd

-from bigframes.core import log_adapter
+from bigframes.core import backports, log_adapter
 import bigframes.dataframe
-import bigframes.dtypes
 import bigframes.operations
 import bigframes.operations.base
 import bigframes.series
@@ -45,17 +44,24 @@ def explode(self) -> bigframes.dataframe.DataFrame:

         pa_type = self._dtype.pyarrow_dtype
         return bigframes.pandas.concat(
-            [self.field(i) for i in range(pa_type.num_fields)], axis="columns"
+            [
+                self.field(field.name)
+                for field in backports.pyarrow_struct_type_fields(pa_type)
+            ],
+            axis="columns",
         )

+    @property
     def dtypes(self) -> pd.Series:
         pa_type = self._dtype.pyarrow_dtype
         return pd.Series(
             data=[
-                bigframes.dtypes.arrow_dtype_to_bigframes_dtype(pa_type.field(i).type)
-                for i in range(pa_type.num_fields)
+                pd.ArrowDtype(field.type)
+                for field in backports.pyarrow_struct_type_fields(pa_type)
+            ],
+            index=[
+                field.name for field in backports.pyarrow_struct_type_fields(pa_type)
             ],
-            index=[pa_type.field(i).name for i in range(pa_type.num_fields)],
         )

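The reworked accessor now keys each field's dtype by name and exposes `dtypes` as a property. A standalone sketch of the same computation, using plain pyarrow iteration in place of the project's `backports.pyarrow_struct_type_fields` helper (assumed here to yield the struct's fields):

import pandas as pd
import pyarrow as pa

# A synthetic struct type standing in for self._dtype.pyarrow_dtype.
pa_type = pa.struct([("city", pa.string()), ("zip", pa.int64())])
fields = [pa_type.field(i) for i in range(pa_type.num_fields)]

dtypes = pd.Series(
    data=[pd.ArrowDtype(field.type) for field in fields],
    index=[field.name for field in fields],
)
print(dtypes)  # city -> string[pyarrow], zip -> int64[pyarrow]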

notebooks/data_types/struct.ipynb

Lines changed: 8 additions & 8 deletions

@@ -211,11 +211,11 @@
 {
  "data": {
   "text/plain": [
-   "0 [{'tables': {'score': 0.9349926710128784, 'val...\n",
-   "1 [{'tables': {'score': 0.9690881371498108, 'val...\n",
-   "2 [{'tables': {'score': 0.8667634129524231, 'val...\n",
-   "3 [{'tables': {'score': 0.9351968765258789, 'val...\n",
-   "4 [{'tables': {'score': 0.8572560548782349, 'val...\n",
+   "0 [{'tables': {'score': 0.8667634129524231, 'val...\n",
+   "1 [{'tables': {'score': 0.9351968765258789, 'val...\n",
+   "2 [{'tables': {'score': 0.8572560548782349, 'val...\n",
+   "3 [{'tables': {'score': 0.9690881371498108, 'val...\n",
+   "4 [{'tables': {'score': 0.9349926710128784, 'val...\n",
    "Name: predicted_default_payment_next_month, dtype: list<item: struct<tables: struct<score: double, value: string>>>[pyarrow]"
   ]
 },
@@ -267,7 +267,7 @@
  }
 ],
 "source": [
- "df['Address'].struct.dtypes()"
+ "df['Address'].struct.dtypes"
 ]
 },
 {
@@ -461,7 +461,7 @@
 ],
 "metadata": {
  "kernelspec": {
-  "display_name": "Python 3",
+  "display_name": "venv",
   "language": "python",
   "name": "python3"
 },
@@ -475,7 +475,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
-  "version": "3.12.1"
+  "version": "3.12.9"
 }
 },
 "nbformat": 4,

tests/system/small/engines/test_generic_ops.py

Lines changed: 1 addition & 1 deletion

@@ -314,7 +314,7 @@ def test_engines_astype_timedelta(scalars_array_value: array_value.ArrayValue, e
     assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)


-@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
+@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
 def test_engines_where_op(scalars_array_value: array_value.ArrayValue, engine):
     arr, _ = scalars_array_value.compute_values(
         [
Lines changed: 15 additions & 0 deletions

@@ -0,0 +1,15 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `int64_col` AS `bfcol_0`,
+    `int64_too` AS `bfcol_1`,
+    `rowindex` AS `bfcol_2`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    GREATEST(LEAST(`bfcol_2`, `bfcol_1`), `bfcol_0`) AS `bfcol_3`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_3` AS `result_col`
+FROM `bfcte_1`
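
Tracing the aliases through `test_clip` below (`clip_op.as_expr("rowindex", "int64_col", "int64_too")`), `bfcol_2` carries the original value, `bfcol_0` the lower bound, and `bfcol_1` the upper bound, so the snapshot encodes the usual clip identity. A plain-Python restatement (assuming lower <= upper):

# What GREATEST(LEAST(value, upper), lower) computes, element by element.
def clip(value, lower, upper):
    return max(min(value, upper), lower)

assert clip(7, lower=0, upper=5) == 5   # above the range -> upper
assert clip(-3, lower=0, upper=5) == 0  # below the range -> lower
assert clip(2, lower=0, upper=5) == 2   # inside the range -> unchanged
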
Lines changed: 15 additions & 0 deletions

@@ -0,0 +1,15 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `bool_col` AS `bfcol_0`,
+    `int64_col` AS `bfcol_1`,
+    `float64_col` AS `bfcol_2`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    IF(`bfcol_0`, `bfcol_1`, `bfcol_2`) AS `bfcol_3`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_3` AS `result_col`
+FROM `bfcte_1`
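
Here `bfcol_0`/`bfcol_1`/`bfcol_2` carry `bool_col`/`int64_col`/`float64_col`, so `IF(bool_col, int64_col, float64_col)` follows pandas `Series.where` semantics: keep the original where the condition holds, substitute the replacement elsewhere. A small pandas analogue with synthetic data (plain pandas, not BigQuery DataFrames):

import pandas as pd

original = pd.Series([1, 5, 9])
condition = pd.Series([True, False, True])
replacement = pd.Series([-1.0, -1.0, -1.0])

# Row-wise equivalent of IF(condition, original, replacement).
print(original.where(condition, other=replacement).tolist())  # [1.0, -1.0, 9.0]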

tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py

Lines changed: 32 additions & 0 deletions

@@ -168,6 +168,22 @@ def test_astype_json_invalid(
     )


+def test_clip(scalar_types_df: bpd.DataFrame, snapshot):
+    op_expr = ops.clip_op.as_expr("rowindex", "int64_col", "int64_too")
+
+    array_value = scalar_types_df._block.expr
+    result, col_ids = array_value.compute_values([op_expr])
+
+    # Rename columns for deterministic golden SQL results.
+    assert len(col_ids) == 1
+    result = result.rename_columns({col_ids[0]: "result_col"}).select_columns(
+        ["result_col"]
+    )
+
+    sql = result.session._executor.to_sql(result, enable_cache=False)
+    snapshot.assert_match(sql, "out.sql")
+
+
 def test_hash(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
@@ -202,3 +218,19 @@ def test_map(scalar_types_df: bpd.DataFrame, snapshot):
     )

     snapshot.assert_match(sql, "out.sql")
+
+
+def test_where(scalar_types_df: bpd.DataFrame, snapshot):
+    op_expr = ops.where_op.as_expr("int64_col", "bool_col", "float64_col")
+
+    array_value = scalar_types_df._block.expr
+    result, col_ids = array_value.compute_values([op_expr])
+
+    # Rename columns for deterministic golden SQL results.
+    assert len(col_ids) == 1
+    result = result.rename_columns({col_ids[0]: "result_col"}).select_columns(
+        ["result_col"]
+    )
+
+    sql = result.session._executor.to_sql(result, enable_cache=False)
+    snapshot.assert_match(sql, "out.sql")
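
Both new tests follow the same golden-SQL pattern: compile a single renamed result column, emit SQL with caching disabled, and compare against the checked-in `out.sql`. A rough sketch of that comparison step, using a hypothetical helper rather than the project's actual `snapshot` fixture:

import pathlib

def assert_matches_golden(sql: str, golden_path: pathlib.Path) -> None:
    # Compare freshly generated SQL against the stored golden file,
    # tolerating only leading/trailing whitespace differences.
    expected = golden_path.read_text()
    assert sql.strip() == expected.strip(), f"SQL drifted from {golden_path}"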
