From 1b800e51f076427cbd063407aabb6303a66ba986 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Fri, 24 Oct 2025 21:14:12 +0000 Subject: [PATCH 1/5] refactor: add struct_op for the sqlglot compiler --- .../compile/sqlglot/expressions/struct_ops.py | 11 +++++++ .../test_struct_ops/test_struct_op/out.sql | 21 ++++++++++++ .../sqlglot/expressions/test_struct_ops.py | 32 +++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_struct_ops/test_struct_op/out.sql diff --git a/bigframes/core/compile/sqlglot/expressions/struct_ops.py b/bigframes/core/compile/sqlglot/expressions/struct_ops.py index ebd3a38397..b6ec101eb1 100644 --- a/bigframes/core/compile/sqlglot/expressions/struct_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/struct_ops.py @@ -24,6 +24,7 @@ from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler +register_nary_op = scalar_compiler.scalar_op_compiler.register_nary_op register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op @@ -40,3 +41,13 @@ def _(expr: TypedExpr, op: ops.StructFieldOp) -> sge.Expression: this=sge.to_identifier(name, quoted=True), catalog=expr.expr, ) + + +@register_nary_op(ops.StructOp, pass_op=True) +def _(*exprs: TypedExpr, op: ops.StructOp) -> sge.Struct: + return sge.Struct( + expressions=[ + sge.PropertyEQ(this=sge.to_identifier(col), expression=expr.expr) + for col, expr in zip(op.column_names, exprs) + ] + ) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_struct_ops/test_struct_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_struct_ops/test_struct_op/out.sql new file mode 100644 index 0000000000..f7f741a523 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_struct_ops/test_struct_op/out.sql @@ -0,0 +1,21 @@ +WITH `bfcte_0` AS ( + SELECT + `bool_col` AS `bfcol_0`, + `int64_col` AS `bfcol_1`, + `float64_col` AS `bfcol_2`, + `string_col` AS `bfcol_3` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + STRUCT( + `bfcol_0` AS bool_col, + `bfcol_1` AS int64_col, + `bfcol_2` AS float64_col, + `bfcol_3` AS string_col + ) AS `bfcol_4` + FROM `bfcte_0` +) +SELECT + `bfcol_4` AS `result_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py index 19156ead99..7e67e44cd3 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py @@ -12,15 +12,39 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + import pytest from bigframes import operations as ops +from bigframes.core import expression as ex import bigframes.pandas as bpd from bigframes.testing import utils pytest.importorskip("pytest_snapshot") +def _apply_nary_op( + obj: bpd.DataFrame, + op: ops.NaryOp, + *args: typing.Union[str, ex.Expression], +) -> str: + """Applies a nary op to the given DataFrame and return the SQL representing + the resulting DataFrame.""" + array_value = obj._block.expr + op_expr = op.as_expr(*args) + result, col_ids = array_value.compute_values([op_expr]) + + # Rename columns for deterministic golden SQL results. + assert len(col_ids) == 1 + result = result.rename_columns({col_ids[0]: "result_col"}).select_columns( + ["result_col"] + ) + + sql = result.session._executor.to_sql(result, enable_cache=False) + return sql + + def test_struct_field(nested_structs_types_df: bpd.DataFrame, snapshot): col_name = "people" bf_df = nested_structs_types_df[[col_name]] @@ -34,3 +58,11 @@ def test_struct_field(nested_structs_types_df: bpd.DataFrame, snapshot): sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) snapshot.assert_match(sql, "out.sql") + + +def test_struct_op(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["bool_col", "int64_col", "float64_col", "string_col"]] + op = ops.StructOp(column_names=tuple(bf_df.columns.tolist())) + sql = _apply_nary_op(bf_df, op, *bf_df.columns.tolist()) + + snapshot.assert_match(sql, "out.sql") From facf9379440e63cd9ba27528184243c650842782 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Fri, 24 Oct 2025 21:21:46 +0000 Subject: [PATCH 2/5] reuse the testing utils.py method --- bigframes/testing/utils.py | 13 +++++----- .../test_struct_ops/test_struct_op/out.sql | 2 +- .../expressions/test_comparison_ops.py | 2 +- .../sqlglot/expressions/test_json_ops.py | 2 +- .../sqlglot/expressions/test_numeric_ops.py | 10 +++---- .../sqlglot/expressions/test_string_ops.py | 4 +-- .../sqlglot/expressions/test_struct_ops.py | 26 +------------------ 7 files changed, 17 insertions(+), 42 deletions(-) diff --git a/bigframes/testing/utils.py b/bigframes/testing/utils.py index b4daab7aad..7785873f0c 100644 --- a/bigframes/testing/utils.py +++ b/bigframes/testing/utils.py @@ -467,21 +467,20 @@ def _apply_unary_ops( return sql -def _apply_binary_op( +def _apply_nary_op( obj: bpd.DataFrame, - op: ops.BinaryOp, - l_arg: str, - r_arg: Union[str, ex.Expression], + op: Union[ops.BinaryOp, ops.NaryOp], + *args: Union[str, ex.Expression], ) -> str: - """Applies a binary op to the given DataFrame and return the SQL representing + """Applies a nary op to the given DataFrame and return the SQL representing the resulting DataFrame.""" array_value = obj._block.expr - op_expr = op.as_expr(l_arg, r_arg) + op_expr = op.as_expr(*args) result, col_ids = array_value.compute_values([op_expr]) # Rename columns for deterministic golden SQL results. assert len(col_ids) == 1 - result = result.rename_columns({col_ids[0]: l_arg}).select_columns([l_arg]) + result = result.rename_columns({col_ids[0]: args[0]}).select_columns([args[0]]) sql = result.session._executor.to_sql(result, enable_cache=False) return sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_struct_ops/test_struct_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_struct_ops/test_struct_op/out.sql index f7f741a523..bab586a716 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_struct_ops/test_struct_op/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_struct_ops/test_struct_op/out.sql @@ -17,5 +17,5 @@ WITH `bfcte_0` AS ( FROM `bfcte_0` ) SELECT - `bfcol_4` AS `result_col` + `bfcol_4` AS `bool_col` FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py index 6c3eb64414..00188ce78a 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py @@ -46,7 +46,7 @@ def test_is_in(scalar_types_df: bpd.DataFrame, snapshot): def test_eq_null_match(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["int64_col", "bool_col"]] - sql = utils._apply_binary_op(bf_df, ops.eq_null_match_op, "int64_col", "bool_col") + sql = utils._apply_nary_op(bf_df, ops.eq_null_match_op, "int64_col", "bool_col") snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py index 75206091e0..5a1a4abef1 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py @@ -102,7 +102,7 @@ def test_to_json_string(json_types_df: bpd.DataFrame, snapshot): def test_json_set(json_types_df: bpd.DataFrame, snapshot): bf_df = json_types_df[["json_col"]] - sql = utils._apply_binary_op( + sql = utils._apply_nary_op( bf_df, ops.JSONSet(json_path="$.a"), "json_col", ex.const(100) ) diff --git a/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py index fe9a53a558..b4904e92ce 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py @@ -221,7 +221,7 @@ def test_add_numeric(scalar_types_df: bpd.DataFrame, snapshot): def test_add_string(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["string_col"]] - sql = utils._apply_binary_op(bf_df, ops.add_op, "string_col", ex.const("a")) + sql = utils._apply_nary_op(bf_df, ops.add_op, "string_col", ex.const("a")) snapshot.assert_match(sql, "out.sql") @@ -241,10 +241,10 @@ def test_add_timedelta(scalar_types_df: bpd.DataFrame, snapshot): def test_add_unsupported_raises(scalar_types_df: bpd.DataFrame): with pytest.raises(TypeError): - utils._apply_binary_op(scalar_types_df, ops.add_op, "timestamp_col", "date_col") + utils._apply_nary_op(scalar_types_df, ops.add_op, "timestamp_col", "date_col") with pytest.raises(TypeError): - utils._apply_binary_op(scalar_types_df, ops.add_op, "int64_col", "string_col") + utils._apply_nary_op(scalar_types_df, ops.add_op, "int64_col", "string_col") def test_div_numeric(scalar_types_df: bpd.DataFrame, snapshot): @@ -361,7 +361,7 @@ def test_sub_timedelta(scalar_types_df: bpd.DataFrame, snapshot): def test_sub_unsupported_raises(scalar_types_df: bpd.DataFrame): with pytest.raises(TypeError): - utils._apply_binary_op(scalar_types_df, ops.sub_op, "string_col", "string_col") + utils._apply_nary_op(scalar_types_df, ops.sub_op, "string_col", "string_col") with pytest.raises(TypeError): - utils._apply_binary_op(scalar_types_df, ops.sub_op, "int64_col", "string_col") + utils._apply_nary_op(scalar_types_df, ops.sub_op, "int64_col", "string_col") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py index 99dbce9410..213028844c 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py @@ -308,13 +308,13 @@ def test_zfill(scalar_types_df: bpd.DataFrame, snapshot): def test_add_string(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["string_col"]] - sql = utils._apply_binary_op(bf_df, ops.add_op, "string_col", ex.const("a")) + sql = utils._apply_nary_op(bf_df, ops.add_op, "string_col", ex.const("a")) snapshot.assert_match(sql, "out.sql") def test_strconcat(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["string_col"]] - sql = utils._apply_binary_op(bf_df, ops.strconcat_op, "string_col", ex.const("a")) + sql = utils._apply_nary_op(bf_df, ops.strconcat_op, "string_col", ex.const("a")) snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py index 7e67e44cd3..a60c62aa80 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py @@ -12,39 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import typing - import pytest from bigframes import operations as ops -from bigframes.core import expression as ex import bigframes.pandas as bpd from bigframes.testing import utils pytest.importorskip("pytest_snapshot") -def _apply_nary_op( - obj: bpd.DataFrame, - op: ops.NaryOp, - *args: typing.Union[str, ex.Expression], -) -> str: - """Applies a nary op to the given DataFrame and return the SQL representing - the resulting DataFrame.""" - array_value = obj._block.expr - op_expr = op.as_expr(*args) - result, col_ids = array_value.compute_values([op_expr]) - - # Rename columns for deterministic golden SQL results. - assert len(col_ids) == 1 - result = result.rename_columns({col_ids[0]: "result_col"}).select_columns( - ["result_col"] - ) - - sql = result.session._executor.to_sql(result, enable_cache=False) - return sql - - def test_struct_field(nested_structs_types_df: bpd.DataFrame, snapshot): col_name = "people" bf_df = nested_structs_types_df[[col_name]] @@ -63,6 +39,6 @@ def test_struct_field(nested_structs_types_df: bpd.DataFrame, snapshot): def test_struct_op(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["bool_col", "int64_col", "float64_col", "string_col"]] op = ops.StructOp(column_names=tuple(bf_df.columns.tolist())) - sql = _apply_nary_op(bf_df, op, *bf_df.columns.tolist()) + sql = utils._apply_nary_op(bf_df, op, *bf_df.columns.tolist()) snapshot.assert_match(sql, "out.sql") From 62d9d31d20fe471ce921be6fd61cce70a7d152be Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 27 Oct 2025 16:48:39 +0000 Subject: [PATCH 3/5] refactor: add sqlscalarop to the sqlglot compiler --- .../compile/sqlglot/expressions/generic_ops.py | 12 ++++++++++++ .../test_generic_ops/test_sql_scalar_op/out.sql | 14 ++++++++++++++ .../sqlglot/expressions/test_generic_ops.py | 11 +++++++++++ 3 files changed, 37 insertions(+) create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_sql_scalar_op/out.sql diff --git a/bigframes/core/compile/sqlglot/expressions/generic_ops.py b/bigframes/core/compile/sqlglot/expressions/generic_ops.py index 9782ef11d4..23bf355a69 100644 --- a/bigframes/core/compile/sqlglot/expressions/generic_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/generic_ops.py @@ -14,6 +14,7 @@ from __future__ import annotations +import sqlglot as sg import sqlglot.expressions as sge from bigframes import dtypes @@ -80,6 +81,17 @@ def _(expr: TypedExpr) -> sge.Expression: return sge.BitwiseNot(this=sge.paren(expr.expr)) +@register_nary_op(ops.SqlScalarOp, pass_op=True) +def _(*operands: TypedExpr, op: ops.SqlScalarOp) -> sge.Expression: + # TODO: can we include a string in the sqlglot expression without parsing? + return sg.parse_one( + op.sql_template.format( + *[operand.expr.sql(dialect="bigquery") for operand in operands] + ), + dialect="bigquery", + ) + + @register_unary_op(ops.isnull_op) def _(expr: TypedExpr) -> sge.Expression: return sge.Is(this=expr.expr, expression=sge.Null()) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_sql_scalar_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_sql_scalar_op/out.sql new file mode 100644 index 0000000000..a79e006885 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_sql_scalar_op/out.sql @@ -0,0 +1,14 @@ +WITH `bfcte_0` AS ( + SELECT + `bool_col` AS `bfcol_0`, + `bytes_col` AS `bfcol_1` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CAST(`bfcol_0` AS INT64) + BYTE_LENGTH(`bfcol_1`) AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `bool_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py index b7abc63213..075416d664 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py @@ -261,6 +261,17 @@ def test_notnull(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") +def test_sql_scalar_op(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["bool_col", "bytes_col"]] + sql = utils._apply_nary_op( + bf_df, + ops.SqlScalarOp(dtypes.INT_DTYPE, "CAST({0} AS INT64) + BYTE_LENGTH({1})"), + "bool_col", + "bytes_col", + ) + snapshot.assert_match(sql, "out.sql") + + def test_map(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] From a6af1349755d52e643f96ddfb3c4fc5da2ef776e Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 27 Oct 2025 22:30:02 +0000 Subject: [PATCH 4/5] Revert "reuse the testing utils.py method" This reverts commit 9fad51ce3a03b5f664c8ce5de6a9c1e8a1970cd8. --- bigframes/testing/utils.py | 13 +++++----- .../test_struct_ops/test_struct_op/out.sql | 2 +- .../expressions/test_comparison_ops.py | 2 +- .../sqlglot/expressions/test_json_ops.py | 2 +- .../sqlglot/expressions/test_numeric_ops.py | 10 +++---- .../sqlglot/expressions/test_string_ops.py | 4 +-- .../sqlglot/expressions/test_struct_ops.py | 26 ++++++++++++++++++- 7 files changed, 42 insertions(+), 17 deletions(-) diff --git a/bigframes/testing/utils.py b/bigframes/testing/utils.py index 7785873f0c..b4daab7aad 100644 --- a/bigframes/testing/utils.py +++ b/bigframes/testing/utils.py @@ -467,20 +467,21 @@ def _apply_unary_ops( return sql -def _apply_nary_op( +def _apply_binary_op( obj: bpd.DataFrame, - op: Union[ops.BinaryOp, ops.NaryOp], - *args: Union[str, ex.Expression], + op: ops.BinaryOp, + l_arg: str, + r_arg: Union[str, ex.Expression], ) -> str: - """Applies a nary op to the given DataFrame and return the SQL representing + """Applies a binary op to the given DataFrame and return the SQL representing the resulting DataFrame.""" array_value = obj._block.expr - op_expr = op.as_expr(*args) + op_expr = op.as_expr(l_arg, r_arg) result, col_ids = array_value.compute_values([op_expr]) # Rename columns for deterministic golden SQL results. assert len(col_ids) == 1 - result = result.rename_columns({col_ids[0]: args[0]}).select_columns([args[0]]) + result = result.rename_columns({col_ids[0]: l_arg}).select_columns([l_arg]) sql = result.session._executor.to_sql(result, enable_cache=False) return sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_struct_ops/test_struct_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_struct_ops/test_struct_op/out.sql index bab586a716..f7f741a523 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_struct_ops/test_struct_op/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_struct_ops/test_struct_op/out.sql @@ -17,5 +17,5 @@ WITH `bfcte_0` AS ( FROM `bfcte_0` ) SELECT - `bfcol_4` AS `bool_col` + `bfcol_4` AS `result_col` FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py index 00188ce78a..6c3eb64414 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py @@ -46,7 +46,7 @@ def test_is_in(scalar_types_df: bpd.DataFrame, snapshot): def test_eq_null_match(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["int64_col", "bool_col"]] - sql = utils._apply_nary_op(bf_df, ops.eq_null_match_op, "int64_col", "bool_col") + sql = utils._apply_binary_op(bf_df, ops.eq_null_match_op, "int64_col", "bool_col") snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py index 5a1a4abef1..75206091e0 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py @@ -102,7 +102,7 @@ def test_to_json_string(json_types_df: bpd.DataFrame, snapshot): def test_json_set(json_types_df: bpd.DataFrame, snapshot): bf_df = json_types_df[["json_col"]] - sql = utils._apply_nary_op( + sql = utils._apply_binary_op( bf_df, ops.JSONSet(json_path="$.a"), "json_col", ex.const(100) ) diff --git a/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py index b4904e92ce..fe9a53a558 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py @@ -221,7 +221,7 @@ def test_add_numeric(scalar_types_df: bpd.DataFrame, snapshot): def test_add_string(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["string_col"]] - sql = utils._apply_nary_op(bf_df, ops.add_op, "string_col", ex.const("a")) + sql = utils._apply_binary_op(bf_df, ops.add_op, "string_col", ex.const("a")) snapshot.assert_match(sql, "out.sql") @@ -241,10 +241,10 @@ def test_add_timedelta(scalar_types_df: bpd.DataFrame, snapshot): def test_add_unsupported_raises(scalar_types_df: bpd.DataFrame): with pytest.raises(TypeError): - utils._apply_nary_op(scalar_types_df, ops.add_op, "timestamp_col", "date_col") + utils._apply_binary_op(scalar_types_df, ops.add_op, "timestamp_col", "date_col") with pytest.raises(TypeError): - utils._apply_nary_op(scalar_types_df, ops.add_op, "int64_col", "string_col") + utils._apply_binary_op(scalar_types_df, ops.add_op, "int64_col", "string_col") def test_div_numeric(scalar_types_df: bpd.DataFrame, snapshot): @@ -361,7 +361,7 @@ def test_sub_timedelta(scalar_types_df: bpd.DataFrame, snapshot): def test_sub_unsupported_raises(scalar_types_df: bpd.DataFrame): with pytest.raises(TypeError): - utils._apply_nary_op(scalar_types_df, ops.sub_op, "string_col", "string_col") + utils._apply_binary_op(scalar_types_df, ops.sub_op, "string_col", "string_col") with pytest.raises(TypeError): - utils._apply_nary_op(scalar_types_df, ops.sub_op, "int64_col", "string_col") + utils._apply_binary_op(scalar_types_df, ops.sub_op, "int64_col", "string_col") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py index 213028844c..99dbce9410 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py @@ -308,13 +308,13 @@ def test_zfill(scalar_types_df: bpd.DataFrame, snapshot): def test_add_string(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["string_col"]] - sql = utils._apply_nary_op(bf_df, ops.add_op, "string_col", ex.const("a")) + sql = utils._apply_binary_op(bf_df, ops.add_op, "string_col", ex.const("a")) snapshot.assert_match(sql, "out.sql") def test_strconcat(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["string_col"]] - sql = utils._apply_nary_op(bf_df, ops.strconcat_op, "string_col", ex.const("a")) + sql = utils._apply_binary_op(bf_df, ops.strconcat_op, "string_col", ex.const("a")) snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py index a60c62aa80..7e67e44cd3 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py @@ -12,15 +12,39 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + import pytest from bigframes import operations as ops +from bigframes.core import expression as ex import bigframes.pandas as bpd from bigframes.testing import utils pytest.importorskip("pytest_snapshot") +def _apply_nary_op( + obj: bpd.DataFrame, + op: ops.NaryOp, + *args: typing.Union[str, ex.Expression], +) -> str: + """Applies a nary op to the given DataFrame and return the SQL representing + the resulting DataFrame.""" + array_value = obj._block.expr + op_expr = op.as_expr(*args) + result, col_ids = array_value.compute_values([op_expr]) + + # Rename columns for deterministic golden SQL results. + assert len(col_ids) == 1 + result = result.rename_columns({col_ids[0]: "result_col"}).select_columns( + ["result_col"] + ) + + sql = result.session._executor.to_sql(result, enable_cache=False) + return sql + + def test_struct_field(nested_structs_types_df: bpd.DataFrame, snapshot): col_name = "people" bf_df = nested_structs_types_df[[col_name]] @@ -39,6 +63,6 @@ def test_struct_field(nested_structs_types_df: bpd.DataFrame, snapshot): def test_struct_op(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["bool_col", "int64_col", "float64_col", "string_col"]] op = ops.StructOp(column_names=tuple(bf_df.columns.tolist())) - sql = utils._apply_nary_op(bf_df, op, *bf_df.columns.tolist()) + sql = _apply_nary_op(bf_df, op, *bf_df.columns.tolist()) snapshot.assert_match(sql, "out.sql") From 88c763654697cec06ef02a90982bfcb7e280abe7 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 27 Oct 2025 22:40:09 +0000 Subject: [PATCH 5/5] address comments --- .../compile/sqlglot/expressions/generic_ops.py | 1 - bigframes/testing/utils.py | 14 ++++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/bigframes/core/compile/sqlglot/expressions/generic_ops.py b/bigframes/core/compile/sqlglot/expressions/generic_ops.py index 23bf355a69..7572a1e801 100644 --- a/bigframes/core/compile/sqlglot/expressions/generic_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/generic_ops.py @@ -83,7 +83,6 @@ def _(expr: TypedExpr) -> sge.Expression: @register_nary_op(ops.SqlScalarOp, pass_op=True) def _(*operands: TypedExpr, op: ops.SqlScalarOp) -> sge.Expression: - # TODO: can we include a string in the sqlglot expression without parsing? return sg.parse_one( op.sql_template.format( *[operand.expr.sql(dialect="bigquery") for operand in operands] diff --git a/bigframes/testing/utils.py b/bigframes/testing/utils.py index b4daab7aad..a0bfc9e648 100644 --- a/bigframes/testing/utils.py +++ b/bigframes/testing/utils.py @@ -475,13 +475,23 @@ def _apply_binary_op( ) -> str: """Applies a binary op to the given DataFrame and return the SQL representing the resulting DataFrame.""" + return _apply_nary_op(obj, op, l_arg, r_arg) + + +def _apply_nary_op( + obj: bpd.DataFrame, + op: Union[ops.BinaryOp, ops.NaryOp], + *args: Union[str, ex.Expression], +) -> str: + """Applies a nary op to the given DataFrame and return the SQL representing + the resulting DataFrame.""" array_value = obj._block.expr - op_expr = op.as_expr(l_arg, r_arg) + op_expr = op.as_expr(*args) result, col_ids = array_value.compute_values([op_expr]) # Rename columns for deterministic golden SQL results. assert len(col_ids) == 1 - result = result.rename_columns({col_ids[0]: l_arg}).select_columns([l_arg]) + result = result.rename_columns({col_ids[0]: args[0]}).select_columns([args[0]]) sql = result.session._executor.to_sql(result, enable_cache=False) return sql