From a61bb093e60ababfe43d14d9dd14e2c1ba319b21 Mon Sep 17 00:00:00 2001 From: jialuo Date: Thu, 30 Oct 2025 18:42:14 +0000 Subject: [PATCH 1/4] chore: Migrate coalesce_op operator to SQLGlot Migrated the coalesce_op operator from Ibis to SQLGlot. --- .../compile/sqlglot/expressions/generic_ops.py | 8 ++++++++ .../test_generic_ops/test_coalesce/out.sql | 14 ++++++++++++++ .../sqlglot/expressions/test_generic_ops.py | 7 +++++++ 3 files changed, 29 insertions(+) create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_coalesce/out.sql diff --git a/bigframes/core/compile/sqlglot/expressions/generic_ops.py b/bigframes/core/compile/sqlglot/expressions/generic_ops.py index 07505855e1..2bd19e1967 100644 --- a/bigframes/core/compile/sqlglot/expressions/generic_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/generic_ops.py @@ -24,6 +24,7 @@ import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op +register_binary_op = scalar_compiler.scalar_op_compiler.register_binary_op register_nary_op = scalar_compiler.scalar_op_compiler.register_nary_op register_ternary_op = scalar_compiler.scalar_op_compiler.register_ternary_op @@ -159,6 +160,13 @@ def _(*cases_and_outputs: TypedExpr) -> sge.Expression: ) +@register_binary_op(ops.coalesce_op) +def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if left.expr == right.expr: + return left.expr + return sge.Coalesce(this=left.expr, expressions=[right.expr]) + + @register_nary_op(ops.RowKey) def _(*values: TypedExpr) -> sge.Expression: # All inputs into hash must be non-null or resulting hash will be null diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_coalesce/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_coalesce/out.sql new file mode 100644 index 0000000000..ef52810d40 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_coalesce/out.sql @@ -0,0 +1,14 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col` AS `bfcol_0`, + `int64_too` AS `bfcol_1` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + COALESCE(`bfcol_0`, `bfcol_1`) AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `int64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py index fd9732bf89..dde9f19e1e 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py @@ -209,6 +209,13 @@ def test_case_when_op(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") +def test_coalesce(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["int64_col", "int64_too"]] + sql = utils._apply_binary_op(bf_df, ops.coalesce_op, "int64_col", "int64_too") + + snapshot.assert_match(sql, "out.sql") + + def test_clip(scalar_types_df: bpd.DataFrame, snapshot): op_expr = ops.clip_op.as_expr("rowindex", "int64_col", "int64_too") From 210f125611599a0d01312a89fcef942f824e3f98 Mon Sep 17 00:00:00 2001 From: jialuo Date: Mon, 3 Nov 2025 21:32:45 +0000 Subject: [PATCH 2/4] fix test --- .../snapshots/test_generic_ops/test_coalesce/out.sql | 6 ++++-- .../core/compile/sqlglot/expressions/test_generic_ops.py | 9 ++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_coalesce/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_coalesce/out.sql index ef52810d40..5b11a1ddeb 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_coalesce/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_coalesce/out.sql @@ -6,9 +6,11 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - COALESCE(`bfcol_0`, `bfcol_1`) AS `bfcol_2` + `bfcol_0` AS `bfcol_2`, + COALESCE(`bfcol_1`, `bfcol_0`) AS `bfcol_3` FROM `bfcte_0` ) SELECT - `bfcol_2` AS `int64_col` + `bfcol_2` AS `int64_col`, + `bfcol_3` AS `int64_too` FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py index 1156dd45a3..68027de8f7 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py @@ -211,8 +211,15 @@ def test_case_when_op(scalar_types_df: bpd.DataFrame, snapshot): def test_coalesce(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["int64_col", "int64_too"]] - sql = utils._apply_binary_op(bf_df, ops.coalesce_op, "int64_col", "int64_too") + sql = utils._apply_ops_to_sql( + bf_df, + [ + ops.coalesce_op.as_expr("int64_col", "int64_col"), + ops.coalesce_op.as_expr("int64_too", "int64_col"), + ], + ["int64_col", "int64_too"], + ) snapshot.assert_match(sql, "out.sql") From 9a06288eea826cce027d63400ba99f4b5bc0e781 Mon Sep 17 00:00:00 2001 From: jialuo Date: Mon, 3 Nov 2025 21:43:06 +0000 Subject: [PATCH 3/4] fix lint --- tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py index 68027de8f7..693f8dc34c 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py @@ -215,7 +215,7 @@ def test_coalesce(scalar_types_df: bpd.DataFrame, snapshot): sql = utils._apply_ops_to_sql( bf_df, [ - ops.coalesce_op.as_expr("int64_col", "int64_col"), + ops.coalesce_op.as_expr("int64_col", "int64_col"), ops.coalesce_op.as_expr("int64_too", "int64_col"), ], ["int64_col", "int64_too"], From ab3fab1c2d0adb7751720c631e97587d45afca4a Mon Sep 17 00:00:00 2001 From: jialuo Date: Tue, 4 Nov 2025 18:56:40 +0000 Subject: [PATCH 4/4] enable engine test --- tests/system/small/engines/test_generic_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system/small/engines/test_generic_ops.py b/tests/system/small/engines/test_generic_ops.py index 5641f91a9a..01d4dad849 100644 --- a/tests/system/small/engines/test_generic_ops.py +++ b/tests/system/small/engines/test_generic_ops.py @@ -329,7 +329,7 @@ def test_engines_where_op(scalars_array_value: array_value.ArrayValue, engine): assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine) -@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True) +@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True) def test_engines_coalesce_op(scalars_array_value: array_value.ArrayValue, engine): arr, _ = scalars_array_value.compute_values( [