From bacab0e5ec8f7ea358a34acd96c40e4f557eb4da Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 30 Sep 2025 20:20:23 +0000 Subject: [PATCH 1/2] refactor: support agg_ops.AnyValueOp in sqlglot compiler --- .../compile/sqlglot/aggregations/unary_compiler.py | 9 +++++++++ .../test_unary_compiler/test_any_value/out.sql | 12 ++++++++++++ .../sqlglot/aggregations/test_unary_compiler.py | 9 +++++++++ 3 files changed, 30 insertions(+) create mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/out.sql diff --git a/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py b/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py index e8baa15bce..21c83b3be6 100644 --- a/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py +++ b/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py @@ -69,6 +69,15 @@ def _( return sge.func("APPROX_TOP_COUNT", column.expr, sge.convert(op.number)) +@UNARY_OP_REGISTRATION.register(agg_ops.AnyValueOp) +def _( + op: agg_ops.AnyValueOp, + column: typed_expr.TypedExpr, + window: typing.Optional[window_spec.WindowSpec] = None, +) -> sge.Expression: + return apply_window_if_present(sge.func("ANY_VALUE", column.expr), window) + + @UNARY_OP_REGISTRATION.register(agg_ops.CountOp) def _( op: agg_ops.CountOp, diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/out.sql new file mode 100644 index 0000000000..f95b094a13 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/out.sql @@ -0,0 +1,12 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + ANY_VALUE(`bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `int64_col` +FROM `bfcte_1` \ No newline at end of file diff --git 
a/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py index 4abf80df19..44527a4167 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py +++ b/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py @@ -87,6 +87,15 @@ def test_approx_top_count(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") +def test_any_value(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "int64_col" + bf_df = scalar_types_df[[col_name]] + agg_expr = agg_ops.AnyValueOp().as_expr(col_name) + sql = _apply_unary_agg_ops(bf_df, [agg_expr], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + def test_count(scalar_types_df: bpd.DataFrame, snapshot): col_name = "int64_col" bf_df = scalar_types_df[[col_name]] From 0a22f85a3ed0bcaad5250eaeff102b3abe6f4365 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 30 Sep 2025 20:24:41 +0000 Subject: [PATCH 2/2] refactor: support agg_ops.All in sqlglot compiler --- .../compile/sqlglot/aggregations/unary_compiler.py | 11 +++++++++++ .../snapshots/test_unary_compiler/test_all/out.sql | 12 ++++++++++++ .../sqlglot/aggregations/test_unary_compiler.py | 9 +++++++++ 3 files changed, 32 insertions(+) create mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/out.sql diff --git a/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py b/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py index 21c83b3be6..1e87fd1fc5 100644 --- a/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py +++ b/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py @@ -38,6 +38,17 @@ def compile( return UNARY_OP_REGISTRATION[op](op, column, window=window) +@UNARY_OP_REGISTRATION.register(agg_ops.AllOp) +def _( + op: agg_ops.AllOp, + column: typed_expr.TypedExpr, + window: typing.Optional[window_spec.WindowSpec] = None, +) -> sge.Expression: + # BQ 
will return null for empty column, result would be true in pandas. + result = apply_window_if_present(sge.func("LOGICAL_AND", column.expr), window) + return sge.func("IFNULL", result, sge.true()) + + @UNARY_OP_REGISTRATION.register(agg_ops.ApproxQuartilesOp) def _( op: agg_ops.ApproxQuartilesOp, diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/out.sql new file mode 100644 index 0000000000..7303d758cc --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/out.sql @@ -0,0 +1,12 @@ +WITH `bfcte_0` AS ( + SELECT + `bool_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + COALESCE(LOGICAL_AND(`bfcol_0`), TRUE) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `bool_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py index 44527a4167..ea7faca7fb 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py +++ b/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py @@ -63,6 +63,15 @@ def _apply_unary_window_op( return sql +def test_all(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "bool_col" + bf_df = scalar_types_df[[col_name]] + agg_expr = agg_ops.AllOp().as_expr(col_name) + sql = _apply_unary_agg_ops(bf_df, [agg_expr], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + def test_approx_quartiles(scalar_types_df: bpd.DataFrame, snapshot): col_name = "int64_col" bf_df = scalar_types_df[[col_name]]