From 5fbc3fa2b5c20dac01a91dd344df1661ee0ad5ad Mon Sep 17 00:00:00 2001 From: jialuo Date: Tue, 11 Nov 2025 18:21:43 +0000 Subject: [PATCH 1/2] chore: Migrate manhattan_distance_op operator to SQLGlot --- .../compile/sqlglot/expressions/numeric_ops.py | 12 ++++++++++++ .../test_manhattan_distance/out.sql | 16 ++++++++++++++++ .../sqlglot/expressions/test_numeric_ops.py | 15 +++++++++++++++ 3 files changed, 43 insertions(+) create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_manhattan_distance/out.sql diff --git a/bigframes/core/compile/sqlglot/expressions/numeric_ops.py b/bigframes/core/compile/sqlglot/expressions/numeric_ops.py index 36e2973565..0941c4de2e 100644 --- a/bigframes/core/compile/sqlglot/expressions/numeric_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/numeric_ops.py @@ -338,6 +338,18 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: return result +@register_binary_op(ops.manhattan_distance_op) +def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + return sge.Anonymous( + this="ML.DISTANCE", + expressions=[ + left.expr, + right.expr, + sge.Literal.string("MANHATTAN"), + ], + ) + + @register_binary_op(ops.mod_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: # In BigQuery returned value has the same sign as X. In pandas, the sign of y is used, so we need to flip the result if sign(x) != sign(y) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_manhattan_distance/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_manhattan_distance/out.sql new file mode 100644 index 0000000000..185bb7b277 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_manhattan_distance/out.sql @@ -0,0 +1,16 @@ +WITH `bfcte_0` AS ( + SELECT + `float_list_col`, + `numeric_list_col` + FROM `bigframes-dev`.`sqlglot_test`.`repeated_types` +), `bfcte_1` AS ( + SELECT + *, + ML.DISTANCE(`float_list_col`, `float_list_col`, 'MANHATTAN') AS `bfcol_2`, + ML.DISTANCE(`numeric_list_col`, `numeric_list_col`, 'MANHATTAN') AS `bfcol_3` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `float_list_col`, + `bfcol_3` AS `numeric_list_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py index 06731bcbfa..749a3fdd9a 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py @@ -339,6 +339,21 @@ def test_floordiv_timedelta(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(bf_df.sql, "out.sql") +def test_manhattan_distance(repeated_types_df: bpd.DataFrame, snapshot): + col_names = ["float_list_col", "numeric_list_col"] + bf_df = repeated_types_df[col_names] + + sql = utils._apply_ops_to_sql( + bf_df, + [ + ops.manhattan_distance_op.as_expr("float_list_col", "float_list_col"), + ops.manhattan_distance_op.as_expr("numeric_list_col", "numeric_list_col"), + ], + ["float_list_col", "numeric_list_col"], + ) + snapshot.assert_match(sql, "out.sql") + + def test_mul_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["int64_col", "bool_col"]] From 8fa638f97c2974a12fb7d4330c52781d68095e44 Mon Sep 17 00:00:00 2001 From: jialuo Date: Thu, 13 Nov 2025 01:34:23 +0000 Subject: [PATCH 2/2] use sge.func --- .../core/compile/sqlglot/expressions/numeric_ops.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/bigframes/core/compile/sqlglot/expressions/numeric_ops.py b/bigframes/core/compile/sqlglot/expressions/numeric_ops.py index 0941c4de2e..f30a9e0146 100644 --- a/bigframes/core/compile/sqlglot/expressions/numeric_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/numeric_ops.py @@ -340,13 +340,8 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: @register_binary_op(ops.manhattan_distance_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: - return sge.Anonymous( - this="ML.DISTANCE", - expressions=[ - left.expr, - right.expr, - sge.Literal.string("MANHATTAN"), - ], + return sge.func( + "ML.DISTANCE", left.expr, right.expr, sge.Literal.string("MANHATTAN") )