diff --git a/bigframes/core/compile/sqlglot/expressions/numeric_ops.py b/bigframes/core/compile/sqlglot/expressions/numeric_ops.py
index e33702b08c..e0ea24a470 100644
--- a/bigframes/core/compile/sqlglot/expressions/numeric_ops.py
+++ b/bigframes/core/compile/sqlglot/expressions/numeric_ops.py
@@ -350,6 +350,14 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
     return result
 
 
+@register_binary_op(ops.manhattan_distance_op)
+def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
+    """Compile manhattan_distance_op to ML.DISTANCE(left, right, 'MANHATTAN')."""
+    return sge.func(
+        "ML.DISTANCE", left.expr, right.expr, sge.Literal.string("MANHATTAN")
+    )
+
+
 @register_binary_op(ops.mod_op)
 def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
     # In BigQuery returned value has the same sign as X. In pandas, the sign of y is used, so we need to flip the result if sign(x) != sign(y)
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_manhattan_distance/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_manhattan_distance/out.sql
new file mode 100644
index 0000000000..185bb7b277
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_manhattan_distance/out.sql
@@ -0,0 +1,16 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `float_list_col`,
+    `numeric_list_col`
+  FROM `bigframes-dev`.`sqlglot_test`.`repeated_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    ML.DISTANCE(`float_list_col`, `float_list_col`, 'MANHATTAN') AS `bfcol_2`,
+    ML.DISTANCE(`numeric_list_col`, `numeric_list_col`, 'MANHATTAN') AS `bfcol_3`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_2` AS `float_list_col`,
+  `bfcol_3` AS `numeric_list_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py
index c58ce9e2f1..5d3b23ebb7 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py
@@ -354,5 +354,18 @@ def test_floordiv_timedelta(scalar_types_df: bpd.DataFrame, snapshot):
     snapshot.assert_match(bf_df.sql, "out.sql")
 
 
+def test_manhattan_distance(repeated_types_df: bpd.DataFrame, snapshot):
+    """Snapshot the SQL compiled for manhattan_distance_op on list columns."""
+    col_names = ["float_list_col", "numeric_list_col"]
+    bf_df = repeated_types_df[col_names]
+
+    sql = utils._apply_ops_to_sql(
+        bf_df,
+        [ops.manhattan_distance_op.as_expr(name, name) for name in col_names],
+        col_names,
+    )
+    snapshot.assert_match(sql, "out.sql")
+
+
 def test_mul_numeric(scalar_types_df: bpd.DataFrame, snapshot):
     bf_df = scalar_types_df[["int64_col", "bool_col"]]