From 23b769d6b43d631409ffe60589e0ba84e078a2fb Mon Sep 17 00:00:00 2001 From: jialuo Date: Fri, 7 Nov 2025 01:06:37 +0000 Subject: [PATCH 1/2] chore: Migrate euclidean_distance_op operator to SQLGlot --- .../compile/sqlglot/expressions/numeric_ops.py | 12 ++++++++++++ .../test_euclidean_distance/out.sql | 16 ++++++++++++++++ .../sqlglot/expressions/test_numeric_ops.py | 15 +++++++++++++++ 3 files changed, 43 insertions(+) create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_euclidean_distance/out.sql diff --git a/bigframes/core/compile/sqlglot/expressions/numeric_ops.py b/bigframes/core/compile/sqlglot/expressions/numeric_ops.py index afc0d9d01c..c37e977b4b 100644 --- a/bigframes/core/compile/sqlglot/expressions/numeric_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/numeric_ops.py @@ -286,6 +286,18 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: return result +@register_binary_op(ops.euclidean_distance_op) +def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + return sge.Anonymous( + this="ML.DISTANCE", + expressions=[ + left.expr, + right.expr, + sge.Literal.string("EUCLIDEAN"), + ], + ) + + @register_binary_op(ops.floordiv_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: left_expr = _coerce_bool_to_int(left) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_euclidean_distance/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_euclidean_distance/out.sql new file mode 100644 index 0000000000..f1e9970c71 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_euclidean_distance/out.sql @@ -0,0 +1,16 @@ +WITH `bfcte_0` AS ( + SELECT + `int_list_col` AS `bfcol_0`, + `numeric_list_col` AS `bfcol_1` + FROM `bigframes-dev`.`sqlglot_test`.`repeated_types` +), `bfcte_1` AS ( + SELECT + *, + ML.DISTANCE(`bfcol_0`, `bfcol_0`, 'EUCLIDEAN') AS `bfcol_2`, + ML.DISTANCE(`bfcol_1`, `bfcol_1`, 'EUCLIDEAN') AS `bfcol_3` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `int_list_col`, + `bfcol_3` AS `numeric_list_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py index c66fe15c16..fe53ad461f 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py @@ -286,6 +286,21 @@ def test_div_timedelta(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(bf_df.sql, "out.sql") +def test_euclidean_distance(repeated_types_df: bpd.DataFrame, snapshot): + col_names = ["int_list_col", "numeric_list_col"] + bf_df = repeated_types_df[col_names] + + sql = utils._apply_ops_to_sql( + bf_df, + [ + ops.euclidean_distance_op.as_expr("int_list_col", "int_list_col"), + ops.euclidean_distance_op.as_expr("numeric_list_col", "numeric_list_col"), + ], + ["int_list_col", "numeric_list_col"], + ) + snapshot.assert_match(sql, "out.sql") + + def test_floordiv_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["int64_col", "bool_col", "float64_col"]] From 43e44a78269f393ec3756bb67af3d175aa48ce49 Mon Sep 17 00:00:00 2001 From: jialuo Date: Tue, 11 Nov 2025 01:00:16 +0000 Subject: [PATCH 2/2] update sql --- .../test_numeric_ops/test_euclidean_distance/out.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_euclidean_distance/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_euclidean_distance/out.sql index f1e9970c71..3327a99f4b 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_euclidean_distance/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_euclidean_distance/out.sql @@ -1,13 +1,13 @@ WITH `bfcte_0` AS ( SELECT - `int_list_col` AS `bfcol_0`, - `numeric_list_col` AS `bfcol_1` + `int_list_col`, + `numeric_list_col` FROM `bigframes-dev`.`sqlglot_test`.`repeated_types` ), `bfcte_1` AS ( SELECT *, - ML.DISTANCE(`bfcol_0`, `bfcol_0`, 'EUCLIDEAN') AS `bfcol_2`, - ML.DISTANCE(`bfcol_1`, `bfcol_1`, 'EUCLIDEAN') AS `bfcol_3` + ML.DISTANCE(`int_list_col`, `int_list_col`, 'EUCLIDEAN') AS `bfcol_2`, + ML.DISTANCE(`numeric_list_col`, `numeric_list_col`, 'EUCLIDEAN') AS `bfcol_3` FROM `bfcte_0` ) SELECT