From c03b84e2633861c57966916a4aba68af1b23979e Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Thu, 17 Jul 2025 22:08:06 +0000 Subject: [PATCH 1/3] chore: Migrate up to 5 scalar operators to SQLGlot Migrated cos, hash, isnull, notnull, and sin operators. --- .../sqlglot/expressions/unary_compiler.py | 25 ++++++++++++++++ .../test_unary_compiler/test_cos/out.sql | 13 ++++++++ .../test_unary_compiler/test_hash/out.sql | 13 ++++++++ .../test_unary_compiler/test_isnull/out.sql | 13 ++++++++ .../test_unary_compiler/test_notnull/out.sql | 13 ++++++++ .../test_unary_compiler/test_sin/out.sql | 13 ++++++++ .../expressions/test_unary_compiler.py | 30 +++++++++++++++++++ 7 files changed, 120 insertions(+) create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_cos/out.sql create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_hash/out.sql create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isnull/out.sql create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_notnull/out.sql create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sin/out.sql diff --git a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py index 9cca15f352..ebca41db27 100644 --- a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py +++ b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py @@ -72,6 +72,31 @@ def _(op: ops.ArraySliceOp, expr: TypedExpr) -> sge.Expression: return sge.array(selected_elements) +@UNARY_OP_REGISTRATION.register(ops.cos_op) +def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: + return sge.func("cos", expr.expr) + + +@UNARY_OP_REGISTRATION.register(ops.hash_op) +def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: + return sge.func("FARM_FINGERPRINT", 
expr.expr) + + +@UNARY_OP_REGISTRATION.register(ops.isnull_op) +def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: + return sge.Is(this=expr.expr, expression=sge.Null()) + + +@UNARY_OP_REGISTRATION.register(ops.notnull_op) +def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: + return sge.Not(this=sge.Is(this=expr.expr, expression=sge.Null())) + + +@UNARY_OP_REGISTRATION.register(ops.sin_op) +def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: + return sge.func("sin", expr.expr) + + # JSON Ops @UNARY_OP_REGISTRATION.register(ops.JSONExtract) def _(op: ops.JSONExtract, expr: TypedExpr) -> sge.Expression: diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_cos/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_cos/out.sql new file mode 100644 index 0000000000..c42464bf75 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_cos/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + cos(`bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `float64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_hash/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_hash/out.sql new file mode 100644 index 0000000000..14d6df6d22 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_hash/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `string_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + FARM_FINGERPRINT(`bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `string_col` +FROM `bfcte_1` \ No newline at end of file diff --git 
a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isnull/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isnull/out.sql new file mode 100644 index 0000000000..55a2ebb970 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isnull/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + `bfcol_0` IS NULL AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `float64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_notnull/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_notnull/out.sql new file mode 100644 index 0000000000..c1961f9d62 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_notnull/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + NOT `bfcol_0` IS NULL AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `float64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sin/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sin/out.sql new file mode 100644 index 0000000000..1a07ba546f --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sin/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + sin(`bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `float64_col` +FROM `bfcte_1` \ No newline at end of file diff --git 
a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py index d74e9c9d48..71989f1052 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py @@ -58,6 +58,36 @@ def test_array_slice_with_start_and_stop(repeated_types_df: bpd.DataFrame, snaps snapshot.assert_match(sql, "out.sql") +def test_cos(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["float64_col"]] + sql = _apply_unary_op(bf_df, ops.cos_op, "float64_col") + snapshot.assert_match(sql, "out.sql") + + +def test_hash(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["string_col"]] + sql = _apply_unary_op(bf_df, ops.hash_op, "string_col") + snapshot.assert_match(sql, "out.sql") + + +def test_isnull(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["float64_col"]] + sql = _apply_unary_op(bf_df, ops.isnull_op, "float64_col") + snapshot.assert_match(sql, "out.sql") + + +def test_notnull(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["float64_col"]] + sql = _apply_unary_op(bf_df, ops.notnull_op, "float64_col") + snapshot.assert_match(sql, "out.sql") + + +def test_sin(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["float64_col"]] + sql = _apply_unary_op(bf_df, ops.sin_op, "float64_col") + snapshot.assert_match(sql, "out.sql") + + def test_json_extract(json_types_df: bpd.DataFrame, snapshot): bf_df = json_types_df[["json_col"]] sql = _apply_unary_op(bf_df, ops.JSONExtract(json_path="$"), "json_col") From c83b8794dc34d8d078125aaaca9fe96dd25976fe Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Thu, 17 Jul 2025 22:40:38 +0000 Subject: [PATCH 2/3] chore: Migrate up to 5 scalar operators to SQLGlot Migrated tan_op, arcsin_op, arccos_op, arctan_op, and sinh_op scalar operators to SQLGlot. 
--- .../sqlglot/expressions/unary_compiler.py | 50 +++++++++++++++++++ .../test_unary_compiler/test_arccos/out.sql | 13 +++++ .../test_unary_compiler/test_arcsin/out.sql | 13 +++++ .../test_unary_compiler/test_arctan/out.sql | 13 +++++ .../test_unary_compiler/test_sinh/out.sql | 17 +++++++ .../test_unary_compiler/test_tan/out.sql | 13 +++++ .../expressions/test_unary_compiler.py | 30 +++++++++++ 7 files changed, 149 insertions(+) create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arccos/out.sql create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arcsin/out.sql create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arctan/out.sql create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sinh/out.sql create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_tan/out.sql diff --git a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py index ebca41db27..76f7adfb30 100644 --- a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py +++ b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py @@ -30,6 +30,37 @@ def compile(op: ops.UnaryOp, expr: TypedExpr) -> sge.Expression: return UNARY_OP_REGISTRATION[op](op, expr) +@UNARY_OP_REGISTRATION.register(ops.arccos_op) +def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: + return sge.Case( + ifs=[ + sge.If( + this=sge.func("ABS", expr.expr) > sge.convert(1), + true=sge.func("IEEE_DIVIDE", sge.convert(0), sge.convert(0)), + ) + ], + default=sge.func("ACOS", expr.expr), + ) + + +@UNARY_OP_REGISTRATION.register(ops.arcsin_op) +def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: + return sge.Case( + ifs=[ + sge.If( + this=sge.func("ABS", expr.expr) > sge.convert(1), + true=sge.func("IEEE_DIVIDE", 
sge.convert(0), sge.convert(0)), + ) + ], + default=sge.func("ASIN", expr.expr), + ) + + +@UNARY_OP_REGISTRATION.register(ops.arctan_op) +def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: + return sge.func("ATAN", expr.expr) + + @UNARY_OP_REGISTRATION.register(ops.ArrayToStringOp) def _(op: ops.ArrayToStringOp, expr: TypedExpr) -> sge.Expression: return sge.ArrayToString(this=expr.expr, expression=f"'{op.delimiter}'") @@ -97,6 +128,25 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: return sge.func("sin", expr.expr) +@UNARY_OP_REGISTRATION.register(ops.sinh_op) +def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: + return sge.Case( + ifs=[ + sge.If( + this=sge.func("ABS", expr.expr) > sge.convert(709.78), + true=sge.func("SIGN", expr.expr) + * sge.func("IEEE_DIVIDE", sge.convert(1), sge.convert(0)), + ) + ], + default=sge.func("SINH", expr.expr), + ) + + +@UNARY_OP_REGISTRATION.register(ops.tan_op) +def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: + return sge.func("TAN", expr.expr) + + # JSON Ops @UNARY_OP_REGISTRATION.register(ops.JSONExtract) def _(op: ops.JSONExtract, expr: TypedExpr) -> sge.Expression: diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arccos/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arccos/out.sql new file mode 100644 index 0000000000..bbe062c4d7 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arccos/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE WHEN ABS(`bfcol_0`) > 1 THEN IEEE_DIVIDE(0, 0) ELSE ACOS(`bfcol_0`) END AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `float64_col` +FROM `bfcte_1` \ No newline at end of file diff --git 
a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arcsin/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arcsin/out.sql new file mode 100644 index 0000000000..9f7048d845 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arcsin/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE WHEN ABS(`bfcol_0`) > 1 THEN IEEE_DIVIDE(0, 0) ELSE ASIN(`bfcol_0`) END AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `float64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arctan/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arctan/out.sql new file mode 100644 index 0000000000..ec6a22e653 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arctan/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + ATAN(`bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `float64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sinh/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sinh/out.sql new file mode 100644 index 0000000000..dfe6bcc936 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sinh/out.sql @@ -0,0 +1,17 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE + WHEN ABS(`bfcol_0`) > 709.78 + THEN SIGN(`bfcol_0`) * IEEE_DIVIDE(1, 0) + ELSE SINH(`bfcol_0`) 
+ END AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `float64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_tan/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_tan/out.sql new file mode 100644 index 0000000000..5fac274b6b --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_tan/out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + TAN(`bfcol_0`) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `float64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py index 71989f1052..827126fba8 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py @@ -34,6 +34,24 @@ def _apply_unary_op(obj: bpd.DataFrame, op: ops.UnaryOp, arg: str) -> str: return sql +def test_arccos(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["float64_col"]] + sql = _apply_unary_op(bf_df, ops.arccos_op, "float64_col") + snapshot.assert_match(sql, "out.sql") + + +def test_arcsin(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["float64_col"]] + sql = _apply_unary_op(bf_df, ops.arcsin_op, "float64_col") + snapshot.assert_match(sql, "out.sql") + + +def test_arctan(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["float64_col"]] + sql = _apply_unary_op(bf_df, ops.arctan_op, "float64_col") + snapshot.assert_match(sql, "out.sql") + + def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot): bf_df = repeated_types_df[["string_list_col"]] sql = _apply_unary_op(bf_df, 
ops.ArrayToStringOp(delimiter="."), "string_list_col") @@ -88,6 +106,18 @@ def test_sin(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") +def test_sinh(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["float64_col"]] + sql = _apply_unary_op(bf_df, ops.sinh_op, "float64_col") + snapshot.assert_match(sql, "out.sql") + + +def test_tan(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["float64_col"]] + sql = _apply_unary_op(bf_df, ops.tan_op, "float64_col") + snapshot.assert_match(sql, "out.sql") + + def test_json_extract(json_types_df: bpd.DataFrame, snapshot): bf_df = json_types_df[["json_col"]] sql = _apply_unary_op(bf_df, ops.JSONExtract(json_path="$"), "json_col") From 5c0dcbfde70715d81b5c3333846d219d62a3e9e1 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Fri, 18 Jul 2025 20:43:33 +0000 Subject: [PATCH 3/3] address comments --- .../sqlglot/expressions/unary_compiler.py | 21 ++++++++++++------ .../test_unary_compiler/test_arccos/out.sql | 2 +- .../test_unary_compiler/test_arcsin/out.sql | 2 +- .../test_unary_compiler/test_cos/out.sql | 2 +- .../test_unary_compiler/test_sin/out.sql | 2 +- .../test_unary_compiler/test_sinh/out.sql | 2 +- .../expressions/test_binary_compiler.py | 4 ++++ .../expressions/test_unary_compiler.py | 22 +++++++++++++++++++ 8 files changed, 45 insertions(+), 12 deletions(-) diff --git a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py index 76f7adfb30..1fc074e2ef 100644 --- a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py +++ b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py @@ -23,6 +23,14 @@ from bigframes.core.compile.sqlglot.expressions.op_registration import OpRegistration from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr +_NAN = sge.Cast(this=sge.convert("NaN"), to="FLOAT64") +_INF = sge.Cast(this=sge.convert("Infinity"), to="FLOAT64") + 
+# Approximate highest number you can pass to the EXP function and still get a valid FLOAT64 result. +# FLOAT64 has 11 exponent bits, so the max value is about 2**(2**10). +# ln(2**(2**10)) == (2**10)*ln(2) ~= 709.78, so EXP(x) for x>709.78 will overflow. +_FLOAT64_EXP_BOUND = sge.convert(709.78) + UNARY_OP_REGISTRATION = OpRegistration() @@ -36,7 +44,7 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: ifs=[ sge.If( this=sge.func("ABS", expr.expr) > sge.convert(1), - true=sge.func("IEEE_DIVIDE", sge.convert(0), sge.convert(0)), + true=_NAN, ) ], default=sge.func("ACOS", expr.expr), @@ -49,7 +57,7 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: ifs=[ sge.If( this=sge.func("ABS", expr.expr) > sge.convert(1), - true=sge.func("IEEE_DIVIDE", sge.convert(0), sge.convert(0)), + true=_NAN, ) ], default=sge.func("ASIN", expr.expr), @@ -105,7 +113,7 @@ def _(op: ops.ArraySliceOp, expr: TypedExpr) -> sge.Expression: @UNARY_OP_REGISTRATION.register(ops.cos_op) def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: - return sge.func("cos", expr.expr) + return sge.func("COS", expr.expr) @UNARY_OP_REGISTRATION.register(ops.hash_op) @@ -125,7 +133,7 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: @UNARY_OP_REGISTRATION.register(ops.sin_op) def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: - return sge.func("sin", expr.expr) + return sge.func("SIN", expr.expr) @UNARY_OP_REGISTRATION.register(ops.sinh_op) @@ -133,9 +141,8 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: return sge.Case( ifs=[ sge.If( - this=sge.func("ABS", expr.expr) > sge.convert(709.78), - true=sge.func("SIGN", expr.expr) - * sge.func("IEEE_DIVIDE", sge.convert(1), sge.convert(0)), + this=sge.func("ABS", expr.expr) > _FLOAT64_EXP_BOUND, + true=sge.func("SIGN", expr.expr) * _INF, ) ], default=sge.func("SINH", expr.expr), diff --git 
a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arccos/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arccos/out.sql index bbe062c4d7..df695b7fbc 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arccos/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arccos/out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE WHEN ABS(`bfcol_0`) > 1 THEN IEEE_DIVIDE(0, 0) ELSE ACOS(`bfcol_0`) END AS `bfcol_1` + CASE WHEN ABS(`bfcol_0`) > 1 THEN CAST('NaN' AS FLOAT64) ELSE ACOS(`bfcol_0`) END AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arcsin/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arcsin/out.sql index 9f7048d845..3afc7c64b8 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arcsin/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arcsin/out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE WHEN ABS(`bfcol_0`) > 1 THEN IEEE_DIVIDE(0, 0) ELSE ASIN(`bfcol_0`) END AS `bfcol_1` + CASE WHEN ABS(`bfcol_0`) > 1 THEN CAST('NaN' AS FLOAT64) ELSE ASIN(`bfcol_0`) END AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_cos/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_cos/out.sql index c42464bf75..126d2a63f2 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_cos/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_cos/out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - cos(`bfcol_0`) AS `bfcol_1` + COS(`bfcol_0`) AS `bfcol_1` FROM `bfcte_0` ) SELECT 
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sin/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sin/out.sql index 1a07ba546f..62a5cff0b5 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sin/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sin/out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - sin(`bfcol_0`) AS `bfcol_1` + SIN(`bfcol_0`) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sinh/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sinh/out.sql index dfe6bcc936..711dba94a9 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sinh/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sinh/out.sql @@ -7,7 +7,7 @@ WITH `bfcte_0` AS ( *, CASE WHEN ABS(`bfcol_0`) > 709.78 - THEN SIGN(`bfcol_0`) * IEEE_DIVIDE(1, 0) + THEN SIGN(`bfcol_0`) * CAST('Infinity' AS FLOAT64) ELSE SINH(`bfcol_0`) END AS `bfcol_1` FROM `bfcte_0` diff --git a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py index c046d389f6..a78a41fdbf 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py @@ -44,18 +44,21 @@ def _apply_binary_op( def test_add_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["int64_col"]] sql = _apply_binary_op(bf_df, ops.add_op, "int64_col", "int64_col") + snapshot.assert_match(sql, "out.sql") def test_add_numeric_w_scalar(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["int64_col"]] sql = _apply_binary_op(bf_df, ops.add_op, "int64_col", ex.const(1)) + 
snapshot.assert_match(sql, "out.sql") def test_add_string(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["string_col"]] sql = _apply_binary_op(bf_df, ops.add_op, "string_col", ex.const("a")) + snapshot.assert_match(sql, "out.sql") @@ -64,4 +67,5 @@ def test_json_set(json_types_df: bpd.DataFrame, snapshot): sql = _apply_binary_op( bf_df, ops.JSONSet(json_path="$.a"), "json_col", ex.const(100) ) + snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py index 827126fba8..6449f179ab 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py @@ -37,130 +37,152 @@ def _apply_unary_op(obj: bpd.DataFrame, op: ops.UnaryOp, arg: str) -> str: def test_arccos(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["float64_col"]] sql = _apply_unary_op(bf_df, ops.arccos_op, "float64_col") + snapshot.assert_match(sql, "out.sql") def test_arcsin(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["float64_col"]] sql = _apply_unary_op(bf_df, ops.arcsin_op, "float64_col") + snapshot.assert_match(sql, "out.sql") def test_arctan(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["float64_col"]] sql = _apply_unary_op(bf_df, ops.arctan_op, "float64_col") + snapshot.assert_match(sql, "out.sql") def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot): bf_df = repeated_types_df[["string_list_col"]] sql = _apply_unary_op(bf_df, ops.ArrayToStringOp(delimiter="."), "string_list_col") + snapshot.assert_match(sql, "out.sql") def test_array_index(repeated_types_df: bpd.DataFrame, snapshot): bf_df = repeated_types_df[["string_list_col"]] sql = _apply_unary_op(bf_df, convert_index(1), "string_list_col") + snapshot.assert_match(sql, "out.sql") def test_array_slice_with_only_start(repeated_types_df: 
bpd.DataFrame, snapshot): bf_df = repeated_types_df[["string_list_col"]] sql = _apply_unary_op(bf_df, convert_slice(slice(1, None)), "string_list_col") + snapshot.assert_match(sql, "out.sql") def test_array_slice_with_start_and_stop(repeated_types_df: bpd.DataFrame, snapshot): bf_df = repeated_types_df[["string_list_col"]] sql = _apply_unary_op(bf_df, convert_slice(slice(1, 5)), "string_list_col") + snapshot.assert_match(sql, "out.sql") def test_cos(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["float64_col"]] sql = _apply_unary_op(bf_df, ops.cos_op, "float64_col") + snapshot.assert_match(sql, "out.sql") def test_hash(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["string_col"]] sql = _apply_unary_op(bf_df, ops.hash_op, "string_col") + snapshot.assert_match(sql, "out.sql") def test_isnull(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["float64_col"]] sql = _apply_unary_op(bf_df, ops.isnull_op, "float64_col") + snapshot.assert_match(sql, "out.sql") def test_notnull(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["float64_col"]] sql = _apply_unary_op(bf_df, ops.notnull_op, "float64_col") + snapshot.assert_match(sql, "out.sql") def test_sin(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["float64_col"]] sql = _apply_unary_op(bf_df, ops.sin_op, "float64_col") + snapshot.assert_match(sql, "out.sql") def test_sinh(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["float64_col"]] sql = _apply_unary_op(bf_df, ops.sinh_op, "float64_col") + snapshot.assert_match(sql, "out.sql") def test_tan(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["float64_col"]] sql = _apply_unary_op(bf_df, ops.tan_op, "float64_col") + snapshot.assert_match(sql, "out.sql") def test_json_extract(json_types_df: bpd.DataFrame, snapshot): bf_df = json_types_df[["json_col"]] sql = _apply_unary_op(bf_df, ops.JSONExtract(json_path="$"), "json_col") + 
snapshot.assert_match(sql, "out.sql") def test_json_extract_array(json_types_df: bpd.DataFrame, snapshot): bf_df = json_types_df[["json_col"]] sql = _apply_unary_op(bf_df, ops.JSONExtractArray(json_path="$"), "json_col") + snapshot.assert_match(sql, "out.sql") def test_json_extract_string_array(json_types_df: bpd.DataFrame, snapshot): bf_df = json_types_df[["json_col"]] sql = _apply_unary_op(bf_df, ops.JSONExtractStringArray(json_path="$"), "json_col") + snapshot.assert_match(sql, "out.sql") def test_json_query(json_types_df: bpd.DataFrame, snapshot): bf_df = json_types_df[["json_col"]] sql = _apply_unary_op(bf_df, ops.JSONQuery(json_path="$"), "json_col") + snapshot.assert_match(sql, "out.sql") def test_json_query_array(json_types_df: bpd.DataFrame, snapshot): bf_df = json_types_df[["json_col"]] sql = _apply_unary_op(bf_df, ops.JSONQueryArray(json_path="$"), "json_col") + snapshot.assert_match(sql, "out.sql") def test_json_value(json_types_df: bpd.DataFrame, snapshot): bf_df = json_types_df[["json_col"]] sql = _apply_unary_op(bf_df, ops.JSONValue(json_path="$"), "json_col") + snapshot.assert_match(sql, "out.sql") def test_parse_json(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["string_col"]] sql = _apply_unary_op(bf_df, ops.ParseJSON(), "string_col") + snapshot.assert_match(sql, "out.sql") def test_to_json_string(json_types_df: bpd.DataFrame, snapshot): bf_df = json_types_df[["json_col"]] sql = _apply_unary_op(bf_df, ops.ToJSONString(), "json_col") + snapshot.assert_match(sql, "out.sql")