diff --git a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py
index 9cca15f352..1fc074e2ef 100644
--- a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py
+++ b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py
@@ -23,6 +23,14 @@
 from bigframes.core.compile.sqlglot.expressions.op_registration import OpRegistration
 from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
 
+_NAN = sge.Cast(this=sge.convert("NaN"), to="FLOAT64")
+_INF = sge.Cast(this=sge.convert("Infinity"), to="FLOAT64")
+
+# Approximately the largest value you can pass to the EXP function and still get a valid FLOAT64 result.
+# FLOAT64 has 11 exponent bits, so the max value is about 2**(2**10).
+# ln(2**(2**10)) == (2**10)*ln(2) ~= 709.78, so EXP(x) for x > 709.78 will overflow.
+_FLOAT64_EXP_BOUND = sge.convert(709.78)
+
 UNARY_OP_REGISTRATION = OpRegistration()
 
 
@@ -30,6 +38,37 @@ def compile(op: ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return UNARY_OP_REGISTRATION[op](op, expr)
 
 
+@UNARY_OP_REGISTRATION.register(ops.arccos_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Case(
+        ifs=[
+            sge.If(
+                this=sge.func("ABS", expr.expr) > sge.convert(1),
+                true=_NAN,
+            )
+        ],
+        default=sge.func("ACOS", expr.expr),
+    )
+
+
+@UNARY_OP_REGISTRATION.register(ops.arcsin_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Case(
+        ifs=[
+            sge.If(
+                this=sge.func("ABS", expr.expr) > sge.convert(1),
+                true=_NAN,
+            )
+        ],
+        default=sge.func("ASIN", expr.expr),
+    )
+
+
+@UNARY_OP_REGISTRATION.register(ops.arctan_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.func("ATAN", expr.expr)
+
+
 @UNARY_OP_REGISTRATION.register(ops.ArrayToStringOp)
 def _(op: ops.ArrayToStringOp, expr: TypedExpr) -> sge.Expression:
     return sge.ArrayToString(this=expr.expr, expression=f"'{op.delimiter}'")
@@ -72,6 +111,49 @@ def _(op: ops.ArraySliceOp, expr: TypedExpr) -> sge.Expression:
     return sge.array(selected_elements)
 
 
+@UNARY_OP_REGISTRATION.register(ops.cos_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.func("COS", expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.hash_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.func("FARM_FINGERPRINT", expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.isnull_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Is(this=expr.expr, expression=sge.Null())
+
+
+@UNARY_OP_REGISTRATION.register(ops.notnull_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Not(this=sge.Is(this=expr.expr, expression=sge.Null()))
+
+
+@UNARY_OP_REGISTRATION.register(ops.sin_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.func("SIN", expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.sinh_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Case(
+        ifs=[
+            sge.If(
+                this=sge.func("ABS", expr.expr) > _FLOAT64_EXP_BOUND,
+                true=sge.func("SIGN", expr.expr) * _INF,
+            )
+        ],
+        default=sge.func("SINH", expr.expr),
+    )
+
+
+@UNARY_OP_REGISTRATION.register(ops.tan_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.func("TAN", expr.expr)
+
+
 # JSON Ops
 @UNARY_OP_REGISTRATION.register(ops.JSONExtract)
 def _(op: ops.JSONExtract, expr: TypedExpr) -> sge.Expression:
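
Note (editorial, not part of the patch): the arithmetic in the _FLOAT64_EXP_BOUND comment above can be sanity-checked with the Python standard library. A minimal sketch, assuming CPython and IEEE 754 doubles:

    import math
    import sys

    # FLOAT64 is an IEEE 754 double: the largest finite value is just under 2**1024.
    print(sys.float_info.max)      # 1.7976931348623157e+308
    print((2**10) * math.log(2))   # 709.782712893384, i.e. roughly 709.78
    print(math.exp(709.78))        # still finite, roughly 1.79e+308
    # math.exp(709.79) raises OverflowError in CPython; EXP/SINH in BigQuery hit the
    # same FLOAT64 ceiling, which is presumably why the sinh compiler guards on
    # ABS(x) > 709.78 and emits SIGN(x) * CAST('Infinity' AS FLOAT64) instead.
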
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arccos/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arccos/out.sql
new file mode 100644
index 0000000000..df695b7fbc
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arccos/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `float64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    CASE WHEN ABS(`bfcol_0`) > 1 THEN CAST('NaN' AS FLOAT64) ELSE ACOS(`bfcol_0`) END AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `float64_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arcsin/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arcsin/out.sql
new file mode 100644
index 0000000000..3afc7c64b8
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arcsin/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `float64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    CASE WHEN ABS(`bfcol_0`) > 1 THEN CAST('NaN' AS FLOAT64) ELSE ASIN(`bfcol_0`) END AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `float64_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arctan/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arctan/out.sql
new file mode 100644
index 0000000000..ec6a22e653
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arctan/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `float64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    ATAN(`bfcol_0`) AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `float64_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_cos/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_cos/out.sql
new file mode 100644
index 0000000000..126d2a63f2
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_cos/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `float64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    COS(`bfcol_0`) AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `float64_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_hash/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_hash/out.sql
new file mode 100644
index 0000000000..14d6df6d22
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_hash/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `string_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    FARM_FINGERPRINT(`bfcol_0`) AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `string_col`
+FROM `bfcte_1`
\ No newline at end of file
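
Note (editorial, not part of the patch): the snapshot SQL above can be reproduced by rendering the same sqlglot expression the compiler builds. A minimal sketch using sqlglot directly, mirroring the arcsin registration; the column name float64_col stands in for the compiler's internal bfcol_0:

    import sqlglot.expressions as sge

    col = sge.column("float64_col", quoted=True)
    arcsin_expr = sge.Case(
        ifs=[
            sge.If(
                this=sge.func("ABS", col) > sge.convert(1),
                true=sge.Cast(this=sge.convert("NaN"), to="FLOAT64"),
            )
        ],
        default=sge.func("ASIN", col),
    )
    # Renders (roughly) to:
    # CASE WHEN ABS(`float64_col`) > 1 THEN CAST('NaN' AS FLOAT64) ELSE ASIN(`float64_col`) END
    print(arcsin_expr.sql(dialect="bigquery"))
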
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isnull/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isnull/out.sql
new file mode 100644
index 0000000000..55a2ebb970
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isnull/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `float64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    `bfcol_0` IS NULL AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `float64_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_notnull/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_notnull/out.sql
new file mode 100644
index 0000000000..c1961f9d62
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_notnull/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `float64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    NOT `bfcol_0` IS NULL AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `float64_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sin/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sin/out.sql
new file mode 100644
index 0000000000..62a5cff0b5
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sin/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `float64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    SIN(`bfcol_0`) AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `float64_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sinh/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sinh/out.sql
new file mode 100644
index 0000000000..711dba94a9
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sinh/out.sql
@@ -0,0 +1,17 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `float64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    CASE
+      WHEN ABS(`bfcol_0`) > 709.78
+      THEN SIGN(`bfcol_0`) * CAST('Infinity' AS FLOAT64)
+      ELSE SINH(`bfcol_0`)
+    END AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `float64_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_tan/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_tan/out.sql
new file mode 100644
index 0000000000..5fac274b6b
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_tan/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `float64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    TAN(`bfcol_0`) AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `float64_col`
+FROM `bfcte_1`
\ No newline at end of file
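
Note (editorial, not part of the patch): the CASE guards in the snapshots above turn inputs that BigQuery's native ACOS/ASIN would reject (values outside [-1, 1]) into NaN, and turn SINH inputs that would overflow FLOAT64 into signed infinity. That matches the NumPy/pandas convention these ops presumably target; a minimal sketch of that convention, assuming NumPy is installed:

    import numpy as np

    # Out-of-domain inputs: NumPy returns NaN (with a RuntimeWarning) rather than erroring.
    print(np.arccos(2.0))    # nan
    print(np.arcsin(-1.5))   # nan

    # Inputs whose sinh exceeds the FLOAT64 range: NumPy reports signed infinity.
    print(np.sinh(1000.0))   # inf
    print(np.sinh(-1000.0))  # -inf
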
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py
index c046d389f6..a78a41fdbf 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py
@@ -44,18 +44,21 @@ def _apply_binary_op(
 def test_add_numeric(scalar_types_df: bpd.DataFrame, snapshot):
     bf_df = scalar_types_df[["int64_col"]]
     sql = _apply_binary_op(bf_df, ops.add_op, "int64_col", "int64_col")
+
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_add_numeric_w_scalar(scalar_types_df: bpd.DataFrame, snapshot):
     bf_df = scalar_types_df[["int64_col"]]
     sql = _apply_binary_op(bf_df, ops.add_op, "int64_col", ex.const(1))
+
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_add_string(scalar_types_df: bpd.DataFrame, snapshot):
     bf_df = scalar_types_df[["string_col"]]
     sql = _apply_binary_op(bf_df, ops.add_op, "string_col", ex.const("a"))
+
     snapshot.assert_match(sql, "out.sql")
 
 
@@ -64,4 +67,5 @@ def test_json_set(json_types_df: bpd.DataFrame, snapshot):
     sql = _apply_binary_op(
         bf_df, ops.JSONSet(json_path="$.a"), "json_col", ex.const(100)
     )
+
     snapshot.assert_match(sql, "out.sql")
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py
index d74e9c9d48..6449f179ab 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py
@@ -34,73 +34,155 @@ def _apply_unary_op(obj: bpd.DataFrame, op: ops.UnaryOp, arg: str) -> str:
     return sql
 
 
+def test_arccos(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["float64_col"]]
+    sql = _apply_unary_op(bf_df, ops.arccos_op, "float64_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_arcsin(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["float64_col"]]
+    sql = _apply_unary_op(bf_df, ops.arcsin_op, "float64_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_arctan(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["float64_col"]]
+    sql = _apply_unary_op(bf_df, ops.arctan_op, "float64_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
 def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot):
     bf_df = repeated_types_df[["string_list_col"]]
     sql = _apply_unary_op(bf_df, ops.ArrayToStringOp(delimiter="."), "string_list_col")
+
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_array_index(repeated_types_df: bpd.DataFrame, snapshot):
     bf_df = repeated_types_df[["string_list_col"]]
     sql = _apply_unary_op(bf_df, convert_index(1), "string_list_col")
+
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_array_slice_with_only_start(repeated_types_df: bpd.DataFrame, snapshot):
     bf_df = repeated_types_df[["string_list_col"]]
     sql = _apply_unary_op(bf_df, convert_slice(slice(1, None)), "string_list_col")
+
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_array_slice_with_start_and_stop(repeated_types_df: bpd.DataFrame, snapshot):
     bf_df = repeated_types_df[["string_list_col"]]
     sql = _apply_unary_op(bf_df, convert_slice(slice(1, 5)), "string_list_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_cos(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["float64_col"]]
+    sql = _apply_unary_op(bf_df, ops.cos_op, "float64_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_hash(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["string_col"]]
+    sql = _apply_unary_op(bf_df, ops.hash_op, "string_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_isnull(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["float64_col"]]
+    sql = _apply_unary_op(bf_df, ops.isnull_op, "float64_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_notnull(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["float64_col"]]
+    sql = _apply_unary_op(bf_df, ops.notnull_op, "float64_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_sin(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["float64_col"]]
+    sql = _apply_unary_op(bf_df, ops.sin_op, "float64_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_sinh(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["float64_col"]]
+    sql = _apply_unary_op(bf_df, ops.sinh_op, "float64_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_tan(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["float64_col"]]
+    sql = _apply_unary_op(bf_df, ops.tan_op, "float64_col")
+
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_json_extract(json_types_df: bpd.DataFrame, snapshot):
     bf_df = json_types_df[["json_col"]]
     sql = _apply_unary_op(bf_df, ops.JSONExtract(json_path="$"), "json_col")
+
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_json_extract_array(json_types_df: bpd.DataFrame, snapshot):
     bf_df = json_types_df[["json_col"]]
     sql = _apply_unary_op(bf_df, ops.JSONExtractArray(json_path="$"), "json_col")
+
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_json_extract_string_array(json_types_df: bpd.DataFrame, snapshot):
     bf_df = json_types_df[["json_col"]]
     sql = _apply_unary_op(bf_df, ops.JSONExtractStringArray(json_path="$"), "json_col")
+
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_json_query(json_types_df: bpd.DataFrame, snapshot):
     bf_df = json_types_df[["json_col"]]
     sql = _apply_unary_op(bf_df, ops.JSONQuery(json_path="$"), "json_col")
+
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_json_query_array(json_types_df: bpd.DataFrame, snapshot):
     bf_df = json_types_df[["json_col"]]
     sql = _apply_unary_op(bf_df, ops.JSONQueryArray(json_path="$"), "json_col")
+
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_json_value(json_types_df: bpd.DataFrame, snapshot):
     bf_df = json_types_df[["json_col"]]
     sql = _apply_unary_op(bf_df, ops.JSONValue(json_path="$"), "json_col")
+
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_parse_json(scalar_types_df: bpd.DataFrame, snapshot):
     bf_df = scalar_types_df[["string_col"]]
     sql = _apply_unary_op(bf_df, ops.ParseJSON(), "string_col")
+
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_to_json_string(json_types_df: bpd.DataFrame, snapshot):
     bf_df = json_types_df[["json_col"]]
     sql = _apply_unary_op(bf_df, ops.ToJSONString(), "json_col")
+
     snapshot.assert_match(sql, "out.sql")