diff --git a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py
index 22079a9a6d..011413b67f 100644
--- a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py
+++ b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py
@@ -175,6 +175,19 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     )
 
 
+@UNARY_OP_REGISTRATION.register(ops.StrContainsRegexOp)
+def _(op: ops.StrContainsRegexOp, expr: TypedExpr) -> sge.Expression:
+    return sge.RegexpLike(this=expr.expr, expression=sge.convert(op.pat))
+
+
+@UNARY_OP_REGISTRATION.register(ops.StrContainsOp)
+def _(op: ops.StrContainsOp, expr: TypedExpr) -> sge.Expression:
+    # Match the pattern literally; LIKE would treat % and _ in it as wildcards.
+    needle = sge.convert(op.pat)
+    position = sge.Anonymous(this="STRPOS", expressions=[expr.expr, needle])
+    return position > sge.convert(0)
+
+
 @UNARY_OP_REGISTRATION.register(ops.date_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Date(this=expr.expr)
@@ -302,6 +315,104 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     )
 
 
+@UNARY_OP_REGISTRATION.register(ops.len_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Length(this=expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.ln_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Case(
+        ifs=[
+            sge.If(
+                this=expr.expr < sge.convert(0),
+                true=_NAN,
+            )
+        ],
+        default=sge.Ln(this=expr.expr),
+    )
+
+
+@UNARY_OP_REGISTRATION.register(ops.log10_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Case(
+        ifs=[
+            sge.If(
+                this=expr.expr < sge.convert(0),
+                true=_NAN,
+            )
+        ],
+        # sge.Log takes the base as `this`; BigQuery renders it as LOG(value, 10).
+        default=sge.Log(this=sge.convert(10), expression=expr.expr),
+    )
+
+
+@UNARY_OP_REGISTRATION.register(ops.log1p_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Case(
+        ifs=[
+            sge.If(
+                this=expr.expr < sge.convert(-1),
+                true=_NAN,
+            )
+        ],
+        default=sge.Ln(this=sge.convert(1) + expr.expr),
+    )
+
+
+@UNARY_OP_REGISTRATION.register(ops.lower_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Lower(this=expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.StrLstripOp)
+def _(op: ops.StrLstripOp, expr: TypedExpr) -> sge.Expression:
+    # Call LTRIM by name so that only leading characters are removed.
+    chars = sge.convert(op.to_strip)
+    return sge.Anonymous(this="LTRIM", expressions=[expr.expr, chars])
+
+
+@UNARY_OP_REGISTRATION.register(ops.neg_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Neg(this=expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.pos_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return expr.expr
+
+
+@UNARY_OP_REGISTRATION.register(ops.reverse_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.func("REVERSE", expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.StrRstripOp)
+def _(op: ops.StrRstripOp, expr: TypedExpr) -> sge.Expression:
+    # Call RTRIM by name so that only trailing characters are removed.
+    chars = sge.convert(op.to_strip)
+    return sge.Anonymous(this="RTRIM", expressions=[expr.expr, chars])
+
+
+@UNARY_OP_REGISTRATION.register(ops.sqrt_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Case(
+        ifs=[
+            sge.If(
+                this=expr.expr < sge.convert(0),
+                true=_NAN,
+            )
+        ],
+        default=sge.Sqrt(this=expr.expr),
+    )
+
+
+@UNARY_OP_REGISTRATION.register(ops.StrStripOp)
+def _(op: ops.StrStripOp, expr: TypedExpr) -> sge.Expression:
+    # The column is the value being trimmed; `to_strip` is the character set.
+    return sge.Trim(this=expr.expr, expression=sge.convert(op.to_strip))
+
+
 @UNARY_OP_REGISTRATION.register(ops.iso_day_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Extract(this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr)
@@ -394,3 +505,8 @@ def _(op: ops.ParseJSON, expr: TypedExpr) -> sge.Expression:
 @UNARY_OP_REGISTRATION.register(ops.ToJSONString)
 def _(op: ops.ToJSONString, expr: TypedExpr) -> sge.Expression:
     return sge.func("TO_JSON_STRING", expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.upper_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    return sge.Upper(this=expr.expr)
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_len/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_len/out.sql
new file mode 100644
index 0000000000..35fd087bc7
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_len/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `string_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    LENGTH(`bfcol_0`) AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `string_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_ln/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_ln/out.sql
new file mode 100644
index 0000000000..1372c088d9
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_ln/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `float64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    CASE WHEN `bfcol_0` < 0 THEN CAST('NaN' AS FLOAT64) ELSE LN(`bfcol_0`) END AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `float64_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_log10/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_log10/out.sql
new file mode 100644
index 0000000000..b4cced439b
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_log10/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `float64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    CASE WHEN `bfcol_0` < 0 THEN CAST('NaN' AS FLOAT64) ELSE LOG(`bfcol_0`, 10) END AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `float64_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_log1p/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_log1p/out.sql
new file mode 100644
index 0000000000..c3902ec174
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_log1p/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `float64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    CASE WHEN `bfcol_0` < -1 THEN CAST('NaN' AS FLOAT64) ELSE LN(1 + `bfcol_0`) END AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `float64_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_lower/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_lower/out.sql
new file mode 100644
index 0000000000..e730cdee15
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_lower/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `string_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    LOWER(`bfcol_0`) AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `string_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_lstrip/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_lstrip/out.sql
new file mode 100644
index 0000000000..49ed89b40b
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_lstrip/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `string_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    LTRIM(`bfcol_0`, ' ') AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `string_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_neg/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_neg/out.sql
new file mode 100644
index 0000000000..46c58f766d
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_neg/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `float64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    -`bfcol_0` AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `float64_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_pos/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_pos/out.sql
new file mode 100644
index 0000000000..2d6322a182
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_pos/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `float64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    `bfcol_0` AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `float64_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_reverse/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_reverse/out.sql
new file mode 100644
index 0000000000..1ef1074149
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_reverse/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `string_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    REVERSE(`bfcol_0`) AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `string_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_rstrip/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_rstrip/out.sql
new file mode 100644
index 0000000000..49ed89b40b
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_rstrip/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `string_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    RTRIM(`bfcol_0`, ' ') AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `string_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sqrt/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sqrt/out.sql
new file mode 100644
index 0000000000..e6a93e5e6c
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sqrt/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `float64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    CASE WHEN `bfcol_0` < 0 THEN CAST('NaN' AS FLOAT64) ELSE SQRT(`bfcol_0`) END AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `float64_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_contains/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_contains/out.sql
new file mode 100644
index 0000000000..a1aa0539ee
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_contains/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `string_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    STRPOS(`bfcol_0`, 'e') > 0 AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `string_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_contains_regex/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_contains_regex/out.sql
new file mode 100644
index 0000000000..d0383172cb
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_contains_regex/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `string_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    REGEXP_CONTAINS(`bfcol_0`, 'e') AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `string_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_strip/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_strip/out.sql
new file mode 100644
index 0000000000..49ed89b40b
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_strip/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `string_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    TRIM(`bfcol_0`, ' ') AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `string_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_upper/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_upper/out.sql
new file mode 100644
index 0000000000..d22c8cff5a
--- /dev/null
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_upper/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `string_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    UPPER(`bfcol_0`) AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `string_col`
+FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py
index 9f04450d38..5f29e98e81 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py
@@ -265,6 +265,104 @@ def test_isupper(scalar_types_df: bpd.DataFrame, snapshot):
     snapshot.assert_match(sql, "out.sql")
 
 
+def test_len(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["string_col"]]
+    sql = _apply_unary_op(bf_df, ops.len_op, "string_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_ln(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["float64_col"]]
+    sql = _apply_unary_op(bf_df, ops.ln_op, "float64_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_log10(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["float64_col"]]
+    sql = _apply_unary_op(bf_df, ops.log10_op, "float64_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_log1p(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["float64_col"]]
+    sql = _apply_unary_op(bf_df, ops.log1p_op, "float64_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_lower(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["string_col"]]
+    sql = _apply_unary_op(bf_df, ops.lower_op, "string_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_lstrip(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["string_col"]]
+    sql = _apply_unary_op(bf_df, ops.StrLstripOp(" "), "string_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_neg(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["float64_col"]]
+    sql = _apply_unary_op(bf_df, ops.neg_op, "float64_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_pos(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["float64_col"]]
+    sql = _apply_unary_op(bf_df, ops.pos_op, "float64_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_reverse(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["string_col"]]
+    sql = _apply_unary_op(bf_df, ops.reverse_op, "string_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_rstrip(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["string_col"]]
+    sql = _apply_unary_op(bf_df, ops.StrRstripOp(" "), "string_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_sqrt(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["float64_col"]]
+    sql = _apply_unary_op(bf_df, ops.sqrt_op, "float64_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_str_contains(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["string_col"]]
+    sql = _apply_unary_op(bf_df, ops.StrContainsOp("e"), "string_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_str_contains_regex(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["string_col"]]
+    sql = _apply_unary_op(bf_df, ops.StrContainsRegexOp("e"), "string_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_strip(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["string_col"]]
+    sql = _apply_unary_op(bf_df, ops.StrStripOp(" "), "string_col")
+
+    snapshot.assert_match(sql, "out.sql")
+
+
 def test_iso_day(scalar_types_df: bpd.DataFrame, snapshot):
     bf_df = scalar_types_df[["timestamp_col"]]
     sql = _apply_unary_op(bf_df, ops.iso_day_op, "timestamp_col")
@@ -375,3 +473,10 @@ def test_to_json_string(json_types_df: bpd.DataFrame, snapshot):
     sql = _apply_unary_op(bf_df, ops.ToJSONString(), "json_col")
 
     snapshot.assert_match(sql, "out.sql")
+
+
+def test_upper(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["string_col"]]
+    sql = _apply_unary_op(bf_df, ops.upper_op, "string_col")
+
+    snapshot.assert_match(sql, "out.sql")