diff --git a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py index 84e783bb66..3fcba04cfd 100644 --- a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py +++ b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py @@ -140,7 +140,16 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: @BINARY_OP_REGISTRATION.register(ops.ge_op) def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: - return sge.GTE(this=left.expr, expression=right.expr) + left_expr = _coerce_bool_to_int(left) + right_expr = _coerce_bool_to_int(right) + return sge.GTE(this=left_expr, expression=right_expr) + + +@BINARY_OP_REGISTRATION.register(ops.gt_op) +def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: + left_expr = _coerce_bool_to_int(left) + right_expr = _coerce_bool_to_int(right) + return sge.GT(this=left_expr, expression=right_expr) @BINARY_OP_REGISTRATION.register(ops.JSONSet) @@ -148,6 +157,20 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: return sge.func("JSON_SET", left.expr, sge.convert(op.json_path), right.expr) +@BINARY_OP_REGISTRATION.register(ops.lt_op) +def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: + left_expr = _coerce_bool_to_int(left) + right_expr = _coerce_bool_to_int(right) + return sge.LT(this=left_expr, expression=right_expr) + + +@BINARY_OP_REGISTRATION.register(ops.le_op) +def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: + left_expr = _coerce_bool_to_int(left) + right_expr = _coerce_bool_to_int(right) + return sge.LTE(this=left_expr, expression=right_expr) + + @BINARY_OP_REGISTRATION.register(ops.mul_op) def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: left_expr = _coerce_bool_to_int(left) @@ -170,6 +193,11 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: return sge.NEQ(this=left_expr, expression=right_expr) +@BINARY_OP_REGISTRATION.register(ops.obj_make_ref_op) +def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: + return sge.func("OBJ.MAKE_REF", left.expr, right.expr) + + @BINARY_OP_REGISTRATION.register(ops.sub_op) def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: if dtypes.is_numeric(left.dtype) and dtypes.is_numeric(right.dtype): @@ -202,11 +230,6 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: ) -@BINARY_OP_REGISTRATION.register(ops.obj_make_ref_op) -def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: - return sge.func("OBJ.MAKE_REF", left.expr, right.expr) - - def _coerce_bool_to_int(typed_expr: TypedExpr) -> sge.Expression: """Coerce boolean expression to integer.""" if typed_expr.dtype == dtypes.BOOL_DTYPE: diff --git a/tests/system/small/engines/test_comparison_ops.py b/tests/system/small/engines/test_comparison_ops.py index fefff93f58..0fcc48b10a 100644 --- a/tests/system/small/engines/test_comparison_ops.py +++ b/tests/system/small/engines/test_comparison_ops.py @@ -48,7 +48,7 @@ def apply_op_pairwise( return new_arr -@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True) +@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True) @pytest.mark.parametrize( "op", [ diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_ge_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_ge_numeric/out.sql new file mode 100644 index 0000000000..494cb861a7 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_ge_numeric/out.sql @@ -0,0 +1,54 @@ +WITH `bfcte_0` AS ( + SELECT + `bool_col` AS `bfcol_0`, + `int64_col` AS `bfcol_1`, + `rowindex` AS `bfcol_2` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + `bfcol_2` AS `bfcol_6`, + `bfcol_1` AS `bfcol_7`, + `bfcol_0` AS `bfcol_8`, + `bfcol_1` >= `bfcol_1` AS `bfcol_9` + FROM `bfcte_0` +), `bfcte_2` AS ( + SELECT + *, + `bfcol_6` AS `bfcol_14`, + `bfcol_7` AS `bfcol_15`, + `bfcol_8` AS `bfcol_16`, + `bfcol_9` AS `bfcol_17`, + `bfcol_7` >= 1 AS `bfcol_18` + FROM `bfcte_1` +), `bfcte_3` AS ( + SELECT + *, + `bfcol_14` AS `bfcol_24`, + `bfcol_15` AS `bfcol_25`, + `bfcol_16` AS `bfcol_26`, + `bfcol_17` AS `bfcol_27`, + `bfcol_18` AS `bfcol_28`, + `bfcol_15` >= CAST(`bfcol_16` AS INT64) AS `bfcol_29` + FROM `bfcte_2` +), `bfcte_4` AS ( + SELECT + *, + `bfcol_24` AS `bfcol_36`, + `bfcol_25` AS `bfcol_37`, + `bfcol_26` AS `bfcol_38`, + `bfcol_27` AS `bfcol_39`, + `bfcol_28` AS `bfcol_40`, + `bfcol_29` AS `bfcol_41`, + CAST(`bfcol_26` AS INT64) >= `bfcol_25` AS `bfcol_42` + FROM `bfcte_3` +) +SELECT + `bfcol_36` AS `rowindex`, + `bfcol_37` AS `int64_col`, + `bfcol_38` AS `bool_col`, + `bfcol_39` AS `int_ge_int`, + `bfcol_40` AS `int_ge_1`, + `bfcol_41` AS `int_ge_bool`, + `bfcol_42` AS `bool_ge_int` +FROM `bfcte_4` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_gt_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_gt_numeric/out.sql new file mode 100644 index 0000000000..b0c8768850 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_gt_numeric/out.sql @@ -0,0 +1,54 @@ +WITH `bfcte_0` AS ( + SELECT + `bool_col` AS `bfcol_0`, + `int64_col` AS `bfcol_1`, + `rowindex` AS `bfcol_2` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + `bfcol_2` AS `bfcol_6`, + `bfcol_1` AS `bfcol_7`, + `bfcol_0` AS `bfcol_8`, + `bfcol_1` > `bfcol_1` AS `bfcol_9` + FROM `bfcte_0` +), `bfcte_2` AS ( + SELECT + *, + `bfcol_6` AS `bfcol_14`, + `bfcol_7` AS `bfcol_15`, + `bfcol_8` AS `bfcol_16`, + `bfcol_9` AS `bfcol_17`, + `bfcol_7` > 1 AS `bfcol_18` + FROM `bfcte_1` +), `bfcte_3` AS ( + SELECT + *, + `bfcol_14` AS `bfcol_24`, + `bfcol_15` AS `bfcol_25`, + `bfcol_16` AS `bfcol_26`, + `bfcol_17` AS `bfcol_27`, + `bfcol_18` AS `bfcol_28`, + `bfcol_15` > CAST(`bfcol_16` AS INT64) AS `bfcol_29` + FROM `bfcte_2` +), `bfcte_4` AS ( + SELECT + *, + `bfcol_24` AS `bfcol_36`, + `bfcol_25` AS `bfcol_37`, + `bfcol_26` AS `bfcol_38`, + `bfcol_27` AS `bfcol_39`, + `bfcol_28` AS `bfcol_40`, + `bfcol_29` AS `bfcol_41`, + CAST(`bfcol_26` AS INT64) > `bfcol_25` AS `bfcol_42` + FROM `bfcte_3` +) +SELECT + `bfcol_36` AS `rowindex`, + `bfcol_37` AS `int64_col`, + `bfcol_38` AS `bool_col`, + `bfcol_39` AS `int_gt_int`, + `bfcol_40` AS `int_gt_1`, + `bfcol_41` AS `int_gt_bool`, + `bfcol_42` AS `bool_gt_int` +FROM `bfcte_4` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_le_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_le_numeric/out.sql new file mode 100644 index 0000000000..2f642d8cbb --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_le_numeric/out.sql @@ -0,0 +1,54 @@ +WITH `bfcte_0` AS ( + SELECT + `bool_col` AS `bfcol_0`, + `int64_col` AS `bfcol_1`, + `rowindex` AS `bfcol_2` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + `bfcol_2` AS `bfcol_6`, + `bfcol_1` AS `bfcol_7`, + `bfcol_0` AS `bfcol_8`, + `bfcol_1` <= `bfcol_1` AS `bfcol_9` + FROM `bfcte_0` +), `bfcte_2` AS ( + SELECT + *, + `bfcol_6` AS `bfcol_14`, + `bfcol_7` AS `bfcol_15`, + `bfcol_8` AS `bfcol_16`, + `bfcol_9` AS `bfcol_17`, + `bfcol_7` <= 1 AS `bfcol_18` + FROM `bfcte_1` +), `bfcte_3` AS ( + SELECT + *, + `bfcol_14` AS `bfcol_24`, + `bfcol_15` AS `bfcol_25`, + `bfcol_16` AS `bfcol_26`, + `bfcol_17` AS `bfcol_27`, + `bfcol_18` AS `bfcol_28`, + `bfcol_15` <= CAST(`bfcol_16` AS INT64) AS `bfcol_29` + FROM `bfcte_2` +), `bfcte_4` AS ( + SELECT + *, + `bfcol_24` AS `bfcol_36`, + `bfcol_25` AS `bfcol_37`, + `bfcol_26` AS `bfcol_38`, + `bfcol_27` AS `bfcol_39`, + `bfcol_28` AS `bfcol_40`, + `bfcol_29` AS `bfcol_41`, + CAST(`bfcol_26` AS INT64) <= `bfcol_25` AS `bfcol_42` + FROM `bfcte_3` +) +SELECT + `bfcol_36` AS `rowindex`, + `bfcol_37` AS `int64_col`, + `bfcol_38` AS `bool_col`, + `bfcol_39` AS `int_le_int`, + `bfcol_40` AS `int_le_1`, + `bfcol_41` AS `int_le_bool`, + `bfcol_42` AS `bool_le_int` +FROM `bfcte_4` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_lt_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_lt_numeric/out.sql new file mode 100644 index 0000000000..b244e3cbcc --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_lt_numeric/out.sql @@ -0,0 +1,54 @@ +WITH `bfcte_0` AS ( + SELECT + `bool_col` AS `bfcol_0`, + `int64_col` AS `bfcol_1`, + `rowindex` AS `bfcol_2` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + `bfcol_2` AS `bfcol_6`, + `bfcol_1` AS `bfcol_7`, + `bfcol_0` AS `bfcol_8`, + `bfcol_1` < `bfcol_1` AS `bfcol_9` + FROM `bfcte_0` +), `bfcte_2` AS ( + SELECT + *, + `bfcol_6` AS `bfcol_14`, + `bfcol_7` AS `bfcol_15`, + `bfcol_8` AS `bfcol_16`, + `bfcol_9` AS `bfcol_17`, + `bfcol_7` < 1 AS `bfcol_18` + FROM `bfcte_1` +), `bfcte_3` AS ( + SELECT + *, + `bfcol_14` AS `bfcol_24`, + `bfcol_15` AS `bfcol_25`, + `bfcol_16` AS `bfcol_26`, + `bfcol_17` AS `bfcol_27`, + `bfcol_18` AS `bfcol_28`, + `bfcol_15` < CAST(`bfcol_16` AS INT64) AS `bfcol_29` + FROM `bfcte_2` +), `bfcte_4` AS ( + SELECT + *, + `bfcol_24` AS `bfcol_36`, + `bfcol_25` AS `bfcol_37`, + `bfcol_26` AS `bfcol_38`, + `bfcol_27` AS `bfcol_39`, + `bfcol_28` AS `bfcol_40`, + `bfcol_29` AS `bfcol_41`, + CAST(`bfcol_26` AS INT64) < `bfcol_25` AS `bfcol_42` + FROM `bfcte_3` +) +SELECT + `bfcol_36` AS `rowindex`, + `bfcol_37` AS `int64_col`, + `bfcol_38` AS `bool_col`, + `bfcol_39` AS `int_lt_int`, + `bfcol_40` AS `int_lt_1`, + `bfcol_41` AS `int_lt_bool`, + `bfcol_42` AS `bool_lt_int` +FROM `bfcte_4` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py index 11586cad02..a2218d0afa 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py @@ -149,6 +149,30 @@ def test_floordiv_timedelta(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(bf_df.sql, "out.sql") +def test_gt_numeric(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["int64_col", "bool_col"]] + + bf_df["int_gt_int"] = bf_df["int64_col"] > bf_df["int64_col"] + bf_df["int_gt_1"] = bf_df["int64_col"] > 1 + + bf_df["int_gt_bool"] = bf_df["int64_col"] > bf_df["bool_col"] + bf_df["bool_gt_int"] = bf_df["bool_col"] > bf_df["int64_col"] + + snapshot.assert_match(bf_df.sql, "out.sql") + + +def test_ge_numeric(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["int64_col", "bool_col"]] + + bf_df["int_ge_int"] = bf_df["int64_col"] >= bf_df["int64_col"] + bf_df["int_ge_1"] = bf_df["int64_col"] >= 1 + + bf_df["int_ge_bool"] = bf_df["int64_col"] >= bf_df["bool_col"] + bf_df["bool_ge_int"] = bf_df["bool_col"] >= bf_df["int64_col"] + + snapshot.assert_match(bf_df.sql, "out.sql") + + def test_json_set(json_types_df: bpd.DataFrame, snapshot): bf_df = json_types_df[["json_col"]] sql = _apply_binary_op( @@ -158,6 +182,30 @@ def test_json_set(json_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") +def test_lt_numeric(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["int64_col", "bool_col"]] + + bf_df["int_lt_int"] = bf_df["int64_col"] < bf_df["int64_col"] + bf_df["int_lt_1"] = bf_df["int64_col"] < 1 + + bf_df["int_lt_bool"] = bf_df["int64_col"] < bf_df["bool_col"] + bf_df["bool_lt_int"] = bf_df["bool_col"] < bf_df["int64_col"] + + snapshot.assert_match(bf_df.sql, "out.sql") + + +def test_le_numeric(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["int64_col", "bool_col"]] + + bf_df["int_le_int"] = bf_df["int64_col"] <= bf_df["int64_col"] + bf_df["int_le_1"] = bf_df["int64_col"] <= 1 + + bf_df["int_le_bool"] = bf_df["int64_col"] <= bf_df["bool_col"] + bf_df["bool_le_int"] = bf_df["bool_col"] <= bf_df["int64_col"] + + snapshot.assert_match(bf_df.sql, "out.sql") + + def test_sub_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["int64_col", "bool_col"]]