Skip to content

Commit e199c75

Browse files
committed
chore: implement eq, eq_null_match, ne compilers
1 parent 1445c98 commit e199c75

File tree

5 files changed

+198
-23
lines changed

5 files changed

+198
-23
lines changed

bigframes/core/compile/sqlglot/expressions/binary_compiler.py

Lines changed: 46 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,7 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
3838
return sge.Concat(expressions=[left.expr, right.expr])
3939

4040
if dtypes.is_numeric(left.dtype) and dtypes.is_numeric(right.dtype):
41-
left_expr = left.expr
42-
if left.dtype == dtypes.BOOL_DTYPE:
43-
left_expr = sge.Cast(this=left_expr, to="INT64")
44-
right_expr = right.expr
45-
if right.dtype == dtypes.BOOL_DTYPE:
46-
right_expr = sge.Cast(this=right_expr, to="INT64")
41+
left_expr, right_expr = _coerce_bools(left, right)
4742
return sge.Add(this=left_expr, expression=right_expr)
4843

4944
if (
@@ -74,15 +69,36 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
7469
)
7570

7671

77-
@BINARY_OP_REGISTRATION.register(ops.div_op)
72+
@BINARY_OP_REGISTRATION.register(ops.eq_op)
73+
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
74+
left_expr, right_expr = _coerce_bools(left, right)
75+
return sge.EQ(this=left_expr, expression=right_expr)
76+
77+
78+
@BINARY_OP_REGISTRATION.register(ops.eq_null_match_op)
7879
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
7980
left_expr = left.expr
80-
if left.dtype == dtypes.BOOL_DTYPE:
81+
if left.dtype == dtypes.BOOL_DTYPE and right.dtype != dtypes.BOOL_DTYPE:
8182
left_expr = sge.Cast(this=left_expr, to="INT64")
83+
8284
right_expr = right.expr
83-
if right.dtype == dtypes.BOOL_DTYPE:
85+
if right.dtype == dtypes.BOOL_DTYPE and left.dtype != dtypes.BOOL_DTYPE:
8486
right_expr = sge.Cast(this=right_expr, to="INT64")
8587

88+
sentinel = sge.convert("$NULL_SENTINEL$")
89+
left_coalesce = sge.Coalesce(
90+
this=sge.Cast(this=left_expr, to="STRING"), expressions=[sentinel]
91+
)
92+
right_coalesce = sge.Coalesce(
93+
this=sge.Cast(this=right_expr, to="STRING"), expressions=[sentinel]
94+
)
95+
return sge.EQ(this=left_coalesce, expression=right_coalesce)
96+
97+
98+
@BINARY_OP_REGISTRATION.register(ops.div_op)
99+
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
100+
left_expr, right_expr = _coerce_bools(left, right)
101+
86102
result = sge.func("IEEE_DIVIDE", left_expr, right_expr)
87103
if left.dtype == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype):
88104
return sge.Cast(this=sge.Floor(this=result), to="INT64")
@@ -139,12 +155,7 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
139155

140156
@BINARY_OP_REGISTRATION.register(ops.mul_op)
141157
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
142-
left_expr = left.expr
143-
if left.dtype == dtypes.BOOL_DTYPE:
144-
left_expr = sge.Cast(this=left_expr, to="INT64")
145-
right_expr = right.expr
146-
if right.dtype == dtypes.BOOL_DTYPE:
147-
right_expr = sge.Cast(this=right_expr, to="INT64")
158+
left_expr, right_expr = _coerce_bools(left, right)
148159

149160
result = sge.Mul(this=left_expr, expression=right_expr)
150161

@@ -156,15 +167,16 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
156167
return result
157168

158169

170+
@BINARY_OP_REGISTRATION.register(ops.ne_op)
171+
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
172+
left_expr, right_expr = _coerce_bools(left, right)
173+
return sge.NEQ(this=left_expr, expression=right_expr)
174+
175+
159176
@BINARY_OP_REGISTRATION.register(ops.sub_op)
160177
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
161178
if dtypes.is_numeric(left.dtype) and dtypes.is_numeric(right.dtype):
162-
left_expr = left.expr
163-
if left.dtype == dtypes.BOOL_DTYPE:
164-
left_expr = sge.Cast(this=left_expr, to="INT64")
165-
right_expr = right.expr
166-
if right.dtype == dtypes.BOOL_DTYPE:
167-
right_expr = sge.Cast(this=right_expr, to="INT64")
179+
left_expr, right_expr = _coerce_bools(left, right)
168180
return sge.Sub(this=left_expr, expression=right_expr)
169181

170182
if (
@@ -201,3 +213,16 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
201213
@BINARY_OP_REGISTRATION.register(ops.obj_make_ref_op)
202214
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
203215
return sge.func("OBJ.MAKE_REF", left.expr, right.expr)
216+
217+
218+
def _coerce_bools(
219+
left: TypedExpr, right: TypedExpr
220+
) -> tuple[sge.Expression, sge.Expression]:
221+
"""Coerce boolean expressions to INT64 for binary operations."""
222+
left_expr = left.expr
223+
if left.dtype == dtypes.BOOL_DTYPE:
224+
left_expr = sge.Cast(this=left_expr, to="INT64")
225+
right_expr = right.expr
226+
if right.dtype == dtypes.BOOL_DTYPE:
227+
right_expr = sge.Cast(this=right_expr, to="INT64")
228+
return left_expr, right_expr
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`bool_col` AS `bfcol_0`,
4+
`int64_col` AS `bfcol_1`
5+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
6+
), `bfcte_1` AS (
7+
SELECT
8+
*,
9+
COALESCE(CAST(`bfcol_1` AS STRING), '$NULL_SENTINEL$') = COALESCE(CAST(CAST(`bfcol_0` AS INT64) AS STRING), '$NULL_SENTINEL$') AS `bfcol_4`
10+
FROM `bfcte_0`
11+
)
12+
SELECT
13+
`bfcol_4` AS `int64_col`
14+
FROM `bfcte_1`
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`bool_col` AS `bfcol_0`,
4+
`int64_col` AS `bfcol_1`,
5+
`rowindex` AS `bfcol_2`
6+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
7+
), `bfcte_1` AS (
8+
SELECT
9+
*,
10+
`bfcol_2` AS `bfcol_6`,
11+
`bfcol_1` AS `bfcol_7`,
12+
`bfcol_0` AS `bfcol_8`,
13+
`bfcol_1` = `bfcol_1` AS `bfcol_9`
14+
FROM `bfcte_0`
15+
), `bfcte_2` AS (
16+
SELECT
17+
*,
18+
`bfcol_6` AS `bfcol_14`,
19+
`bfcol_7` AS `bfcol_15`,
20+
`bfcol_8` AS `bfcol_16`,
21+
`bfcol_9` AS `bfcol_17`,
22+
`bfcol_7` = 1 AS `bfcol_18`
23+
FROM `bfcte_1`
24+
), `bfcte_3` AS (
25+
SELECT
26+
*,
27+
`bfcol_14` AS `bfcol_24`,
28+
`bfcol_15` AS `bfcol_25`,
29+
`bfcol_16` AS `bfcol_26`,
30+
`bfcol_17` AS `bfcol_27`,
31+
`bfcol_18` AS `bfcol_28`,
32+
`bfcol_15` = CAST(`bfcol_16` AS INT64) AS `bfcol_29`
33+
FROM `bfcte_2`
34+
), `bfcte_4` AS (
35+
SELECT
36+
*,
37+
`bfcol_24` AS `bfcol_36`,
38+
`bfcol_25` AS `bfcol_37`,
39+
`bfcol_26` AS `bfcol_38`,
40+
`bfcol_27` AS `bfcol_39`,
41+
`bfcol_28` AS `bfcol_40`,
42+
`bfcol_29` AS `bfcol_41`,
43+
CAST(`bfcol_26` AS INT64) = `bfcol_25` AS `bfcol_42`
44+
FROM `bfcte_3`
45+
)
46+
SELECT
47+
`bfcol_36` AS `rowindex`,
48+
`bfcol_37` AS `int64_col`,
49+
`bfcol_38` AS `bool_col`,
50+
`bfcol_39` AS `int_ne_int`,
51+
`bfcol_40` AS `int_ne_1`,
52+
`bfcol_41` AS `int_ne_bool`,
53+
`bfcol_42` AS `bool_ne_int`
54+
FROM `bfcte_4`
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`bool_col` AS `bfcol_0`,
4+
`int64_col` AS `bfcol_1`,
5+
`rowindex` AS `bfcol_2`
6+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
7+
), `bfcte_1` AS (
8+
SELECT
9+
*,
10+
`bfcol_2` AS `bfcol_6`,
11+
`bfcol_1` AS `bfcol_7`,
12+
`bfcol_0` AS `bfcol_8`,
13+
`bfcol_1` <> `bfcol_1` AS `bfcol_9`
14+
FROM `bfcte_0`
15+
), `bfcte_2` AS (
16+
SELECT
17+
*,
18+
`bfcol_6` AS `bfcol_14`,
19+
`bfcol_7` AS `bfcol_15`,
20+
`bfcol_8` AS `bfcol_16`,
21+
`bfcol_9` AS `bfcol_17`,
22+
`bfcol_7` <> 1 AS `bfcol_18`
23+
FROM `bfcte_1`
24+
), `bfcte_3` AS (
25+
SELECT
26+
*,
27+
`bfcol_14` AS `bfcol_24`,
28+
`bfcol_15` AS `bfcol_25`,
29+
`bfcol_16` AS `bfcol_26`,
30+
`bfcol_17` AS `bfcol_27`,
31+
`bfcol_18` AS `bfcol_28`,
32+
`bfcol_15` <> CAST(`bfcol_16` AS INT64) AS `bfcol_29`
33+
FROM `bfcte_2`
34+
), `bfcte_4` AS (
35+
SELECT
36+
*,
37+
`bfcol_24` AS `bfcol_36`,
38+
`bfcol_25` AS `bfcol_37`,
39+
`bfcol_26` AS `bfcol_38`,
40+
`bfcol_27` AS `bfcol_39`,
41+
`bfcol_28` AS `bfcol_40`,
42+
`bfcol_29` AS `bfcol_41`,
43+
CAST(`bfcol_26` AS INT64) <> `bfcol_25` AS `bfcol_42`
44+
FROM `bfcte_3`
45+
)
46+
SELECT
47+
`bfcol_36` AS `rowindex`,
48+
`bfcol_37` AS `int64_col`,
49+
`bfcol_38` AS `bool_col`,
50+
`bfcol_39` AS `int_ne_int`,
51+
`bfcol_40` AS `int_ne_1`,
52+
`bfcol_41` AS `int_ne_bool`,
53+
`bfcol_42` AS `bool_ne_int`
54+
FROM `bfcte_4`

tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,24 @@ def test_div_timedelta(scalar_types_df: bpd.DataFrame, snapshot):
107107
snapshot.assert_match(bf_df.sql, "out.sql")
108108

109109

110+
def test_eq_null_match(scalar_types_df: bpd.DataFrame, snapshot):
111+
bf_df = scalar_types_df[["int64_col", "bool_col"]]
112+
sql = _apply_binary_op(bf_df, ops.eq_null_match_op, "int64_col", "bool_col")
113+
snapshot.assert_match(sql, "out.sql")
114+
115+
116+
def test_eq_numeric(scalar_types_df: bpd.DataFrame, snapshot):
117+
bf_df = scalar_types_df[["int64_col", "bool_col"]]
118+
119+
bf_df["int_ne_int"] = bf_df["int64_col"] == bf_df["int64_col"]
120+
bf_df["int_ne_1"] = bf_df["int64_col"] == 1
121+
122+
bf_df["int_ne_bool"] = bf_df["int64_col"] == bf_df["bool_col"]
123+
bf_df["bool_ne_int"] = bf_df["bool_col"] == bf_df["int64_col"]
124+
125+
snapshot.assert_match(bf_df.sql, "out.sql")
126+
127+
110128
def test_floordiv_numeric(scalar_types_df: bpd.DataFrame, snapshot):
111129
bf_df = scalar_types_df[["int64_col", "bool_col", "float64_col"]]
112130

@@ -121,8 +139,6 @@ def test_floordiv_numeric(scalar_types_df: bpd.DataFrame, snapshot):
121139
bf_df["int_div_bool"] = bf_df["int64_col"] // bf_df["bool_col"]
122140
bf_df["bool_div_int"] = bf_df["bool_col"] // bf_df["int64_col"]
123141

124-
snapshot.assert_match(bf_df.sql, "out.sql")
125-
126142

127143
def test_floordiv_timedelta(scalar_types_df: bpd.DataFrame, snapshot):
128144
bf_df = scalar_types_df[["timestamp_col", "date_col"]]
@@ -200,3 +216,15 @@ def test_mul_timedelta(scalar_types_df: bpd.DataFrame, snapshot):
200216
def test_obj_make_ref(scalar_types_df: bpd.DataFrame, snapshot):
201217
blob_df = scalar_types_df["string_col"].str.to_blob()
202218
snapshot.assert_match(blob_df.to_frame().sql, "out.sql")
219+
220+
221+
def test_ne_numeric(scalar_types_df: bpd.DataFrame, snapshot):
222+
bf_df = scalar_types_df[["int64_col", "bool_col"]]
223+
224+
bf_df["int_ne_int"] = bf_df["int64_col"] != bf_df["int64_col"]
225+
bf_df["int_ne_1"] = bf_df["int64_col"] != 1
226+
227+
bf_df["int_ne_bool"] = bf_df["int64_col"] != bf_df["bool_col"]
228+
bf_df["bool_ne_int"] = bf_df["bool_col"] != bf_df["int64_col"]
229+
230+
snapshot.assert_match(bf_df.sql, "out.sql")

0 commit comments

Comments
 (0)