Skip to content

Commit f5559cc

Browse files
committed
chore: Migrate up to 7 scalar operators to SQLGlot
Migrated operators: isdigit_op, islower_op, isnumeric_op, isspace_op, isupper_op, iso_day_op, iso_week_op
1 parent 04f1e6c commit f5559cc

File tree

9 files changed

+188
-0
lines changed

9 files changed

+188
-0
lines changed

bigframes/core/compile/sqlglot/expressions/unary_compiler.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,61 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
259259
return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\d+$"))
260260

261261

262+
@UNARY_OP_REGISTRATION.register(ops.isdigit_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``isdigit_op`` into a regular-expression match on the operand.

    Args:
        op: The unary operator being compiled (not consulted here).
        expr: The typed operand; its ``expr`` attribute is the value tested.

    Returns:
        A ``RegexpLike`` expression testing the operand against the pattern.
    """
    # Anchored at both ends and using `+`, so the whole value must consist of
    # one or more characters from the Unicode decimal-digit class (Nd); an
    # empty string therefore does not match.
    decimal_digits_only = sge.convert(r"^\p{Nd}+$")
    return sge.RegexpLike(this=expr.expr, expression=decimal_digits_only)
265+
266+
267+
@UNARY_OP_REGISTRATION.register(ops.islower_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``islower_op`` as a pair of case-folding comparisons.

    Args:
        op: The unary operator being compiled (not consulted here).
        expr: The typed operand; its ``expr`` attribute is the value tested.

    Returns:
        An ``And`` of two comparisons: lower-casing leaves the value
        unchanged, and upper-casing changes it.
    """
    value = expr.expr
    # LOWER(value) = value: lower-casing is a no-op on the value.
    unchanged_by_lower = sge.EQ(this=sge.Lower(this=value), expression=value)
    # UPPER(value) <> value: upper-casing alters at least one character.
    changed_by_upper = sge.NEQ(this=sge.Upper(this=value), expression=value)
    return sge.And(this=unchanged_by_lower, expression=changed_by_upper)
279+
280+
281+
@UNARY_OP_REGISTRATION.register(ops.isnumeric_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``isnumeric_op`` into a regular-expression match on the operand.

    Args:
        op: The unary operator being compiled (not consulted here).
        expr: The typed operand; its ``expr`` attribute is the value tested.

    Returns:
        A ``RegexpLike`` expression testing the operand against the pattern.
    """
    # Anchored, one-or-more match against the Unicode "Number" general
    # category (N); an empty string does not match.
    numeric_only = sge.convert(r"^\pN+$")
    return sge.RegexpLike(this=expr.expr, expression=numeric_only)
284+
285+
286+
@UNARY_OP_REGISTRATION.register(ops.isspace_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``isspace_op`` into a regular-expression match on the operand.

    Args:
        op: The unary operator being compiled (not consulted here).
        expr: The typed operand; its ``expr`` attribute is the value tested.

    Returns:
        A ``RegexpLike`` expression testing the operand against the pattern.
    """
    # Anchored, one-or-more match against the regex whitespace class; an
    # empty string does not match.
    whitespace_only = sge.convert(r"^\s+$")
    return sge.RegexpLike(this=expr.expr, expression=whitespace_only)
289+
290+
291+
@UNARY_OP_REGISTRATION.register(ops.isupper_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``isupper_op`` as a pair of case-folding comparisons.

    Args:
        op: The unary operator being compiled (not consulted here).
        expr: The typed operand; its ``expr`` attribute is the value tested.

    Returns:
        An ``And`` of two comparisons: upper-casing leaves the value
        unchanged, and lower-casing changes it.
    """
    value = expr.expr
    # UPPER(value) = value: upper-casing is a no-op on the value.
    unchanged_by_upper = sge.EQ(this=sge.Upper(this=value), expression=value)
    # LOWER(value) <> value: lower-casing alters at least one character.
    changed_by_lower = sge.NEQ(this=sge.Lower(this=value), expression=value)
    return sge.And(this=unchanged_by_upper, expression=changed_by_lower)
303+
304+
305+
@UNARY_OP_REGISTRATION.register(ops.iso_day_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``iso_day_op`` to the ISO-8601 day of week (Monday=1 .. Sunday=7).

    BigQuery's ``EXTRACT(DAYOFWEEK FROM x)`` returns values in 1..7 with
    Sunday as 1. Simply adding 1 to that would produce 2..8 with Sunday=2,
    which is not the ISO numbering, so the value is rotated instead:
    ``MOD(dayofweek + 5, 7) + 1``.

    Args:
        op: The unary operator being compiled (not consulted here).
        expr: The typed operand; its ``expr`` attribute is the date/time value.

    Returns:
        An expression evaluating to the ISO day-of-week number.
    """
    sunday_first_day = sge.Extract(
        this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr
    )
    # Rotate to a zero-based, Monday-first index:
    #   Sunday (1) -> 6, Monday (2) -> 0, ..., Saturday (7) -> 5.
    # The explicit Paren keeps the addition grouped even if MOD is rendered
    # as an infix operator rather than as a function call.
    monday_first_index = sge.Mod(
        this=sge.Paren(this=sunday_first_day + sge.convert(5)),
        expression=sge.convert(7),
    )
    # ISO numbering is one-based: Monday=1 .. Sunday=7.
    return sge.Add(this=monday_first_index, expression=sge.convert(1))
310+
311+
312+
@UNARY_OP_REGISTRATION.register(ops.iso_week_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``iso_week_op`` to an ``EXTRACT(ISOWEEK FROM ...)`` expression.

    Args:
        op: The unary operator being compiled (not consulted here).
        expr: The typed operand; its ``expr`` attribute is the date/time value.

    Returns:
        An ``Extract`` expression for the ``ISOWEEK`` date part of the operand.
    """
    iso_week_part = sge.Identifier(this="ISOWEEK")
    return sge.Extract(this=iso_week_part, expression=expr.expr)
315+
316+
262317
@UNARY_OP_REGISTRATION.register(ops.isnull_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``isnull_op`` to an ``IS NULL`` test on the operand.

    Args:
        op: The unary operator being compiled (not consulted here).
        expr: The typed operand; its ``expr`` attribute is the value tested.

    Returns:
        An ``Is`` expression comparing the operand with ``Null``.
    """
    null_literal = sge.Null()
    return sge.Is(this=expr.expr, expression=null_literal)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`string_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
REGEXP_CONTAINS(`bfcol_0`, '^\\p{Nd}+$') AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `string_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`string_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
LOWER(`bfcol_0`) = `bfcol_0` AND UPPER(`bfcol_0`) <> `bfcol_0` AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `string_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`string_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
REGEXP_CONTAINS(`bfcol_0`, '^\\pN+$') AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `string_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`timestamp_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
EXTRACT(DAYOFWEEK FROM `bfcol_0`) + 1 AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `timestamp_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`timestamp_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
EXTRACT(ISOWEEK FROM `bfcol_0`) AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `timestamp_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`string_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
REGEXP_CONTAINS(`bfcol_0`, '^\\s+$') AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `string_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`string_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
UPPER(`bfcol_0`) = `bfcol_0` AND LOWER(`bfcol_0`) <> `bfcol_0` AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `string_col`
13+
FROM `bfcte_1`

tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,48 @@ def test_isdecimal(scalar_types_df: bpd.DataFrame, snapshot):
224224
snapshot.assert_match(sql, "out.sql")
225225

226226

227+
def test_isdigit(scalar_types_df: bpd.DataFrame, snapshot):
    """Check the output of applying ``isdigit_op`` to ``string_col`` against the snapshot."""
    column = "string_col"
    compiled = _apply_unary_op(scalar_types_df[[column]], ops.isdigit_op, column)
    # The result is compared with the stored "out.sql" snapshot.
    snapshot.assert_match(compiled, "out.sql")
231+
232+
233+
def test_islower(scalar_types_df: bpd.DataFrame, snapshot):
    """Check the output of applying ``islower_op`` to ``string_col`` against the snapshot."""
    column = "string_col"
    compiled = _apply_unary_op(scalar_types_df[[column]], ops.islower_op, column)
    # The result is compared with the stored "out.sql" snapshot.
    snapshot.assert_match(compiled, "out.sql")
237+
238+
239+
def test_isnumeric(scalar_types_df: bpd.DataFrame, snapshot):
    """Check the output of applying ``isnumeric_op`` to ``string_col`` against the snapshot."""
    column = "string_col"
    compiled = _apply_unary_op(scalar_types_df[[column]], ops.isnumeric_op, column)
    # The result is compared with the stored "out.sql" snapshot.
    snapshot.assert_match(compiled, "out.sql")
243+
244+
245+
def test_isspace(scalar_types_df: bpd.DataFrame, snapshot):
    """Check the output of applying ``isspace_op`` to ``string_col`` against the snapshot."""
    column = "string_col"
    compiled = _apply_unary_op(scalar_types_df[[column]], ops.isspace_op, column)
    # The result is compared with the stored "out.sql" snapshot.
    snapshot.assert_match(compiled, "out.sql")
249+
250+
251+
def test_isupper(scalar_types_df: bpd.DataFrame, snapshot):
    """Check the output of applying ``isupper_op`` to ``string_col`` against the snapshot."""
    column = "string_col"
    compiled = _apply_unary_op(scalar_types_df[[column]], ops.isupper_op, column)
    # The result is compared with the stored "out.sql" snapshot.
    snapshot.assert_match(compiled, "out.sql")
255+
256+
257+
def test_iso_day(scalar_types_df: bpd.DataFrame, snapshot):
    """Check the output of applying ``iso_day_op`` to ``timestamp_col`` against the snapshot."""
    column = "timestamp_col"
    compiled = _apply_unary_op(scalar_types_df[[column]], ops.iso_day_op, column)
    # The result is compared with the stored "out.sql" snapshot.
    snapshot.assert_match(compiled, "out.sql")
261+
262+
263+
def test_iso_week(scalar_types_df: bpd.DataFrame, snapshot):
    """Check the output of applying ``iso_week_op`` to ``timestamp_col`` against the snapshot."""
    column = "timestamp_col"
    compiled = _apply_unary_op(scalar_types_df[[column]], ops.iso_week_op, column)
    # The result is compared with the stored "out.sql" snapshot.
    snapshot.assert_match(compiled, "out.sql")
267+
268+
227269
def test_isnull(scalar_types_df: bpd.DataFrame, snapshot):
228270
bf_df = scalar_types_df[["float64_col"]]
229271
sql = _apply_unary_op(bf_df, ops.isnull_op, "float64_col")

0 commit comments

Comments
 (0)