Skip to content

Commit 47716f1

Browse files
committed
chore: implement blob scalar ops for sqlglot compilers
1 parent 132e0ed commit 47716f1

File tree

8 files changed

+124
-28
lines changed

8 files changed

+124
-28
lines changed

bigframes/core/compile/sqlglot/expressions/binary_compiler.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,8 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
123123
@BINARY_OP_REGISTRATION.register(ops.JSONSet)
124124
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
125125
return sge.func("JSON_SET", left.expr, sge.convert(op.json_path), right.expr)
126+
127+
128+
@BINARY_OP_REGISTRATION.register(ops.obj_make_ref_op)
129+
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
130+
return sge.func("OBJ.MAKE_REF", left.expr, right.expr)

bigframes/core/compile/sqlglot/expressions/unary_compiler.py

Lines changed: 35 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,26 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
347347
)
348348

349349

350+
@UNARY_OP_REGISTRATION.register(ops.iso_day_op)
351+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
352+
return sge.Extract(this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr)
353+
354+
355+
@UNARY_OP_REGISTRATION.register(ops.iso_week_op)
356+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
357+
return sge.Extract(this=sge.Identifier(this="ISOWEEK"), expression=expr.expr)
358+
359+
360+
@UNARY_OP_REGISTRATION.register(ops.iso_year_op)
361+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
362+
return sge.Extract(this=sge.Identifier(this="ISOYEAR"), expression=expr.expr)
363+
364+
365+
@UNARY_OP_REGISTRATION.register(ops.isnull_op)
366+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
367+
return sge.Is(this=expr.expr, expression=sge.Null())
368+
369+
350370
@UNARY_OP_REGISTRATION.register(ops.isnumeric_op)
351371
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
352372
return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\pN+$"))
@@ -445,6 +465,21 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
445465
return sge.TimestampTrunc(this=expr.expr, unit=sge.Identifier(this="DAY"))
446466

447467

468+
@UNARY_OP_REGISTRATION.register(ops.notnull_op)
469+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
470+
return sge.Not(this=sge.Is(this=expr.expr, expression=sge.Null()))
471+
472+
473+
@UNARY_OP_REGISTRATION.register(ops.obj_fetch_metadata_op)
474+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
475+
return sge.func("OBJ.FETCH_METADATA", expr.expr)
476+
477+
478+
@UNARY_OP_REGISTRATION.register(ops.ObjGetAccessUrl)
479+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
480+
return sge.func("OBJ.GET_ACCESS_URL", expr.expr)
481+
482+
448483
@UNARY_OP_REGISTRATION.register(ops.pos_op)
449484
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
450485
return expr.expr
@@ -488,31 +523,6 @@ def _(op: ops.StrStripOp, expr: TypedExpr) -> sge.Expression:
488523
return sge.Trim(this=sge.convert(op.to_strip), expression=expr.expr)
489524

490525

491-
@UNARY_OP_REGISTRATION.register(ops.iso_day_op)
492-
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
493-
return sge.Extract(this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr)
494-
495-
496-
@UNARY_OP_REGISTRATION.register(ops.iso_week_op)
497-
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
498-
return sge.Extract(this=sge.Identifier(this="ISOWEEK"), expression=expr.expr)
499-
500-
501-
@UNARY_OP_REGISTRATION.register(ops.iso_year_op)
502-
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
503-
return sge.Extract(this=sge.Identifier(this="ISOYEAR"), expression=expr.expr)
504-
505-
506-
@UNARY_OP_REGISTRATION.register(ops.isnull_op)
507-
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
508-
return sge.Is(this=expr.expr, expression=sge.Null())
509-
510-
511-
@UNARY_OP_REGISTRATION.register(ops.notnull_op)
512-
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
513-
return sge.Not(this=sge.Is(this=expr.expr, expression=sge.Null()))
514-
515-
516526
@UNARY_OP_REGISTRATION.register(ops.sin_op)
517527
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
518528
return sge.func("SIN", expr.expr)
@@ -626,7 +636,6 @@ def _(op: ops.UnixSeconds, expr: TypedExpr) -> sge.Expression:
626636
return sge.func("UNIX_SECONDS", expr.expr)
627637

628638

629-
# JSON Ops
630639
@UNARY_OP_REGISTRATION.register(ops.JSONExtract)
631640
def _(op: ops.JSONExtract, expr: TypedExpr) -> sge.Expression:
632641
return sge.func("JSON_EXTRACT", expr.expr, sge.convert(op.json_path))

bigframes/operations/blob_ops.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,11 @@ def output_type(self, *input_types):
3636

3737
@dataclasses.dataclass(frozen=True)
3838
class ObjMakeRef(base_ops.BinaryOp):
39-
name: typing.ClassVar[str] = "obj.make_ref"
39+
name: typing.ClassVar[str] = "obj_make_ref"
4040

4141
def output_type(self, *input_types):
4242
if not all(map(dtypes.is_string_like, input_types)):
43-
raise TypeError("obj.make_ref requires string-like arguments")
43+
raise TypeError("obj_make_ref requires string-like arguments")
4444

4545
return dtypes.OBJ_REF_DTYPE
4646

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`rowindex` AS `bfcol_0`,
4+
`string_col` AS `bfcol_1`
5+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
6+
), `bfcte_1` AS (
7+
SELECT
8+
*,
9+
OBJ.MAKE_REF(`bfcol_1`, 'bigframes-dev.test-region.bigframes-default-connection') AS `bfcol_4`
10+
FROM `bfcte_0`
11+
)
12+
SELECT
13+
`bfcol_0` AS `rowindex`,
14+
`bfcol_4` AS `string_col`
15+
FROM `bfcte_1`
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`rowindex` AS `bfcol_0`,
4+
`string_col` AS `bfcol_1`
5+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
6+
), `bfcte_1` AS (
7+
SELECT
8+
*,
9+
OBJ.MAKE_REF(`bfcol_1`, 'bigframes-dev.test-region.bigframes-default-connection') AS `bfcol_4`
10+
FROM `bfcte_0`
11+
), `bfcte_2` AS (
12+
SELECT
13+
*,
14+
OBJ.FETCH_METADATA(`bfcol_4`) AS `bfcol_7`
15+
FROM `bfcte_1`
16+
), `bfcte_3` AS (
17+
SELECT
18+
*,
19+
`bfcol_7`.`version` AS `bfcol_10`
20+
FROM `bfcte_2`
21+
)
22+
SELECT
23+
`bfcol_0` AS `rowindex`,
24+
`bfcol_10` AS `version`
25+
FROM `bfcte_3`
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`rowindex` AS `bfcol_0`,
4+
`string_col` AS `bfcol_1`
5+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
6+
), `bfcte_1` AS (
7+
SELECT
8+
*,
9+
OBJ.MAKE_REF(`bfcol_1`, 'bigframes-dev.test-region.bigframes-default-connection') AS `bfcol_4`
10+
FROM `bfcte_0`
11+
), `bfcte_2` AS (
12+
SELECT
13+
*,
14+
OBJ.GET_ACCESS_URL(`bfcol_4`) AS `bfcol_7`
15+
FROM `bfcte_1`
16+
), `bfcte_3` AS (
17+
SELECT
18+
*,
19+
JSON_VALUE(`bfcol_7`, '$.access_urls.read_url') AS `bfcol_10`
20+
FROM `bfcte_2`
21+
)
22+
SELECT
23+
`bfcol_0` AS `rowindex`,
24+
`bfcol_10` AS `string_col`
25+
FROM `bfcte_3`

tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@ def test_add_unsupported_raises(scalar_types_df: bpd.DataFrame):
8282
_apply_binary_op(scalar_types_df, ops.add_op, "int64_col", "string_col")
8383

8484

85+
def test_obj_make_ref(scalar_types_df: bpd.DataFrame, snapshot):
86+
blob_df = scalar_types_df["string_col"].str.to_blob()
87+
snapshot.assert_match(blob_df.to_frame().sql, "out.sql")
88+
89+
8590
def test_json_set(json_types_df: bpd.DataFrame, snapshot):
8691
bf_df = json_types_df[["json_col"]]
8792
sql = _apply_binary_op(

tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,18 @@ def test_normalize(scalar_types_df: bpd.DataFrame, snapshot):
405405
snapshot.assert_match(sql, "out.sql")
406406

407407

408+
def test_obj_fetch_metadata(scalar_types_df: bpd.DataFrame, snapshot):
409+
blob_s = scalar_types_df["string_col"].str.to_blob()
410+
sql = blob_s.blob.version().to_frame().sql
411+
snapshot.assert_match(sql, "out.sql")
412+
413+
414+
def test_obj_get_access_url(scalar_types_df: bpd.DataFrame, snapshot):
415+
blob_s = scalar_types_df["string_col"].str.to_blob()
416+
sql = blob_s.blob.read_url().to_frame().sql
417+
snapshot.assert_match(sql, "out.sql")
418+
419+
408420
def test_pos(scalar_types_df: bpd.DataFrame, snapshot):
409421
bf_df = scalar_types_df[["float64_col"]]
410422
sql = _apply_unary_op(bf_df, ops.pos_op, "float64_col")

0 commit comments

Comments
 (0)