From 25ba76f8379f29a96c2191e77a0fb48dd00a4c19 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Wed, 13 Aug 2025 23:43:39 +0000 Subject: [PATCH] chore: implement blob scalar ops for sqlglot compilers --- .../sqlglot/expressions/binary_compiler.py | 5 ++ .../sqlglot/expressions/unary_compiler.py | 61 +++++++++++-------- bigframes/operations/blob_ops.py | 4 +- .../test_obj_make_ref/out.sql | 15 +++++ .../test_obj_fetch_metadata/out.sql | 25 ++++++++ .../test_obj_get_access_url/out.sql | 25 ++++++++ .../expressions/test_binary_compiler.py | 5 ++ .../expressions/test_unary_compiler.py | 12 ++++ 8 files changed, 124 insertions(+), 28 deletions(-) create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_obj_make_ref/out.sql create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_obj_fetch_metadata/out.sql create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_obj_get_access_url/out.sql diff --git a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py index fa640ee0b2..b5d665e2e5 100644 --- a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py +++ b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py @@ -158,3 +158,8 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: raise TypeError( f"Cannot subtract type {left.dtype} and {right.dtype}. {constants.FEEDBACK_LINK}" ) + + +@BINARY_OP_REGISTRATION.register(ops.obj_make_ref_op) +def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: + return sge.func("OBJ.MAKE_REF", left.expr, right.expr) diff --git a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py index 125c60bbf4..60a8baea96 100644 --- a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py +++ b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py @@ -347,6 +347,26 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: ) +@UNARY_OP_REGISTRATION.register(ops.iso_day_op) +def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: + return sge.Extract(this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr) + + +@UNARY_OP_REGISTRATION.register(ops.iso_week_op) +def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: + return sge.Extract(this=sge.Identifier(this="ISOWEEK"), expression=expr.expr) + + +@UNARY_OP_REGISTRATION.register(ops.iso_year_op) +def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: + return sge.Extract(this=sge.Identifier(this="ISOYEAR"), expression=expr.expr) + + +@UNARY_OP_REGISTRATION.register(ops.isnull_op) +def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: + return sge.Is(this=expr.expr, expression=sge.Null()) + + @UNARY_OP_REGISTRATION.register(ops.isnumeric_op) def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\pN+$")) @@ -445,6 +465,21 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: return sge.TimestampTrunc(this=expr.expr, unit=sge.Identifier(this="DAY")) +@UNARY_OP_REGISTRATION.register(ops.notnull_op) +def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: + return sge.Not(this=sge.Is(this=expr.expr, expression=sge.Null())) + + +@UNARY_OP_REGISTRATION.register(ops.obj_fetch_metadata_op) +def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: + return sge.func("OBJ.FETCH_METADATA", expr.expr) + + +@UNARY_OP_REGISTRATION.register(ops.ObjGetAccessUrl) +def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: + return sge.func("OBJ.GET_ACCESS_URL", expr.expr) + + @UNARY_OP_REGISTRATION.register(ops.pos_op) def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: return expr.expr @@ -488,31 +523,6 @@ def _(op: ops.StrStripOp, expr: TypedExpr) -> sge.Expression: return sge.Trim(this=sge.convert(op.to_strip), expression=expr.expr) -@UNARY_OP_REGISTRATION.register(ops.iso_day_op) -def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: - return sge.Extract(this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr) - - -@UNARY_OP_REGISTRATION.register(ops.iso_week_op) -def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: - return sge.Extract(this=sge.Identifier(this="ISOWEEK"), expression=expr.expr) - - -@UNARY_OP_REGISTRATION.register(ops.iso_year_op) -def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: - return sge.Extract(this=sge.Identifier(this="ISOYEAR"), expression=expr.expr) - - -@UNARY_OP_REGISTRATION.register(ops.isnull_op) -def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: - return sge.Is(this=expr.expr, expression=sge.Null()) - - -@UNARY_OP_REGISTRATION.register(ops.notnull_op) -def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: - return sge.Not(this=sge.Is(this=expr.expr, expression=sge.Null())) - - @UNARY_OP_REGISTRATION.register(ops.sin_op) def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression: return sge.func("SIN", expr.expr) @@ -626,7 +636,6 @@ def _(op: ops.UnixSeconds, expr: TypedExpr) -> sge.Expression: return sge.func("UNIX_SECONDS", expr.expr) -# JSON Ops @UNARY_OP_REGISTRATION.register(ops.JSONExtract) def _(op: ops.JSONExtract, expr: TypedExpr) -> sge.Expression: return sge.func("JSON_EXTRACT", expr.expr, sge.convert(op.json_path)) diff --git a/bigframes/operations/blob_ops.py b/bigframes/operations/blob_ops.py index 2936e0f14f..29f23a2f70 100644 --- a/bigframes/operations/blob_ops.py +++ b/bigframes/operations/blob_ops.py @@ -36,11 +36,11 @@ def output_type(self, *input_types): @dataclasses.dataclass(frozen=True) class ObjMakeRef(base_ops.BinaryOp): - name: typing.ClassVar[str] = "obj.make_ref" + name: typing.ClassVar[str] = "obj_make_ref" def output_type(self, *input_types): if not all(map(dtypes.is_string_like, input_types)): - raise TypeError("obj.make_ref requires string-like arguments") + raise TypeError("obj_make_ref requires string-like arguments") return dtypes.OBJ_REF_DTYPE diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_obj_make_ref/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_obj_make_ref/out.sql new file mode 100644 index 0000000000..e3228feaaa --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_obj_make_ref/out.sql @@ -0,0 +1,15 @@ +WITH `bfcte_0` AS ( + SELECT + `rowindex` AS `bfcol_0`, + `string_col` AS `bfcol_1` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + OBJ.MAKE_REF(`bfcol_1`, 'bigframes-dev.test-region.bigframes-default-connection') AS `bfcol_4` + FROM `bfcte_0` +) +SELECT + `bfcol_0` AS `rowindex`, + `bfcol_4` AS `string_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_obj_fetch_metadata/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_obj_fetch_metadata/out.sql new file mode 100644 index 0000000000..134fdc363b --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_obj_fetch_metadata/out.sql @@ -0,0 +1,25 @@ +WITH `bfcte_0` AS ( + SELECT + `rowindex` AS `bfcol_0`, + `string_col` AS `bfcol_1` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + OBJ.MAKE_REF(`bfcol_1`, 'bigframes-dev.test-region.bigframes-default-connection') AS `bfcol_4` + FROM `bfcte_0` +), `bfcte_2` AS ( + SELECT + *, + OBJ.FETCH_METADATA(`bfcol_4`) AS `bfcol_7` + FROM `bfcte_1` +), `bfcte_3` AS ( + SELECT + *, + `bfcol_7`.`version` AS `bfcol_10` + FROM `bfcte_2` +) +SELECT + `bfcol_0` AS `rowindex`, + `bfcol_10` AS `version` +FROM `bfcte_3` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_obj_get_access_url/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_obj_get_access_url/out.sql new file mode 100644 index 0000000000..4a963b4972 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_obj_get_access_url/out.sql @@ -0,0 +1,25 @@ +WITH `bfcte_0` AS ( + SELECT + `rowindex` AS `bfcol_0`, + `string_col` AS `bfcol_1` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + OBJ.MAKE_REF(`bfcol_1`, 'bigframes-dev.test-region.bigframes-default-connection') AS `bfcol_4` + FROM `bfcte_0` +), `bfcte_2` AS ( + SELECT + *, + OBJ.GET_ACCESS_URL(`bfcol_4`) AS `bfcol_7` + FROM `bfcte_1` +), `bfcte_3` AS ( + SELECT + *, + JSON_VALUE(`bfcol_7`, '$.access_urls.read_url') AS `bfcol_10` + FROM `bfcte_2` +) +SELECT + `bfcol_0` AS `rowindex`, + `bfcol_10` AS `string_col` +FROM `bfcte_3` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py index 6521a92df0..e4e1f93259 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py @@ -164,3 +164,8 @@ def test_mul_timedelta(scalar_types_df: bpd.DataFrame, snapshot): bf_df["numeric_mul_timedelta"] = bf_df["int64_col"] * timedelta snapshot.assert_match(bf_df.sql, "out.sql") + + +def test_obj_make_ref(scalar_types_df: bpd.DataFrame, snapshot): + blob_df = scalar_types_df["string_col"].str.to_blob() + snapshot.assert_match(blob_df.to_frame().sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py index 0a930d68ae..e5a76b067f 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py @@ -405,6 +405,18 @@ def test_normalize(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") +def test_obj_fetch_metadata(scalar_types_df: bpd.DataFrame, snapshot): + blob_s = scalar_types_df["string_col"].str.to_blob() + sql = blob_s.blob.version().to_frame().sql + snapshot.assert_match(sql, "out.sql") + + +def test_obj_get_access_url(scalar_types_df: bpd.DataFrame, snapshot): + blob_s = scalar_types_df["string_col"].str.to_blob() + sql = blob_s.blob.read_url().to_frame().sql + snapshot.assert_match(sql, "out.sql") + + def test_pos(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["float64_col"]] sql = _apply_unary_op(bf_df, ops.pos_op, "float64_col")