Skip to content

Commit 16b30bb

Browse files
feat: Add BigQuery ObjectRef functions to bigframes.bigquery.obj
This change introduces support for the following BigQuery ObjectRef functions:
- `OBJ.FETCH_METADATA`
- `OBJ.GET_ACCESS_URL`
- `OBJ.MAKE_REF`

These are exposed via a new `bigframes.bigquery.obj` module.

Changes:
- Added `ObjMakeRefJson` and `ObjGetAccessUrlWithDuration` operations in `bigframes/operations/blob_ops.py`.
- Updated `bigframes/operations/__init__.py` to export the new operations.
- Updated `bigframes/core/compile/ibis_compiler/scalar_op_registry.py` and `bigframes/core/compile/sqlglot/expressions/blob_ops.py` to support the new operations.
- Created `bigframes/bigquery/_operations/obj.py` with the implementation of `fetch_metadata`, `get_access_url`, and `make_ref`.
- Created `bigframes/bigquery/obj.py` to expose the functions.
- Added unit tests in `tests/unit/bigquery/test_obj.py`.
1 parent 173b83d commit 16b30bb

File tree

7 files changed

+287
-3
lines changed

7 files changed

+287
-3
lines changed
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
"""
17+
ObjectRef functions defined from
18+
https://cloud.google.com/bigquery/docs/reference/standard-sql/object-ref-functions
19+
"""
20+
21+
22+
from __future__ import annotations
23+
24+
from typing import Optional, Union
25+
26+
import bigframes.core.utils as utils
27+
import bigframes.operations as ops
28+
import bigframes.series as series
29+
30+
31+
@utils.preview(name="The ObjectRef API `fetch_metadata`")
def fetch_metadata(
    objectref: series.Series,
) -> series.Series:
    """Fetch Cloud Storage metadata for ObjectRef values via OBJ.FETCH_METADATA.

    Args:
        objectref (bigframes.series.Series):
            Partially populated ObjectRef values: the uri and authorizer
            fields are populated and the details field isn't.

    Returns:
        bigframes.series.Series: Fully populated ObjectRef values. The
        metadata is provided in the details field of each returned value.
    """
    metadata_op = ops.obj_fetch_metadata_op
    return objectref._apply_unary_op(metadata_op)
45+
46+
47+
@utils.preview(name="The ObjectRef API `get_access_url`")
def get_access_url(
    objectref: series.Series,
    mode: str,
    duration: Optional[series.Series] = None,
) -> series.Series:
    """Build access URLs for ObjectRef values via OBJ.GET_ACCESS_URL.

    The result is JSON holding the reference information of the input
    ObjectRef value together with access URLs that can be used to read or
    modify the Cloud Storage object.

    Args:
        objectref (bigframes.series.Series):
            ObjectRef values, each representing a Cloud Storage object.
        mode (str):
            A STRING value that identifies the type of URL to return. The
            following values are supported:

            * ``'r'``: returns a URL that lets you read the object.
            * ``'rw'``: returns two URLs, one that lets you read the object
              and one that lets you modify it.
        duration (bigframes.series.Series, optional):
            An optional INTERVAL value that specifies how long the generated
            access URLs remain valid, between 30 minutes and 6 hours. For
            example, INTERVAL 2 HOUR yields URLs that expire after 2 hours.
            The default value is 6 hours.

    Returns:
        bigframes.series.Series: JSON values containing the Cloud Storage
        object reference information from the input ObjectRef value, plus
        one or more URLs for accessing the Cloud Storage object.
    """
    # No duration: only the mode is needed, so a unary op is sufficient.
    if duration is None:
        return objectref._apply_unary_op(ops.ObjGetAccessUrl(mode=mode))

    # The duration Series is the second operand; the mode travels on the op.
    timed_op = ops.ObjGetAccessUrlWithDuration(mode=mode)
    return objectref._apply_binary_op(duration, timed_op)
73+
74+
75+
@utils.preview(name="The ObjectRef API `make_ref`")
def make_ref(
    uri_or_json: series.Series,
    authorizer: Optional[series.Series] = None,
) -> series.Series:
    """Create ObjectRef values for Cloud Storage objects via OBJ.MAKE_REF.

    Args:
        uri_or_json (bigframes.series.Series):
            Either STRING values containing the URI of the Cloud Storage
            object, for example, gs://mybucket/flowers/12345.jpg, or JSON
            values that represent a Cloud Storage object.
        authorizer (bigframes.series.Series, optional):
            STRING values containing the Cloud Resource connection used to
            access the Cloud Storage object. Required if uri_or_json is a
            URI string.

    Returns:
        bigframes.series.Series: An ObjectRef value.
    """
    # With no authorizer, the input is assumed to be a JSON objectref.
    if authorizer is None:
        return uri_or_json._apply_unary_op(ops.obj_make_ref_json_op)

    # Otherwise pair each URI with its authorizer.
    uri_op = ops.obj_make_ref_op
    return uri_or_json._apply_binary_op(authorizer, uri_op)

bigframes/bigquery/obj.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""This module exposes `BigQuery ObjectRef
16+
<https://cloud.google.com/bigquery/docs/object-table-object-ref-functions>`_ functions.
17+
"""
18+
19+
from bigframes.bigquery._operations.obj import (
20+
fetch_metadata,
21+
get_access_url,
22+
make_ref,
23+
)
24+
25+
# Public API of this module: the ObjectRef functions imported above.
__all__ = ["fetch_metadata", "get_access_url", "make_ref"]

bigframes/core/compile/ibis_compiler/scalar_op_registry.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,6 +1250,15 @@ def obj_get_access_url_op_impl(obj_ref: ibis_types.Value, op: ops.ObjGetAccessUr
12501250
return obj_get_access_url(obj_ref=obj_ref, mode=op.mode)
12511251

12521252

1253+
@scalar_op_compiler.register_binary_op(ops.ObjGetAccessUrlWithDuration, pass_op=True)
def obj_get_access_url_with_duration_op_impl(
    obj_ref: ibis_types.Value,
    duration: ibis_types.Value,
    op: ops.ObjGetAccessUrlWithDuration,
):
    """Compile ObjGetAccessUrlWithDuration into the three-argument OBJ.GET_ACCESS_URL call.

    The access mode is read from the op; the ObjectRef and duration are the
    two compiled operands.
    """
    access_mode = op.mode
    return obj_get_access_url_with_duration(
        obj_ref=obj_ref,
        mode=access_mode,
        duration=duration,
    )
1260+
1261+
12531262
### Binary Ops
12541263
def short_circuit_nulls(type_override: typing.Optional[ibis_dtypes.DataType] = None):
12551264
"""Wraps a binary operator to generate nulls of the expected type if either input is a null scalar."""
@@ -1807,6 +1816,11 @@ def obj_make_ref_op(x: ibis_types.Value, y: ibis_types.Value):
18071816
return obj_make_ref(uri=x, authorizer=y)
18081817

18091818

1819+
@scalar_op_compiler.register_unary_op(ops.obj_make_ref_json_op)
def obj_make_ref_json_op(x: ibis_types.Value):
    """Compile the JSON make-ref op into the single-argument OBJ.MAKE_REF call."""
    objectref_json = x
    return obj_make_ref_json(objectref_json=objectref_json)
1822+
1823+
18101824
# Ternary Operations
18111825
@scalar_op_compiler.register_ternary_op(ops.where_op)
18121826
def where_op(
@@ -2141,11 +2155,23 @@ def obj_make_ref(uri: str, authorizer: str) -> _OBJ_REF_IBIS_DTYPE: # type: ign
21412155
"""Make ObjectRef Struct from uri and connection."""
21422156

21432157

2158+
@ibis_udf.scalar.builtin(name="OBJ.MAKE_REF")
def obj_make_ref_json(objectref_json: ibis_dtypes.JSON) -> _OBJ_REF_IBIS_DTYPE:  # type: ignore
    """Build an ObjectRef struct from a JSON value (builtin OBJ.MAKE_REF)."""
2161+
2162+
21442163
@ibis_udf.scalar.builtin(name="OBJ.GET_ACCESS_URL")
def obj_get_access_url(obj_ref: _OBJ_REF_IBIS_DTYPE, mode: ibis_dtypes.String) -> ibis_dtypes.JSON:  # type: ignore
    """Return the access URL JSON (ObjectRefRuntime) for an ObjectRef and mode."""
21472166

21482167

2168+
@ibis_udf.scalar.builtin(name="OBJ.GET_ACCESS_URL")
def obj_get_access_url_with_duration(
    obj_ref: _OBJ_REF_IBIS_DTYPE, mode: ibis_dtypes.String, duration: ibis_dtypes.Interval(unit="us")  # type: ignore
) -> ibis_dtypes.JSON:  # type: ignore
    """Return the access URL JSON (ObjectRefRuntime) for an ObjectRef, mode and duration."""
2173+
2174+
21492175
@ibis_udf.scalar.builtin(name="ltrim")
21502176
def str_lstrip_op( # type: ignore[empty-body]
21512177
x: ibis_dtypes.String, to_strip: ibis_dtypes.String

bigframes/core/compile/sqlglot/expressions/blob_ops.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,25 @@ def _(expr: TypedExpr) -> sge.Expression:
2929
return sge.func("OBJ.FETCH_METADATA", expr.expr)
3030

3131

32-
@register_unary_op(ops.ObjGetAccessUrl)
33-
def _(expr: TypedExpr) -> sge.Expression:
34-
return sge.func("OBJ.GET_ACCESS_URL", expr.expr)
32+
@register_unary_op(ops.ObjGetAccessUrl, pass_op=True)
def _(expr: TypedExpr, op: ops.ObjGetAccessUrl) -> sge.Expression:
    """Compile to OBJ.GET_ACCESS_URL(objectref, mode), with mode as a string literal."""
    mode_literal = sge.Literal.string(op.mode)
    return sge.func("OBJ.GET_ACCESS_URL", expr.expr, mode_literal)
35+
36+
37+
@register_binary_op(ops.ObjGetAccessUrlWithDuration, pass_op=True)
def _(
    left: TypedExpr,
    right: TypedExpr,
    op: ops.ObjGetAccessUrlWithDuration,
) -> sge.Expression:
    """Compile to OBJ.GET_ACCESS_URL(objectref, mode, duration).

    ``left`` is the ObjectRef operand and ``right`` the duration operand; the
    mode comes from the op and is emitted as a string literal between them.
    """
    mode_literal = sge.Literal.string(op.mode)
    call_args = (left.expr, mode_literal, right.expr)
    return sge.func("OBJ.GET_ACCESS_URL", *call_args)
3544

3645

3746
@register_binary_op(ops.obj_make_ref_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
    """Compile to the two-argument OBJ.MAKE_REF(left, right) call."""
    first_arg, second_arg = left.expr, right.expr
    return sge.func("OBJ.MAKE_REF", first_arg, second_arg)
49+
50+
51+
@register_unary_op(ops.obj_make_ref_json_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile to the single-argument OBJ.MAKE_REF(expr) call."""
    sole_arg = expr.expr
    return sge.func("OBJ.MAKE_REF", sole_arg)

bigframes/operations/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,10 @@
4040
)
4141
from bigframes.operations.blob_ops import (
4242
obj_fetch_metadata_op,
43+
obj_make_ref_json_op,
4344
obj_make_ref_op,
4445
ObjGetAccessUrl,
46+
ObjGetAccessUrlWithDuration,
4547
)
4648
from bigframes.operations.bool_ops import and_op, or_op, xor_op
4749
from bigframes.operations.comparison_ops import (
@@ -365,6 +367,8 @@
365367
"ArrayToStringOp",
366368
# Blob ops
367369
"ObjGetAccessUrl",
370+
"ObjGetAccessUrlWithDuration",
371+
"obj_make_ref_json_op",
368372
"obj_make_ref_op",
369373
"obj_fetch_metadata_op",
370374
# Struct ops

bigframes/operations/blob_ops.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,15 @@ def output_type(self, *input_types):
3434
return dtypes.JSON_DTYPE
3535

3636

37+
@dataclasses.dataclass(frozen=True)
class ObjGetAccessUrlWithDuration(base_ops.BinaryOp):
    """Binary op for OBJ.GET_ACCESS_URL called with an explicit duration operand."""

    name: typing.ClassVar[str] = "obj_get_access_url_with_duration"
    # Access mode forwarded to OBJ.GET_ACCESS_URL, e.g. "r" (read) or
    # "rw" (read and write).
    mode: str

    def output_type(self, *input_types):
        """Return the JSON dtype, independent of the input types."""
        return dtypes.JSON_DTYPE
44+
45+
3746
@dataclasses.dataclass(frozen=True)
3847
class ObjMakeRef(base_ops.BinaryOp):
3948
name: typing.ClassVar[str] = "obj_make_ref"
@@ -46,3 +55,14 @@ def output_type(self, *input_types):
4655

4756

4857
obj_make_ref_op = ObjMakeRef()
58+
59+
60+
@dataclasses.dataclass(frozen=True)
class ObjMakeRefJson(base_ops.UnaryOp):
    """Unary op that produces an ObjectRef value from a single input."""

    name: typing.ClassVar[str] = "obj_make_ref_json"

    def output_type(self, *input_types):
        """Return the ObjectRef dtype, independent of the input types."""
        return dtypes.OBJ_REF_DTYPE


# Shared instance: the op has no parameters.
obj_make_ref_json_op = ObjMakeRefJson()

tests/unit/bigquery/test_obj.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pytest
16+
from unittest.mock import MagicMock
17+
18+
import bigframes.bigquery.obj as obj
19+
import bigframes.operations as ops
20+
import bigframes.series as series
21+
22+
def test_fetch_metadata_op_structure():
    """The shared fetch-metadata op reports the expected op name."""
    assert ops.obj_fetch_metadata_op.name == "obj_fetch_metadata"
25+
26+
def test_get_access_url_op_structure():
    """ObjGetAccessUrl exposes its op name and the mode it was built with."""
    access_op = ops.ObjGetAccessUrl(mode="r")
    assert access_op.name == "obj_get_access_url"
    assert access_op.mode == "r"
30+
31+
def test_get_access_url_with_duration_op_structure():
    """ObjGetAccessUrlWithDuration exposes its op name and the mode it was built with."""
    timed_op = ops.ObjGetAccessUrlWithDuration(mode="rw")
    assert timed_op.name == "obj_get_access_url_with_duration"
    assert timed_op.mode == "rw"
35+
36+
def test_make_ref_op_structure():
    """The shared make-ref op reports the expected op name."""
    assert ops.obj_make_ref_op.name == "obj_make_ref"
39+
40+
def test_make_ref_json_op_structure():
    """The shared JSON make-ref op reports the expected op name."""
    assert ops.obj_make_ref_json_op.name == "obj_make_ref_json"
43+
44+
def test_fetch_metadata_calls_apply_unary_op():
    """fetch_metadata hands the fetch-metadata op to Series._apply_unary_op."""
    objectref = MagicMock(spec=series.Series)

    obj.fetch_metadata(objectref)

    unary = objectref._apply_unary_op
    unary.assert_called_once()
    # call_args[0] is the tuple of positional arguments of the call.
    positional = unary.call_args[0]
    assert positional[0] == ops.obj_fetch_metadata_op
52+
53+
def test_get_access_url_calls_apply_unary_op_without_duration():
    """Without a duration, get_access_url dispatches an ObjGetAccessUrl unary op."""
    objectref = MagicMock(spec=series.Series)

    obj.get_access_url(objectref, mode="r")

    unary = objectref._apply_unary_op
    unary.assert_called_once()
    # call_args[0] is the tuple of positional arguments of the call.
    dispatched_op = unary.call_args[0][0]
    assert isinstance(dispatched_op, ops.ObjGetAccessUrl)
    assert dispatched_op.mode == "r"
62+
63+
def test_get_access_url_calls_apply_binary_op_with_duration():
    """With a duration, get_access_url dispatches only the binary op.

    The duration Series must be passed as the other operand and the requested
    mode must be carried by an ObjGetAccessUrlWithDuration op.
    """
    s = MagicMock(spec=series.Series)
    duration = MagicMock(spec=series.Series)

    obj.get_access_url(s, mode="rw", duration=duration)

    s._apply_binary_op.assert_called_once()
    # Only positional arguments are inspected; keyword arguments are unused.
    args, _ = s._apply_binary_op.call_args
    assert args[0] == duration
    assert isinstance(args[1], ops.ObjGetAccessUrlWithDuration)
    assert args[1].mode == "rw"
    # The duration-less unary path must not be taken in addition.
    s._apply_unary_op.assert_not_called()
74+
75+
def test_make_ref_calls_apply_binary_op_with_authorizer():
    """With an authorizer, make_ref dispatches the binary make-ref op."""
    uri = MagicMock(spec=series.Series)
    auth = MagicMock(spec=series.Series)

    obj.make_ref(uri, authorizer=auth)

    binary = uri._apply_binary_op
    binary.assert_called_once()
    # call_args[0] is the tuple of positional arguments of the call.
    positional = binary.call_args[0]
    assert positional[0] == auth
    assert positional[1] == ops.obj_make_ref_op
85+
86+
def test_make_ref_calls_apply_unary_op_without_authorizer():
    """Without an authorizer, make_ref dispatches the JSON make-ref unary op."""
    json_val = MagicMock(spec=series.Series)

    obj.make_ref(json_val)

    unary = json_val._apply_unary_op
    unary.assert_called_once()
    # call_args[0] is the tuple of positional arguments of the call.
    positional = unary.call_args[0]
    assert positional[0] == ops.obj_make_ref_json_op

0 commit comments

Comments
 (0)