Skip to content

Commit fc11a1c

Browse files
authored
Merge branch 'main' into b329865893-groupby-iter
2 parents d68b56d + a2daa3f commit fc11a1c

File tree

152 files changed

+2672
-2122
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

152 files changed

+2672
-2122
lines changed

CHANGELOG.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,20 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.21.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.20.0...v2.21.0) (2025-09-17)
8+
9+
10+
### Features
11+
12+
* Add bigframes.bigquery.to_json ([#2078](https://github.com/googleapis/python-bigquery-dataframes/issues/2078)) ([0fc795a](https://github.com/googleapis/python-bigquery-dataframes/commit/0fc795a9fb56f469b62603462c3f0f56f52bfe04))
13+
* Support average='binary' in precision_score() ([#2080](https://github.com/googleapis/python-bigquery-dataframes/issues/2080)) ([920f381](https://github.com/googleapis/python-bigquery-dataframes/commit/920f381aec7e0a0b986886cdbc333e86335c6d7d))
14+
* Support pandas series in ai.generate_bool ([#2086](https://github.com/googleapis/python-bigquery-dataframes/issues/2086)) ([a3de53f](https://github.com/googleapis/python-bigquery-dataframes/commit/a3de53f68b2a24f4ed85a474dfaff9b59570a2f1))
15+
16+
17+
### Bug Fixes
18+
19+
* Allow bigframes.options.bigquery.credentials to be `None` ([#2092](https://github.com/googleapis/python-bigquery-dataframes/issues/2092)) ([78f4001](https://github.com/googleapis/python-bigquery-dataframes/commit/78f4001e8fcfc77fc82f3893d58e0d04c0f6d3db))
20+
721
## [2.20.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.19.0...v2.20.0) (2025-09-16)
822

923

bigframes/bigquery/_operations/ai.py

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,25 @@
1919
from __future__ import annotations
2020

2121
import json
22-
from typing import Any, List, Literal, Mapping, Tuple
22+
from typing import Any, List, Literal, Mapping, Tuple, Union
2323

24-
from bigframes import clients, dtypes, series
25-
from bigframes.core import log_adapter
24+
import pandas as pd
25+
26+
from bigframes import clients, dtypes, series, session
27+
from bigframes.core import convert, log_adapter
2628
from bigframes.operations import ai_ops
2729

30+
PROMPT_TYPE = Union[
31+
series.Series,
32+
pd.Series,
33+
List[Union[str, series.Series, pd.Series]],
34+
Tuple[Union[str, series.Series, pd.Series], ...],
35+
]
36+
2837

2938
@log_adapter.method_logger(custom_base_name="bigquery_ai")
3039
def generate_bool(
31-
prompt: series.Series | List[str | series.Series] | Tuple[str | series.Series, ...],
40+
prompt: PROMPT_TYPE,
3241
*,
3342
connection_id: str | None = None,
3443
endpoint: str | None = None,
@@ -51,7 +60,7 @@ def generate_bool(
5160
0 {'result': True, 'full_response': '{"candidate...
5261
1 {'result': True, 'full_response': '{"candidate...
5362
2 {'result': False, 'full_response': '{"candidat...
54-
dtype: struct<result: bool, full_response: string, status: string>[pyarrow]
63+
dtype: struct<result: bool, full_response: extension<dbjson<JSONArrowType>>, status: string>[pyarrow]
5564
5665
>>> bbq.ai.generate_bool((df["col_1"], " is a ", df["col_2"])).struct.field("result")
5766
0 True
@@ -60,8 +69,9 @@ def generate_bool(
6069
Name: result, dtype: boolean
6170
6271
Args:
63-
prompt (series.Series | List[str|series.Series] | Tuple[str|series.Series, ...]):
64-
A mixture of Series and string literals that specifies the prompt to send to the model.
72+
prompt (Series | List[str|Series] | Tuple[str|Series, ...]):
73+
A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series
74+
or pandas Series.
6575
connection_id (str, optional):
6676
Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`.
6777
If not provided, the connection from the current session will be used.
@@ -84,7 +94,7 @@ def generate_bool(
8494
Returns:
8595
bigframes.series.Series: A new struct Series with the result data. The struct contains these fields:
8696
* "result": a BOOL value containing the model's response to the prompt. The result is None if the request fails or is filtered by responsible AI.
87-
* "full_response": a STRING value containing the JSON response from the projects.locations.endpoints.generateContent call to the model.
97+
* "full_response": a JSON value containing the response from the projects.locations.endpoints.generateContent call to the model.
8898
The generated text is in the text element.
8999
* "status": a STRING value that contains the API response status for the corresponding row. This value is empty if the operation was successful.
90100
"""
@@ -104,7 +114,7 @@ def generate_bool(
104114

105115

106116
def _separate_context_and_series(
107-
prompt: series.Series | List[str | series.Series] | Tuple[str | series.Series, ...],
117+
prompt: PROMPT_TYPE,
108118
) -> Tuple[List[str | None], List[series.Series]]:
109119
"""
110120
Returns the two values. The first value is the prompt with all series replaced by None. The second value is all the series
@@ -123,18 +133,19 @@ def _separate_context_and_series(
123133
return [None], [prompt]
124134

125135
prompt_context: List[str | None] = []
126-
series_list: List[series.Series] = []
136+
series_list: List[series.Series | pd.Series] = []
127137

138+
session = None
128139
for item in prompt:
129140
if isinstance(item, str):
130141
prompt_context.append(item)
131142

132-
elif isinstance(item, series.Series):
143+
elif isinstance(item, (series.Series, pd.Series)):
133144
prompt_context.append(None)
134145

135-
if item.dtype == dtypes.OBJ_REF_DTYPE:
136-
# Multi-model support
137-
item = item.blob.read_url()
146+
if isinstance(item, series.Series) and session is None:
147+
# Use the first available BF session if there's any.
148+
session = item._session
138149
series_list.append(item)
139150

140151
else:
@@ -143,7 +154,20 @@ def _separate_context_and_series(
143154
if not series_list:
144155
raise ValueError("Please provide at least one Series in the prompt")
145156

146-
return prompt_context, series_list
157+
converted_list = [_convert_series(s, session) for s in series_list]
158+
159+
return prompt_context, converted_list
160+
161+
162+
def _convert_series(
    s: series.Series | pd.Series, session: session.Session | None
) -> series.Series:
    """Normalize a prompt series into a BigFrames Series usable in a prompt.

    Args:
        s: A BigFrames or pandas Series taken from the prompt.
        session: Session forwarded to ``convert.to_bf_series``; may be None.

    Returns:
        The converted BigFrames Series. When its dtype is
        ``dtypes.OBJ_REF_DTYPE`` the result of ``.blob.read_url()`` is
        returned instead, so object references can be used for multimodal
        prompts.
    """
    bf_series = convert.to_bf_series(s, default_index=None, session=session)

    is_object_ref = bf_series.dtype == dtypes.OBJ_REF_DTYPE
    # Multimodal support: object references are swapped for their read URLs.
    return bf_series.blob.read_url() if is_object_ref else bf_series
147171

148172

149173
def _resolve_connection_id(series: series.Series, connection_id: str | None):

bigframes/core/compile/sqlglot/__init__.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,18 @@
1414
from __future__ import annotations
1515

1616
from bigframes.core.compile.sqlglot.compiler import SQLGlotCompiler
17+
import bigframes.core.compile.sqlglot.expressions.array_ops # noqa: F401
1718
import bigframes.core.compile.sqlglot.expressions.binary_compiler # noqa: F401
18-
import bigframes.core.compile.sqlglot.expressions.unary_compiler # noqa: F401
19+
import bigframes.core.compile.sqlglot.expressions.blob_ops # noqa: F401
20+
import bigframes.core.compile.sqlglot.expressions.comparison_ops # noqa: F401
21+
import bigframes.core.compile.sqlglot.expressions.date_ops # noqa: F401
22+
import bigframes.core.compile.sqlglot.expressions.datetime_ops # noqa: F401
23+
import bigframes.core.compile.sqlglot.expressions.generic_ops # noqa: F401
24+
import bigframes.core.compile.sqlglot.expressions.geo_ops # noqa: F401
25+
import bigframes.core.compile.sqlglot.expressions.json_ops # noqa: F401
26+
import bigframes.core.compile.sqlglot.expressions.numeric_ops # noqa: F401
27+
import bigframes.core.compile.sqlglot.expressions.string_ops # noqa: F401
28+
import bigframes.core.compile.sqlglot.expressions.struct_ops # noqa: F401
29+
import bigframes.core.compile.sqlglot.expressions.timedelta_ops # noqa: F401
1930

2031
__all__ = ["SQLGlotCompiler"]
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import typing
18+
19+
import sqlglot
20+
import sqlglot.expressions as sge
21+
22+
from bigframes import operations as ops
23+
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
24+
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
25+
26+
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
27+
28+
29+
@register_unary_op(ops.ArrayToStringOp, pass_op=True)
def _(expr: TypedExpr, op: ops.ArrayToStringOp) -> sge.Expression:
    """Compile ``ArrayToStringOp`` into a sqlglot ``ArrayToString`` expression.

    Args:
        expr: The compiled array operand.
        op: The operation carrying the ``delimiter`` used to join elements.

    Returns:
        An ``sge.ArrayToString`` node joining the array with the delimiter.
    """
    # Build a proper string-literal node rather than hand-quoting the
    # delimiter with an f-string: the SQL generator is then responsible for
    # escaping, so a delimiter containing a quote or backslash cannot
    # terminate the literal early and corrupt the generated SQL.
    return sge.ArrayToString(
        this=expr.expr,
        expression=sge.Literal.string(op.delimiter),
    )
32+
33+
34+
@register_unary_op(ops.ArrayIndexOp, pass_op=True)
def _(expr: TypedExpr, op: ops.ArrayIndexOp) -> sge.Expression:
    """Compile ``ArrayIndexOp`` into a bracket (subscript) expression.

    Args:
        expr: The compiled array operand.
        op: The operation carrying the ``index`` to read.

    Returns:
        An ``sge.Bracket`` node built with ``safe=True`` and ``offset=False``.
    """
    index_literal = sge.Literal.number(op.index)
    return sge.Bracket(
        this=expr.expr, expressions=[index_literal], safe=True, offset=False
    )
42+
43+
44+
@register_unary_op(ops.ArraySliceOp, pass_op=True)
def _(expr: TypedExpr, op: ops.ArraySliceOp) -> sge.Expression:
    """Compile ``ArraySliceOp`` as an array built from a filtered UNNEST.

    The array is unnested with an offset column, rows whose offset falls
    inside ``[op.start, op.stop)`` are kept (no upper bound when ``op.stop``
    is None), and the surviving elements are collected back into an array.

    Args:
        expr: The compiled array operand.
        op: The operation carrying the ``start`` and optional ``stop`` bounds.

    Returns:
        The expression produced by ``sge.array`` over the filtered select.
    """
    # Identifier for the offset of each element within the array.
    offset_col = sqlglot.to_identifier("slice_idx")
    # Identifier for each unnested element.
    element_col = sqlglot.to_identifier("el")

    bounds: typing.List[sge.Predicate] = [offset_col >= op.start]
    if op.stop is not None:
        bounds.append(offset_col < op.stop)

    unnested = sge.Unnest(
        expressions=[expr.expr],
        alias=sge.TableAlias(columns=[element_col]),
        offset=offset_col,
    )
    kept_elements = sge.select(element_col).from_(unnested).where(*bounds)

    return sge.array(kept_elements)
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import sqlglot.expressions as sge
18+
19+
from bigframes import operations as ops
20+
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
21+
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
22+
23+
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
24+
25+
26+
@register_unary_op(ops.obj_fetch_metadata_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile ``obj_fetch_metadata_op`` into an ``OBJ.FETCH_METADATA`` call."""
    operand = expr.expr
    return sge.func("OBJ.FETCH_METADATA", operand)
29+
30+
31+
@register_unary_op(ops.ObjGetAccessUrl)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile ``ObjGetAccessUrl`` into an ``OBJ.GET_ACCESS_URL`` call."""
    operand = expr.expr
    return sge.func("OBJ.GET_ACCESS_URL", operand)
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import typing
18+
19+
import pandas as pd
20+
import sqlglot.expressions as sge
21+
22+
from bigframes import operations as ops
23+
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
24+
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
25+
import bigframes.dtypes as dtypes
26+
27+
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
28+
29+
30+
@register_unary_op(ops.IsInOp, pass_op=True)
def _(expr: TypedExpr, op: ops.IsInOp) -> sge.Expression:
    """Compile ``IsInOp`` into a SQL membership test.

    Only candidate values whose BigFrames dtype equals the operand dtype (or
    that are numeric when the operand is numeric) are kept; ``None`` entries
    are dropped from the ``IN`` list.

    Args:
        expr: The compiled operand being tested for membership.
        op: The operation carrying ``values`` and the ``match_nulls`` flag.

    Returns:
        * ``expr IS NULL [OR expr IN (...)]`` when ``match_nulls`` is set and
          ``values`` contains a null.
        * A literal ``FALSE`` when no comparable value remains.
        * ``COALESCE(expr IN (...), FALSE)`` otherwise.
    """
    values = []
    is_numeric_expr = dtypes.is_numeric(expr.dtype)
    for value in op.values:
        if value is None:
            continue
        dtype = dtypes.bigframes_type(type(value))
        if expr.dtype == dtype or (is_numeric_expr and dtypes.is_numeric(dtype)):
            values.append(sge.convert(value))

    if op.match_nulls and any(_is_null(value) for value in op.values):
        is_null = sge.Is(this=expr.expr, expression=sge.Null())
        if not values:
            # Nothing but nulls can match. Emitting `expr IN ()` with an
            # empty list would be invalid SQL, so return the null test alone.
            return is_null
        return is_null | sge.In(this=expr.expr, expressions=values)

    if not values:
        return sge.convert(False)

    # IN yields NULL for a NULL operand; coalesce so the result is a strict
    # boolean.
    return sge.func(
        "COALESCE", sge.In(this=expr.expr, expressions=values), sge.convert(False)
    )
54+
55+
56+
# Helpers
57+
def _is_null(value) -> bool:
    """Return whether ``value`` is a "true" null.

    Anything ``pd.isna`` reports as missing counts as null, except ``float``
    instances: float NaN/inf are treated as distinct from real null values.
    """
    is_missing = typing.cast(bool, pd.isna(value))
    return is_missing and not isinstance(value, float)
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import sqlglot.expressions as sge
18+
19+
from bigframes import operations as ops
20+
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
21+
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
22+
23+
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
24+
25+
26+
@register_unary_op(ops.date_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile ``date_op`` by wrapping the operand in a sqlglot ``Date``."""
    operand = expr.expr
    return sge.Date(this=operand)
29+
30+
31+
@register_unary_op(ops.day_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile ``day_op`` into ``EXTRACT(DAY FROM <operand>)``."""
    date_part = sge.Identifier(this="DAY")
    return sge.Extract(this=date_part, expression=expr.expr)
34+
35+
36+
@register_unary_op(ops.dayofweek_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile ``dayofweek_op`` into a pandas-compatible day-of-week index.

    Args:
        expr: The compiled date/datetime operand.

    Returns:
        An expression evaluating to 0 for Monday through 6 for Sunday.
    """
    # BigQuery's EXTRACT(DAYOFWEEK ...) returns 1 for Sunday .. 7 for Saturday.
    sql_day_of_week = sge.Extract(
        this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr
    )
    # Simply subtracting 1 would make Sunday 0. Rotate instead so the week
    # starts on Monday: (dow + 5) mod 7 maps Mon..Sun to 0..6.
    return sge.Mod(
        this=sge.Paren(this=sql_day_of_week + sge.convert(5)),
        expression=sge.convert(7),
    )
42+
43+
44+
@register_unary_op(ops.dayofyear_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile ``dayofyear_op`` into ``EXTRACT(DAYOFYEAR FROM <operand>)``."""
    date_part = sge.Identifier(this="DAYOFYEAR")
    return sge.Extract(this=date_part, expression=expr.expr)
47+
48+
49+
@register_unary_op(ops.iso_day_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile ``iso_day_op`` into the ISO 8601 day-of-week number.

    Args:
        expr: The compiled date/datetime operand.

    Returns:
        An expression evaluating to 1 for Monday through 7 for Sunday.
    """
    # BigQuery's EXTRACT(DAYOFWEEK ...) returns 1 for Sunday .. 7 for
    # Saturday, which is not the ISO numbering and cannot be returned as-is.
    sql_day_of_week = sge.Extract(
        this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr
    )
    # (dow + 5) mod 7 gives Monday=0 .. Sunday=6; ISO weekdays are 1-based.
    monday_based = sge.Mod(
        this=sge.Paren(this=sql_day_of_week + sge.convert(5)),
        expression=sge.convert(7),
    )
    return monday_based + sge.convert(1)
52+
53+
54+
@register_unary_op(ops.iso_week_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile ``iso_week_op`` into ``EXTRACT(ISOWEEK FROM <operand>)``."""
    date_part = sge.Identifier(this="ISOWEEK")
    return sge.Extract(this=date_part, expression=expr.expr)
57+
58+
59+
@register_unary_op(ops.iso_year_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile ``iso_year_op`` into ``EXTRACT(ISOYEAR FROM <operand>)``."""
    date_part = sge.Identifier(this="ISOYEAR")
    return sge.Extract(this=date_part, expression=expr.expr)

0 commit comments

Comments
 (0)