Skip to content

Commit 98ea48b

Browse files
Merge remote-tracking branch 'github/main' into cast_ibis_error_fix
2 parents b102b1a + c3c292c commit 98ea48b

File tree

46 files changed

+1650
-83
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+1650
-83
lines changed

CHANGELOG.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,23 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.23.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.22.0...v2.23.0) (2025-09-29)
8+
9+
10+
### Features
11+
12+
* Add ai.generate_double to bigframes.bigquery package ([#2111](https://github.com/googleapis/python-bigquery-dataframes/issues/2111)) ([6b8154c](https://github.com/googleapis/python-bigquery-dataframes/commit/6b8154c578bb1a276e9cf8fe494d91f8cd6260f2))
13+
14+
15+
### Bug Fixes
16+
17+
* Prevent invalid syntax for no-op .replace ops ([#2112](https://github.com/googleapis/python-bigquery-dataframes/issues/2112)) ([c311876](https://github.com/googleapis/python-bigquery-dataframes/commit/c311876b2adbc0b66ae5e463c6e56466c6a6a495))
18+
19+
20+
### Documentation
21+
22+
* Add timedelta notebook sample ([#2124](https://github.com/googleapis/python-bigquery-dataframes/issues/2124)) ([d1a9888](https://github.com/googleapis/python-bigquery-dataframes/commit/d1a9888a2b47de6aca5dddc94d0c8f280344b58a))
23+
724
## [2.22.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.21.0...v2.22.0) (2025-09-25)
825

926

bigframes/bigquery/_operations/ai.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,80 @@
3535
]
3636

3737

38+
@log_adapter.method_logger(custom_base_name="bigquery_ai")
def generate(
    prompt: PROMPT_TYPE,
    *,
    connection_id: str | None = None,
    endpoint: str | None = None,
    request_type: Literal["dedicated", "shared", "unspecified"] = "unspecified",
    model_params: Mapping[Any, Any] | None = None,
    # TODO(b/446974666) Add output_schema parameter
) -> series.Series:
    """
    Sends a prompt to a Gemini model and returns its analysis. The prompt may freely mix text and unstructured data.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> bpd.options.display.progress_bar = None
        >>> country = bpd.Series(["Japan", "Canada"])
        >>> bbq.ai.generate(("What's the capital city of ", country, " one word only"))
        0    {'result': 'Tokyo\\n', 'full_response': '{"cand...
        1    {'result': 'Ottawa\\n', 'full_response': '{"can...
        dtype: struct<result: string, full_response: extension<dbjson<JSONArrowType>>, status: string>[pyarrow]

        >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")).struct.field("result")
        0     Tokyo\\n
        1    Ottawa\\n
        Name: result, dtype: string

    Args:
        prompt (Series | List[str|Series] | Tuple[str|Series, ...]):
            The prompt to send to the model, given as a mixture of Series and string literals. Each Series may be
            either a BigFrames Series or a pandas Series.
        connection_id (str, optional):
            The connection used to communicate with the model, for example `myproject.us.myconnection`.
            When omitted, the connection of the current session is used.
        endpoint (str, optional):
            The Vertex AI endpoint of the model, for example `"gemini-2.5-flash"`. Any generally available or preview
            Gemini model can be given. If only the model name is given, BigQuery ML automatically identifies and uses
            the full endpoint of the model. If no endpoint is given, BigQuery ML selects a recent stable version of
            Gemini.
        request_type (Literal["dedicated", "shared", "unspecified"]):
            The type of inference request sent to the Gemini model, which determines the quota the request uses.
            * "dedicated": the function only uses Provisioned Throughput quota. If that quota isn't available, the
                function returns the error "Provisioned throughput is not purchased or is not active".
            * "shared": the function only uses dynamic shared quota (DSQ), even if you have purchased Provisioned
                Throughput quota.
            * "unspecified": without purchased Provisioned Throughput quota, the function uses DSQ quota. With
                purchased Provisioned Throughput quota, that quota is used first and any overflow traffic
                uses DSQ quota.
        model_params (Mapping[Any, Any], optional):
            Additional parameters for the model. The value must conform to the generateContent request body format.

    Returns:
        bigframes.series.Series: A new struct Series with the result data. The struct contains these fields:
            * "result": a STRING value holding the model's response to the prompt. It is None if the request fails or is filtered by responsible AI.
            * "full_response": a JSON value holding the response from the projects.locations.endpoints.generateContent call to the model.
                The generated text is in the text element.
            * "status": a STRING value holding the API response status for the corresponding row. It is empty if the operation was successful.
    """

    prompt_context, series_list = _separate_context_and_series(prompt)
    assert len(series_list) > 0

    # The first Series anchors the n-ary op and supplies the session used
    # for connection resolution; the remaining Series are its extra inputs.
    anchor = series_list[0]
    extra_inputs = series_list[1:]

    context = tuple(prompt_context)
    resolved_connection = _resolve_connection_id(anchor, connection_id)

    # Model parameters are serialized to a JSON string; a missing or empty
    # mapping is passed on as None.
    if model_params:
        serialized_params = json.dumps(model_params)
    else:
        serialized_params = None

    operator = ai_ops.AIGenerate(
        prompt_context=context,
        connection_id=resolved_connection,
        endpoint=endpoint,
        request_type=request_type,
        model_params=serialized_params,
    )

    return anchor._apply_nary_op(operator, extra_inputs)
110+
111+
38112
@log_adapter.method_logger(custom_base_name="bigquery_ai")
39113
def generate_bool(
40114
prompt: PROMPT_TYPE,

bigframes/core/compile/ibis_compiler/scalar_op_registry.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,6 +1173,10 @@ def udf(*inputs):
11731173

11741174
@scalar_op_compiler.register_unary_op(ops.MapOp, pass_op=True)
11751175
def map_op_impl(x: ibis_types.Value, op: ops.MapOp):
1176+
# this should probably be handled by a rewriter
1177+
if len(op.mappings) == 0:
1178+
return x
1179+
11761180
case = ibis_api.case()
11771181
for mapping in op.mappings:
11781182
case = case.when(x == mapping[0], mapping[1])
@@ -1970,6 +1974,20 @@ def struct_op_impl(
19701974
return ibis_types.struct(data)
19711975

19721976

1977+
@scalar_op_compiler.register_nary_op(ops.AIGenerate, pass_op=True)
def ai_generate(
    *values: ibis_types.Value, op: ops.AIGenerate
) -> ibis_types.StructValue:
    """Compile ``ops.AIGenerate`` into the ibis ``AIGenerate`` expression.

    Args:
        values: The compiled input columns referenced by the prompt.
        op: The operator carrying the prompt context, connection, endpoint,
            request type and model parameters.

    Returns:
        The expression produced by ``ai_ops.AIGenerate(...).to_expr()``.
    """
    prompt = _construct_prompt(values, op.prompt_context)
    # The request type is forwarded in upper case.
    request_type = op.request_type.upper()

    node = ai_ops.AIGenerate(
        prompt,  # type: ignore
        op.connection_id,  # type: ignore
        op.endpoint,  # type: ignore
        request_type,  # type: ignore
        op.model_params,  # type: ignore
    )
    return node.to_expr()
1989+
1990+
19731991
@scalar_op_compiler.register_nary_op(ops.AIGenerateBool, pass_op=True)
19741992
def ai_generate_bool(
19751993
*values: ibis_types.Value, op: ops.AIGenerateBool

bigframes/core/compile/sqlglot/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import bigframes.core.compile.sqlglot.expressions.ai_ops # noqa: F401
1818
import bigframes.core.compile.sqlglot.expressions.array_ops # noqa: F401
1919
import bigframes.core.compile.sqlglot.expressions.blob_ops # noqa: F401
20+
import bigframes.core.compile.sqlglot.expressions.bool_ops # noqa: F401
2021
import bigframes.core.compile.sqlglot.expressions.comparison_ops # noqa: F401
2122
import bigframes.core.compile.sqlglot.expressions.date_ops # noqa: F401
2223
import bigframes.core.compile.sqlglot.expressions.datetime_ops # noqa: F401

bigframes/core/compile/sqlglot/aggregations/unary_compiler.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,17 @@ def compile(
3838
return UNARY_OP_REGISTRATION[op](op, column, window=window)
3939

4040

41+
@UNARY_OP_REGISTRATION.register(agg_ops.AllOp)
def _(
    op: agg_ops.AllOp,
    column: typed_expr.TypedExpr,
    window: typing.Optional[window_spec.WindowSpec] = None,
) -> sge.Expression:
    """Compile ``AllOp`` to ``IFNULL(LOGICAL_AND(column), TRUE)``.

    Args:
        op: The aggregation operator being compiled (not otherwise used).
        column: The typed input column expression.
        window: Optional window specification passed to
            ``apply_window_if_present`` together with the aggregate.

    Returns:
        The aggregate expression with a TRUE fallback for NULL results.
    """
    logical_and = sge.func("LOGICAL_AND", column.expr)
    aggregated = apply_window_if_present(logical_and, window)
    # BQ returns NULL for an empty column, whereas pandas `all` yields True
    # there, so a NULL aggregate is replaced by TRUE.
    return sge.func("IFNULL", aggregated, sge.true())
50+
51+
4152
@UNARY_OP_REGISTRATION.register(agg_ops.ApproxQuartilesOp)
4253
def _(
4354
op: agg_ops.ApproxQuartilesOp,
@@ -69,6 +80,15 @@ def _(
6980
return sge.func("APPROX_TOP_COUNT", column.expr, sge.convert(op.number))
7081

7182

83+
@UNARY_OP_REGISTRATION.register(agg_ops.AnyValueOp)
def _(
    op: agg_ops.AnyValueOp,
    column: typed_expr.TypedExpr,
    window: typing.Optional[window_spec.WindowSpec] = None,
) -> sge.Expression:
    """Compile ``AnyValueOp`` to ``ANY_VALUE(column)``.

    Args:
        op: The aggregation operator being compiled (not otherwise used).
        column: The typed input column expression.
        window: Optional window specification passed to
            ``apply_window_if_present`` together with the aggregate.

    Returns:
        The value returned by ``apply_window_if_present`` for the aggregate.
    """
    any_value = sge.func("ANY_VALUE", column.expr)
    return apply_window_if_present(any_value, window)
90+
91+
7292
@UNARY_OP_REGISTRATION.register(agg_ops.CountOp)
7393
def _(
7494
op: agg_ops.CountOp,

bigframes/core/compile/sqlglot/expressions/ai_ops.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,13 @@
2626
register_nary_op = scalar_compiler.scalar_op_compiler.register_nary_op
2727

2828

29+
@register_nary_op(ops.AIGenerate, pass_op=True)
def _(*exprs: TypedExpr, op: ops.AIGenerate) -> sge.Expression:
    """Compile ``ops.AIGenerate`` into an ``AI.GENERATE(...)`` function call.

    The prompt expression is the first argument, followed by the named
    arguments derived from ``op``.
    """
    prompt = _construct_prompt(exprs, op.prompt_context)
    call_args = [prompt] + _construct_named_args(op)
    return sge.func("AI.GENERATE", *call_args)
34+
35+
2936
@register_nary_op(ops.AIGenerateBool, pass_op=True)
3037
def _(*exprs: TypedExpr, op: ops.AIGenerateBool) -> sge.Expression:
3138
args = [_construct_prompt(exprs, op.prompt_context)] + _construct_named_args(op)
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import sqlglot.expressions as sge
18+
19+
from bigframes import dtypes
20+
from bigframes import operations as ops
21+
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
22+
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
23+
24+
register_binary_op = scalar_compiler.scalar_op_compiler.register_binary_op
25+
26+
27+
@register_binary_op(ops.and_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
    """Compile ``and_op``: logical AND for two booleans, bitwise AND otherwise."""
    both_bool = left.dtype == dtypes.BOOL_DTYPE and right.dtype == dtypes.BOOL_DTYPE
    # Both node types take the same operands; only the operator differs.
    node_type = sge.And if both_bool else sge.BitwiseAnd
    return node_type(this=left.expr, expression=right.expr)
32+
33+
34+
@register_binary_op(ops.or_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
    """Compile ``or_op``: logical OR for two booleans, bitwise OR otherwise."""
    both_bool = left.dtype == dtypes.BOOL_DTYPE and right.dtype == dtypes.BOOL_DTYPE
    # Both node types take the same operands; only the operator differs.
    node_type = sge.Or if both_bool else sge.BitwiseOr
    return node_type(this=left.expr, expression=right.expr)
39+
40+
41+
@register_binary_op(ops.xor_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
    """Compile ``xor_op``.

    For two boolean operands the result is spelled out as
    ``(left AND NOT right) OR (NOT left AND right)``; for any other operand
    types a bitwise XOR node is emitted.
    """
    both_bool = left.dtype == dtypes.BOOL_DTYPE and right.dtype == dtypes.BOOL_DTYPE
    if not both_bool:
        return sge.BitwiseXor(this=left.expr, expression=right.expr)

    # Exactly one side is true: either only the left or only the right.
    only_left = sge.And(this=left.expr, expression=sge.Not(this=right.expr))
    only_right = sge.And(this=sge.Not(this=left.expr), expression=right.expr)
    return sge.Or(this=only_left, expression=only_right)

bigframes/core/compile/sqlglot/expressions/generic_ops.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ def _(expr: TypedExpr) -> sge.Expression:
7878

7979
@register_unary_op(ops.MapOp, pass_op=True)
8080
def _(expr: TypedExpr, op: ops.MapOp) -> sge.Expression:
81+
if len(op.mappings) == 0:
82+
return expr.expr
8183
return sge.Case(
8284
this=expr.expr,
8385
ifs=[

bigframes/core/compile/sqlglot/expressions/numeric_ops.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,49 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
323323
return result
324324

325325

326+
@register_binary_op(ops.mod_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
    """Compile ``mod_op`` into a CASE expression around BigQuery ``MOD``.

    The CASE expression guards against a zero divisor and adjusts the sign
    of the remainder so that it follows the divisor rather than the dividend.

    Args:
        left: The dividend.
        right: The divisor.

    Returns:
        A ``sge.Case`` whose default branch is the plain ``MOD`` result.
    """
    # In BigQuery the returned value has the same sign as X (the dividend).
    # In pandas the sign of y (the divisor) is used, so the result has to be
    # shifted by the divisor whenever the two signs disagree.
    left_expr = _coerce_bool_to_int(left)
    right_expr = _coerce_bool_to_int(right)

    # BigQuery MOD function doesn't support float types, so cast to BIGNUMERIC
    if left.dtype == dtypes.FLOAT_DTYPE or right.dtype == dtypes.FLOAT_DTYPE:
        left_expr = sge.Cast(this=left_expr, to="BIGNUMERIC")
        right_expr = sge.Cast(this=right_expr, to="BIGNUMERIC")

    # MOD(N, 0) will error in BigQuery, so a zero divisor is intercepted by
    # the first CASE branch below, which yields `zero_result * left_expr`
    # instead of evaluating MOD.
    bq_mod = sge.Mod(this=left_expr, expression=right_expr)
    # NOTE(review): constants._NAN / constants._ZERO are presumably NaN and
    # zero literals -- confirm in the constants module. _NAN is chosen when
    # either operand is a float, _ZERO otherwise.
    zero_result = (
        constants._NAN
        if (left.dtype == dtypes.FLOAT_DTYPE or right.dtype == dtypes.FLOAT_DTYPE)
        else constants._ZERO
    )
    return sge.Case(
        ifs=[
            # Zero divisor: skip MOD entirely.
            # NOTE(review): multiplying by left_expr presumably lets a NULL
            # dividend propagate to the result -- confirm.
            sge.If(
                this=sge.EQ(this=right_expr, expression=constants._ZERO),
                true=zero_result * left_expr,
            ),
            # Negative divisor but positive remainder: shift by the divisor.
            sge.If(
                this=sge.and_(
                    right_expr < constants._ZERO,
                    bq_mod > constants._ZERO,
                ),
                true=right_expr + bq_mod,
            ),
            # Positive divisor but negative remainder: shift by the divisor.
            sge.If(
                this=sge.and_(
                    right_expr > constants._ZERO,
                    bq_mod < constants._ZERO,
                ),
                true=right_expr + bq_mod,
            ),
        ],
        # Signs already agree (or the remainder is zero): use MOD as-is.
        default=bq_mod,
    )
367+
368+
326369
@register_binary_op(ops.mul_op)
327370
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
328371
left_expr = _coerce_bool_to_int(left)

bigframes/core/compile/sqlglot/sqlglot_ir.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ def from_union(
175175
), f"At least two select expressions must be provided, but got {selects}."
176176

177177
existing_ctes: list[sge.CTE] = []
178-
union_selects: list[sge.Select] = []
178+
union_selects: list[sge.Expression] = []
179179
for select in selects:
180180
assert isinstance(
181181
select, sge.Select
@@ -204,10 +204,14 @@ def from_union(
204204
sge.Select().select(*selections).from_(sge.Table(this=new_cte_name))
205205
)
206206

207-
union_expr = sg.union(
208-
*union_selects,
209-
distinct=False,
210-
copy=False,
207+
union_expr = typing.cast(
208+
sge.Select,
209+
functools.reduce(
210+
lambda x, y: sge.Union(
211+
this=x, expression=y, distinct=False, copy=False
212+
),
213+
union_selects,
214+
),
211215
)
212216
final_select_expr = sge.Select().select(sge.Star()).from_(union_expr.subquery())
213217
final_select_expr.set("with", sge.With(expressions=existing_ctes))

0 commit comments

Comments
 (0)