Skip to content

Commit a855c52

Browse files
authored
Merge branch 'main' into main_chelsealin_bool
2 parents 9bc531d + 6b8154c commit a855c52

File tree

32 files changed

+677
-35
lines changed

32 files changed

+677
-35
lines changed

CHANGELOG.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,27 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.22.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.21.0...v2.22.0) (2025-09-25)
8+
9+
10+
### Features
11+
12+
* Add `GroupBy.__iter__` ([#1394](https://github.com/googleapis/python-bigquery-dataframes/issues/1394)) ([c56a78c](https://github.com/googleapis/python-bigquery-dataframes/commit/c56a78cd509a535d4998d5b9a99ec3ecd334b883))
13+
* Add ai.generate_int to bigframes.bigquery package ([#2109](https://github.com/googleapis/python-bigquery-dataframes/issues/2109)) ([af6b862](https://github.com/googleapis/python-bigquery-dataframes/commit/af6b862de5c3921684210ec169338815f45b19dd))
14+
* Add `GroupBy.describe()` ([#2088](https://github.com/googleapis/python-bigquery-dataframes/issues/2088)) ([328a765](https://github.com/googleapis/python-bigquery-dataframes/commit/328a765e746138806a021bea22475e8c03512aeb))
15+
* Implement `Index.to_list()` ([#2106](https://github.com/googleapis/python-bigquery-dataframes/issues/2106)) ([60056ca](https://github.com/googleapis/python-bigquery-dataframes/commit/60056ca06511f99092647fe55fc02eeab486b4ca))
16+
* Implement inplace parameter for `DataFrame.drop` ([#2105](https://github.com/googleapis/python-bigquery-dataframes/issues/2105)) ([3487f13](https://github.com/googleapis/python-bigquery-dataframes/commit/3487f13d12e34999b385c2e11551b5e27bfbf4ff))
17+
* Support callable for series map method ([#2100](https://github.com/googleapis/python-bigquery-dataframes/issues/2100)) ([ac25618](https://github.com/googleapis/python-bigquery-dataframes/commit/ac25618feed2da11fe4fb85058d498d262c085c0))
18+
* Support df.info() with null index ([#2094](https://github.com/googleapis/python-bigquery-dataframes/issues/2094)) ([fb81eea](https://github.com/googleapis/python-bigquery-dataframes/commit/fb81eeaf13af059f32cb38e7f117fb3504243d51))
19+
20+
21+
### Bug Fixes
22+
23+
* Avoid ibis fillna warning in compiler ([#2113](https://github.com/googleapis/python-bigquery-dataframes/issues/2113)) ([7ef667b](https://github.com/googleapis/python-bigquery-dataframes/commit/7ef667b0f46f13bcc8ad4f2ed8f81278132b5aec))
24+
* Negative start and stop parameter values in Series.str.slice() ([#2104](https://github.com/googleapis/python-bigquery-dataframes/issues/2104)) ([f57a348](https://github.com/googleapis/python-bigquery-dataframes/commit/f57a348f1935a4e2bb14c501bb4c47cd552d102a))
25+
* Throw type error for incomparable join keys ([#2098](https://github.com/googleapis/python-bigquery-dataframes/issues/2098)) ([9dc9695](https://github.com/googleapis/python-bigquery-dataframes/commit/9dc96959a84b751d18b290129c2926df6e50b3f5))
26+
* Transformers with non-standard column names throw errors ([#2089](https://github.com/googleapis/python-bigquery-dataframes/issues/2089)) ([a2daa3f](https://github.com/googleapis/python-bigquery-dataframes/commit/a2daa3fffe6743327edb9f4c74db93198bd12f8e))
27+
728
## [2.21.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.20.0...v2.21.0) (2025-09-17)
829

930

bigframes/bigquery/_operations/ai.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,81 @@ def generate_int(
188188
return series_list[0]._apply_nary_op(operator, series_list[1:])
189189

190190

191+
@log_adapter.method_logger(custom_base_name="bigquery_ai")
192+
def generate_double(
193+
prompt: PROMPT_TYPE,
194+
*,
195+
connection_id: str | None = None,
196+
endpoint: str | None = None,
197+
request_type: Literal["dedicated", "shared", "unspecified"] = "unspecified",
198+
model_params: Mapping[Any, Any] | None = None,
199+
) -> series.Series:
200+
"""
201+
Returns the AI analysis based on the prompt, which can be any combination of text and unstructured data.
202+
203+
**Examples:**
204+
205+
>>> import bigframes.pandas as bpd
206+
>>> import bigframes.bigquery as bbq
207+
>>> bpd.options.display.progress_bar = None
208+
>>> animal = bpd.Series(["Kangaroo", "Rabbit", "Spider"])
209+
>>> bbq.ai.generate_double(("How many legs does a ", animal, " have?"))
210+
0 {'result': 2.0, 'full_response': '{"candidates...
211+
1 {'result': 4.0, 'full_response': '{"candidates...
212+
2 {'result': 8.0, 'full_response': '{"candidates...
213+
dtype: struct<result: double, full_response: extension<dbjson<JSONArrowType>>, status: string>[pyarrow]
214+
215+
>>> bbq.ai.generate_double(("How many legs does a ", animal, " have?")).struct.field("result")
216+
0 2.0
217+
1 4.0
218+
2 8.0
219+
Name: result, dtype: Float64
220+
221+
Args:
222+
prompt (Series | List[str|Series] | Tuple[str|Series, ...]):
223+
A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series
224+
or pandas Series.
225+
connection_id (str, optional):
226+
Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`.
227+
If not provided, the connection from the current session will be used.
228+
endpoint (str, optional):
229+
Specifies the Vertex AI endpoint to use for the model. For example `"gemini-2.5-flash"`. You can specify any
230+
generally available or preview Gemini model. If you specify the model name, BigQuery ML automatically identifies and
231+
uses the full endpoint of the model. If you don't specify an ENDPOINT value, BigQuery ML selects a recent stable
232+
version of Gemini to use.
233+
request_type (Literal["dedicated", "shared", "unspecified"]):
234+
Specifies the type of inference request to send to the Gemini model. The request type determines what quota the request uses.
235+
* "dedicated": the function only uses Provisioned Throughput quota. The function returns the error "Provisioned throughput is not
236+
purchased or is not active" if Provisioned Throughput quota isn't available.
237+
* "shared": the function only uses dynamic shared quota (DSQ), even if you have purchased Provisioned Throughput quota.
238+
* "unspecified": If you haven't purchased Provisioned Throughput quota, the function uses DSQ quota.
239+
If you have purchased Provisioned Throughput quota, the function uses the Provisioned Throughput quota first.
240+
If requests exceed the Provisioned Throughput quota, the overflow traffic uses DSQ quota.
241+
model_params (Mapping[Any, Any]):
242+
Provides additional parameters to the model. The MODEL_PARAMS value must conform to the generateContent request body format.
243+
244+
Returns:
245+
bigframes.series.Series: A new struct Series with the result data. The struct contains these fields:
246+
* "result": a DOUBLE value containing the model's response to the prompt. The result is None if the request fails or is filtered by responsible AI.
247+
* "full_response": a JSON value containing the response from the projects.locations.endpoints.generateContent call to the model.
248+
The generated text is in the text element.
249+
* "status": a STRING value that contains the API response status for the corresponding row. This value is empty if the operation was successful.
250+
"""
251+
252+
prompt_context, series_list = _separate_context_and_series(prompt)
253+
assert len(series_list) > 0
254+
255+
operator = ai_ops.AIGenerateDouble(
256+
prompt_context=tuple(prompt_context),
257+
connection_id=_resolve_connection_id(series_list[0], connection_id),
258+
endpoint=endpoint,
259+
request_type=request_type,
260+
model_params=json.dumps(model_params) if model_params else None,
261+
)
262+
263+
return series_list[0]._apply_nary_op(operator, series_list[1:])
264+
265+
191266
def _separate_context_and_series(
192267
prompt: PROMPT_TYPE,
193268
) -> Tuple[List[str | None], List[series.Series]]:

bigframes/core/compile/ibis_compiler/scalar_op_registry.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1986,7 +1986,7 @@ def ai_generate_bool(
19861986

19871987
@scalar_op_compiler.register_nary_op(ops.AIGenerateInt, pass_op=True)
19881988
def ai_generate_int(
1989-
*values: ibis_types.Value, op: ops.AIGenerateBool
1989+
*values: ibis_types.Value, op: ops.AIGenerateInt
19901990
) -> ibis_types.StructValue:
19911991

19921992
return ai_ops.AIGenerateInt(
@@ -1998,6 +1998,20 @@ def ai_generate_int(
19981998
).to_expr()
19991999

20002000

2001+
@scalar_op_compiler.register_nary_op(ops.AIGenerateDouble, pass_op=True)
2002+
def ai_generate_double(
2003+
*values: ibis_types.Value, op: ops.AIGenerateDouble
2004+
) -> ibis_types.StructValue:
2005+
2006+
return ai_ops.AIGenerateDouble(
2007+
_construct_prompt(values, op.prompt_context), # type: ignore
2008+
op.connection_id, # type: ignore
2009+
op.endpoint, # type: ignore
2010+
op.request_type.upper(), # type: ignore
2011+
op.model_params, # type: ignore
2012+
).to_expr()
2013+
2014+
20012015
def _construct_prompt(
20022016
col_refs: tuple[ibis_types.Value], prompt_context: tuple[str | None]
20032017
) -> ibis_types.StructValue:

bigframes/core/compile/sqlglot/aggregate_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def compile_analytic(
6363
window: window_spec.WindowSpec,
6464
) -> sge.Expression:
6565
if isinstance(aggregate, agg_expressions.NullaryAggregation):
66-
return nullary_compiler.compile(aggregate.op)
66+
return nullary_compiler.compile(aggregate.op, window)
6767
if isinstance(aggregate, agg_expressions.UnaryAggregation):
6868
column = typed_expr.TypedExpr(
6969
scalar_compiler.scalar_op_compiler.compile_expression(aggregate.arg),

bigframes/core/compile/sqlglot/aggregations/binary_compiler.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
from bigframes.core import window_spec
2222
import bigframes.core.compile.sqlglot.aggregations.op_registration as reg
23+
from bigframes.core.compile.sqlglot.aggregations.windows import apply_window_if_present
2324
import bigframes.core.compile.sqlglot.expressions.typed_expr as typed_expr
2425
from bigframes.operations import aggregations as agg_ops
2526

@@ -33,3 +34,25 @@ def compile(
3334
window: typing.Optional[window_spec.WindowSpec] = None,
3435
) -> sge.Expression:
3536
return BINARY_OP_REGISTRATION[op](op, left, right, window=window)
37+
38+
39+
@BINARY_OP_REGISTRATION.register(agg_ops.CorrOp)
40+
def _(
41+
op: agg_ops.CorrOp,
42+
left: typed_expr.TypedExpr,
43+
right: typed_expr.TypedExpr,
44+
window: typing.Optional[window_spec.WindowSpec] = None,
45+
) -> sge.Expression:
46+
result = sge.func("CORR", left.expr, right.expr)
47+
return apply_window_if_present(result, window)
48+
49+
50+
@BINARY_OP_REGISTRATION.register(agg_ops.CovOp)
51+
def _(
52+
op: agg_ops.CovOp,
53+
left: typed_expr.TypedExpr,
54+
right: typed_expr.TypedExpr,
55+
window: typing.Optional[window_spec.WindowSpec] = None,
56+
) -> sge.Expression:
57+
result = sge.func("COVAR_SAMP", left.expr, right.expr)
58+
return apply_window_if_present(result, window)

bigframes/core/compile/sqlglot/aggregations/nullary_compiler.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,15 @@ def _(
3939
window: typing.Optional[window_spec.WindowSpec] = None,
4040
) -> sge.Expression:
4141
return apply_window_if_present(sge.func("COUNT", sge.convert(1)), window)
42+
43+
44+
@NULLARY_OP_REGISTRATION.register(agg_ops.RowNumberOp)
45+
def _(
46+
op: agg_ops.RowNumberOp,
47+
window: typing.Optional[window_spec.WindowSpec] = None,
48+
) -> sge.Expression:
49+
result: sge.Expression = sge.func("ROW_NUMBER")
50+
if window is None:
51+
# ROW_NUMBER always needs an OVER clause.
52+
return sge.Window(this=result)
53+
return apply_window_if_present(result, window)

bigframes/core/compile/sqlglot/aggregations/op_registration.py

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -41,22 +41,16 @@ def arg_checker(*args, **kwargs):
4141
)
4242
return item(*args, **kwargs)
4343

44-
if hasattr(op, "name"):
45-
key = typing.cast(str, op.name)
46-
if key in self._registered_ops:
47-
raise ValueError(f"{key} is already registered")
48-
else:
49-
raise ValueError(f"The operator must have a 'name' attribute. Got {op}")
44+
key = str(op)
45+
if key in self._registered_ops:
46+
raise ValueError(f"{key} is already registered")
5047
self._registered_ops[key] = item
5148
return arg_checker
5249

5350
return decorator
5451

5552
def __getitem__(self, op: str | agg_ops.WindowOp) -> CompilationFunc:
56-
if isinstance(op, agg_ops.WindowOp):
57-
if not hasattr(op, "name"):
58-
raise ValueError(f"The operator must have a 'name' attribute. Got {op}")
59-
else:
60-
key = typing.cast(str, op.name)
61-
return self._registered_ops[key]
62-
return self._registered_ops[op]
53+
key = op if isinstance(op, type) else type(op)
54+
if str(key) not in self._registered_ops:
55+
raise ValueError(f"{key} is already not registered")
56+
return self._registered_ops[str(key)]

bigframes/core/compile/sqlglot/aggregations/unary_compiler.py

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,37 @@ def compile(
3838
return UNARY_OP_REGISTRATION[op](op, column, window=window)
3939

4040

41+
@UNARY_OP_REGISTRATION.register(agg_ops.ApproxQuartilesOp)
42+
def _(
43+
op: agg_ops.ApproxQuartilesOp,
44+
column: typed_expr.TypedExpr,
45+
window: typing.Optional[window_spec.WindowSpec] = None,
46+
) -> sge.Expression:
47+
if window is not None:
48+
raise NotImplementedError("Approx Quartiles with windowing is not supported.")
49+
# APPROX_QUANTILES returns an array of the quartiles, so we need to index it.
50+
# The op.quartile is 1-based for the quartile, but array is 0-indexed.
51+
# The quartiles are Q0, Q1, Q2, Q3, Q4. op.quartile is 1, 2, or 3.
52+
# The array has 5 elements (for N=4 intervals).
53+
# So we want the element at index `op.quartile`.
54+
approx_quantiles_expr = sge.func("APPROX_QUANTILES", column.expr, sge.convert(4))
55+
return sge.Bracket(
56+
this=approx_quantiles_expr,
57+
expressions=[sge.func("OFFSET", sge.convert(op.quartile))],
58+
)
59+
60+
61+
@UNARY_OP_REGISTRATION.register(agg_ops.ApproxTopCountOp)
62+
def _(
63+
op: agg_ops.ApproxTopCountOp,
64+
column: typed_expr.TypedExpr,
65+
window: typing.Optional[window_spec.WindowSpec] = None,
66+
) -> sge.Expression:
67+
if window is not None:
68+
raise NotImplementedError("Approx top count with windowing is not supported.")
69+
return sge.func("APPROX_TOP_COUNT", column.expr, sge.convert(op.number))
70+
71+
4172
@UNARY_OP_REGISTRATION.register(agg_ops.CountOp)
4273
def _(
4374
op: agg_ops.CountOp,
@@ -53,10 +84,7 @@ def _(
5384
column: typed_expr.TypedExpr,
5485
window: typing.Optional[window_spec.WindowSpec] = None,
5586
) -> sge.Expression:
56-
# Ranking functions do not support window framing clauses.
57-
return apply_window_if_present(
58-
sge.func("DENSE_RANK"), window, include_framing_clauses=False
59-
)
87+
return apply_window_if_present(sge.func("DENSE_RANK"), window)
6088

6189

6290
@UNARY_OP_REGISTRATION.register(agg_ops.MaxOp)
@@ -109,13 +137,23 @@ def _(
109137
return apply_window_if_present(sge.func("MIN", column.expr), window)
110138

111139

112-
@UNARY_OP_REGISTRATION.register(agg_ops.SizeUnaryOp)
140+
@UNARY_OP_REGISTRATION.register(agg_ops.QuantileOp)
113141
def _(
114-
op: agg_ops.SizeUnaryOp,
115-
_,
142+
op: agg_ops.QuantileOp,
143+
column: typed_expr.TypedExpr,
116144
window: typing.Optional[window_spec.WindowSpec] = None,
117145
) -> sge.Expression:
118-
return apply_window_if_present(sge.func("COUNT", sge.convert(1)), window)
146+
# TODO: Support interpolation argument
147+
# TODO: Support percentile_disc
148+
result: sge.Expression = sge.func("PERCENTILE_CONT", column.expr, sge.convert(op.q))
149+
if window is None:
150+
# PERCENTILE_CONT is a navigation function, not an aggregate function, so it always needs an OVER clause.
151+
result = sge.Window(this=result)
152+
else:
153+
result = apply_window_if_present(result, window)
154+
if op.should_floor_result:
155+
result = sge.Cast(this=sge.func("FLOOR", result), to="INT64")
156+
return result
119157

120158

121159
@UNARY_OP_REGISTRATION.register(agg_ops.RankOp)
@@ -124,10 +162,16 @@ def _(
124162
column: typed_expr.TypedExpr,
125163
window: typing.Optional[window_spec.WindowSpec] = None,
126164
) -> sge.Expression:
127-
# Ranking functions do not support window framing clauses.
128-
return apply_window_if_present(
129-
sge.func("RANK"), window, include_framing_clauses=False
130-
)
165+
return apply_window_if_present(sge.func("RANK"), window)
166+
167+
168+
@UNARY_OP_REGISTRATION.register(agg_ops.SizeUnaryOp)
169+
def _(
170+
op: agg_ops.SizeUnaryOp,
171+
_,
172+
window: typing.Optional[window_spec.WindowSpec] = None,
173+
) -> sge.Expression:
174+
return apply_window_if_present(sge.func("COUNT", sge.convert(1)), window)
131175

132176

133177
@UNARY_OP_REGISTRATION.register(agg_ops.SumOp)

bigframes/core/compile/sqlglot/aggregations/windows.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
def apply_window_if_present(
2626
value: sge.Expression,
2727
window: typing.Optional[window_spec.WindowSpec] = None,
28-
include_framing_clauses: bool = True,
2928
) -> sge.Expression:
3029
if window is None:
3130
return value
@@ -65,7 +64,7 @@ def apply_window_if_present(
6564
if not window.bounds and not order:
6665
return sge.Window(this=value, partition_by=group_by)
6766

68-
if not window.bounds and not include_framing_clauses:
67+
if not window.bounds:
6968
return sge.Window(this=value, partition_by=group_by, order=order)
7069

7170
kind = (

bigframes/core/compile/sqlglot/expressions/ai_ops.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,13 @@ def _(*exprs: TypedExpr, op: ops.AIGenerateInt) -> sge.Expression:
4040
return sge.func("AI.GENERATE_INT", *args)
4141

4242

43+
@register_nary_op(ops.AIGenerateDouble, pass_op=True)
44+
def _(*exprs: TypedExpr, op: ops.AIGenerateDouble) -> sge.Expression:
45+
args = [_construct_prompt(exprs, op.prompt_context)] + _construct_named_args(op)
46+
47+
return sge.func("AI.GENERATE_DOUBLE", *args)
48+
49+
4350
def _construct_prompt(
4451
exprs: tuple[TypedExpr, ...], prompt_context: tuple[str | None, ...]
4552
) -> sge.Kwarg:

0 commit comments

Comments
 (0)