Skip to content

Commit b397dce

Browse files
authored
Merge branch 'main' into tswast-cover
2 parents ad0ece6 + 305e57d commit b397dce

File tree

38 files changed

+1056
-70
lines changed

38 files changed

+1056
-70
lines changed

CHANGELOG.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,34 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.25.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.24.0...v2.25.0) (2025-10-13)
8+
9+
10+
### Features
11+
12+
* Add barh, pie plot types ([#2146](https://github.com/googleapis/python-bigquery-dataframes/issues/2146)) ([5cc3c5b](https://github.com/googleapis/python-bigquery-dataframes/commit/5cc3c5b1391a7dfa062b1d77f001726b013f6337))
13+
* Add Index.__eq__ for consts, aligned objects ([#2141](https://github.com/googleapis/python-bigquery-dataframes/issues/2141)) ([8514200](https://github.com/googleapis/python-bigquery-dataframes/commit/85142008ec895fa078d192bbab942d0257f70df3))
14+
* Add output_schema parameter to ai.generate() ([#2139](https://github.com/googleapis/python-bigquery-dataframes/issues/2139)) ([ef0b0b7](https://github.com/googleapis/python-bigquery-dataframes/commit/ef0b0b73843da2a93baf08e4cd5457fbb590b89c))
15+
* Create session-scoped `cut`, `DataFrame`, `MultiIndex`, `Index`, `Series`, `to_datetime`, and `to_timedelta` methods ([#2157](https://github.com/googleapis/python-bigquery-dataframes/issues/2157)) ([5e1e809](https://github.com/googleapis/python-bigquery-dataframes/commit/5e1e8098ecf212c91d73fa80d722d1cb3e46668b))
16+
* Replace ML.GENERATE_TEXT with AI.GENERATE for audio transcription ([#2151](https://github.com/googleapis/python-bigquery-dataframes/issues/2151)) ([a410d0a](https://github.com/googleapis/python-bigquery-dataframes/commit/a410d0ae43ef3b053b650804156eda0b1f569da9))
17+
* Support string literal inputs for AI functions ([#2152](https://github.com/googleapis/python-bigquery-dataframes/issues/2152)) ([7600001](https://github.com/googleapis/python-bigquery-dataframes/commit/760000122dc190ac8a3303234cf4cbee1bbb9493))
18+
19+
20+
### Bug Fixes
21+
22+
* Address typo in error message ([#2142](https://github.com/googleapis/python-bigquery-dataframes/issues/2142)) ([cdf2dd5](https://github.com/googleapis/python-bigquery-dataframes/commit/cdf2dd55a0c03da50ab92de09788cafac0abf6f6))
23+
* Avoid possible circular imports in global session ([#2115](https://github.com/googleapis/python-bigquery-dataframes/issues/2115)) ([095c0b8](https://github.com/googleapis/python-bigquery-dataframes/commit/095c0b85a25a2e51087880909597cc62a0341c93))
24+
* Fix too many cluster columns requested by caching ([#2155](https://github.com/googleapis/python-bigquery-dataframes/issues/2155)) ([35c1c33](https://github.com/googleapis/python-bigquery-dataframes/commit/35c1c33b85d1b92e402aab73677df3ffe43a51b4))
25+
* Show progress even in job optional queries ([#2119](https://github.com/googleapis/python-bigquery-dataframes/issues/2119)) ([1f48d3a](https://github.com/googleapis/python-bigquery-dataframes/commit/1f48d3a62e7e6dac4acb39e911daf766b8e2fe62))
26+
* Yield row count from read session if otherwise unknown ([#2148](https://github.com/googleapis/python-bigquery-dataframes/issues/2148)) ([8997d4d](https://github.com/googleapis/python-bigquery-dataframes/commit/8997d4d7d9965e473195f98c550c80657035b7e1))
27+
28+
29+
### Documentation
30+
31+
* Add a brief intro notebook for bbq AI functions ([#2150](https://github.com/googleapis/python-bigquery-dataframes/issues/2150)) ([1f434fb](https://github.com/googleapis/python-bigquery-dataframes/commit/1f434fb5c7c00601654b3ab19c6ad7fceb258bd6))
32+
* Fix AI function-related docs ([#2149](https://github.com/googleapis/python-bigquery-dataframes/issues/2149)) ([93a0749](https://github.com/googleapis/python-bigquery-dataframes/commit/93a0749392b84f27162654fe5ea5baa329a23f99))
33+
* Remove progress bar from getting started template ([#2143](https://github.com/googleapis/python-bigquery-dataframes/issues/2143)) ([d13abad](https://github.com/googleapis/python-bigquery-dataframes/commit/d13abadbcd68d03997e8dc11bb7a2b14bbd57fcc))
34+
735
## [2.24.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.23.0...v2.24.0) (2025-10-07)
836

937

bigframes/bigquery/_operations/ai.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,21 @@ def generate(
6565
1 Ottawa\\n
6666
Name: result, dtype: string
6767
68-
You get structured output when the `output_schema` parameter is set:
68+
You get structured output when the `output_schema` parameter is set:
6969
7070
>>> animals = bpd.Series(["Rabbit", "Spider"])
7171
>>> bbq.ai.generate(animals, output_schema={"number_of_legs": "INT64", "is_herbivore": "BOOL"})
7272
0 {'is_herbivore': True, 'number_of_legs': 4, 'f...
7373
1 {'is_herbivore': False, 'number_of_legs': 8, '...
7474
dtype: struct<is_herbivore: bool, number_of_legs: int64, full_response: extension<dbjson<JSONArrowType>>, status: string>[pyarrow]
7575
76+
.. note::
77+
78+
This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
79+
Service Specific Terms (https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
80+
and might have limited support. For more information, see the launch stage descriptions
81+
(https://cloud.google.com/products#product-launch-stages).
82+
7683
Args:
7784
prompt (str | Series | List[str|Series] | Tuple[str|Series, ...]):
7885
A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series
@@ -165,6 +172,13 @@ def generate_bool(
165172
2 False
166173
Name: result, dtype: boolean
167174
175+
.. note::
176+
177+
This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
178+
Service Specific Terms (https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
179+
and might have limited support. For more information, see the launch stage descriptions
180+
(https://cloud.google.com/products#product-launch-stages).
181+
168182
Args:
169183
prompt (str | Series | List[str|Series] | Tuple[str|Series, ...]):
170184
A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series
@@ -240,6 +254,13 @@ def generate_int(
240254
2 8
241255
Name: result, dtype: Int64
242256
257+
.. note::
258+
259+
This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
260+
Service Specific Terms (https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
261+
and might have limited support. For more information, see the launch stage descriptions
262+
(https://cloud.google.com/products#product-launch-stages).
263+
243264
Args:
244265
prompt (str | Series | List[str|Series] | Tuple[str|Series, ...]):
245266
A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series
@@ -315,6 +336,13 @@ def generate_double(
315336
2 8.0
316337
Name: result, dtype: Float64
317338
339+
.. note::
340+
341+
This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
342+
Service Specific Terms (https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
343+
and might have limited support. For more information, see the launch stage descriptions
344+
(https://cloud.google.com/products#product-launch-stages).
345+
318346
Args:
319347
prompt (str | Series | List[str|Series] | Tuple[str|Series, ...]):
320348
A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series
@@ -371,6 +399,7 @@ def if_(
371399
provides optimization such that not all rows are evaluated with the LLM.
372400
373401
**Examples:**
402+
374403
>>> import bigframes.pandas as bpd
375404
>>> import bigframes.bigquery as bbq
376405
>>> bpd.options.display.progress_bar = None
@@ -386,6 +415,13 @@ def if_(
386415
1 Illinois
387416
dtype: string
388417
418+
.. note::
419+
420+
This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
421+
Service Specific Terms (https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
422+
and might have limited support. For more information, see the launch stage descriptions
423+
(https://cloud.google.com/products#product-launch-stages).
424+
389425
Args:
390426
prompt (str | Series | List[str|Series] | Tuple[str|Series, ...]):
391427
A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series
@@ -433,6 +469,13 @@ def classify(
433469
<BLANKLINE>
434470
[2 rows x 2 columns]
435471
472+
.. note::
473+
474+
This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
475+
Service Specific Terms (https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
476+
and might have limited support. For more information, see the launch stage descriptions
477+
(https://cloud.google.com/products#product-launch-stages).
478+
436479
Args:
437480
input (str | Series | List[str|Series] | Tuple[str|Series, ...]):
438481
A mixture of Series and string literals that specifies the input to send to the model. The Series can be BigFrames Series
@@ -482,6 +525,13 @@ def score(
482525
2 3.0
483526
dtype: Float64
484527
528+
.. note::
529+
530+
This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
531+
Service Specific Terms (https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
532+
and might have limited support. For more information, see the launch stage descriptions
533+
(https://cloud.google.com/products#product-launch-stages).
534+
485535
Args:
486536
prompt (str | Series | List[str|Series] | Tuple[str|Series, ...]):
487537
A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series

bigframes/core/compile/sqlglot/aggregations/ordered_unary_compiler.py

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,8 @@
1414

1515
from __future__ import annotations
1616

17-
import typing
18-
1917
import sqlglot.expressions as sge
2018

21-
from bigframes.core import window_spec
2219
import bigframes.core.compile.sqlglot.aggregations.op_registration as reg
2320
import bigframes.core.compile.sqlglot.expressions.typed_expr as typed_expr
2421
from bigframes.operations import aggregations as agg_ops
@@ -29,9 +26,35 @@
2926
def compile(
3027
op: agg_ops.WindowOp,
3128
column: typed_expr.TypedExpr,
32-
window: typing.Optional[window_spec.WindowSpec] = None,
33-
order_by: typing.Sequence[sge.Expression] = [],
29+
*,
30+
order_by: tuple[sge.Expression, ...],
31+
) -> sge.Expression:
32+
return ORDERED_UNARY_OP_REGISTRATION[op](op, column, order_by=order_by)
33+
34+
35+
@ORDERED_UNARY_OP_REGISTRATION.register(agg_ops.ArrayAggOp)
36+
def _(
37+
op: agg_ops.ArrayAggOp,
38+
column: typed_expr.TypedExpr,
39+
*,
40+
order_by: tuple[sge.Expression, ...],
3441
) -> sge.Expression:
35-
return ORDERED_UNARY_OP_REGISTRATION[op](
36-
op, column, window=window, order_by=order_by
37-
)
42+
expr = column.expr
43+
if len(order_by) > 0:
44+
expr = sge.Order(this=column.expr, expressions=list(order_by))
45+
return sge.IgnoreNulls(this=sge.ArrayAgg(this=expr))
46+
47+
48+
@ORDERED_UNARY_OP_REGISTRATION.register(agg_ops.StringAggOp)
49+
def _(
50+
op: agg_ops.StringAggOp,
51+
column: typed_expr.TypedExpr,
52+
*,
53+
order_by: tuple[sge.Expression, ...],
54+
) -> sge.Expression:
55+
expr = column.expr
56+
if len(order_by) > 0:
57+
expr = sge.Order(this=expr, expressions=list(order_by))
58+
59+
expr = sge.GroupConcat(this=expr, separator=sge.convert(op.sep))
60+
return sge.func("COALESCE", expr, sge.convert(""))

bigframes/core/compile/sqlglot/aggregations/unary_compiler.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,27 @@ def _(
9898
return apply_window_if_present(sge.func("COUNT", column.expr), window)
9999

100100

101+
@UNARY_OP_REGISTRATION.register(agg_ops.DateSeriesDiffOp)
102+
def _(
103+
op: agg_ops.DateSeriesDiffOp,
104+
column: typed_expr.TypedExpr,
105+
window: typing.Optional[window_spec.WindowSpec] = None,
106+
) -> sge.Expression:
107+
if column.dtype != dtypes.DATE_DTYPE:
108+
raise TypeError(f"Cannot perform date series diff on type {column.dtype}")
109+
shift_op_impl = UNARY_OP_REGISTRATION[agg_ops.ShiftOp(0)]
110+
shifted = shift_op_impl(agg_ops.ShiftOp(op.periods), column, window)
111+
# Conversion factor from days to microseconds
112+
conversion_factor = 24 * 60 * 60 * 1_000_000
113+
return sge.Cast(
114+
this=sge.DateDiff(
115+
this=column.expr, expression=shifted, unit=sge.Identifier(this="DAY")
116+
)
117+
* sge.convert(conversion_factor),
118+
to="INT64",
119+
)
120+
121+
101122
@UNARY_OP_REGISTRATION.register(agg_ops.DenseRankOp)
102123
def _(
103124
op: agg_ops.DenseRankOp,
@@ -151,6 +172,23 @@ def _(
151172
)
152173

153174

175+
@UNARY_OP_REGISTRATION.register(agg_ops.DiffOp)
176+
def _(
177+
op: agg_ops.DiffOp,
178+
column: typed_expr.TypedExpr,
179+
window: typing.Optional[window_spec.WindowSpec] = None,
180+
) -> sge.Expression:
181+
shift_op_impl = UNARY_OP_REGISTRATION[agg_ops.ShiftOp(0)]
182+
shifted = shift_op_impl(agg_ops.ShiftOp(op.periods), column, window)
183+
if column.dtype in (dtypes.BOOL_DTYPE, dtypes.INT_DTYPE, dtypes.FLOAT_DTYPE):
184+
if column.dtype == dtypes.BOOL_DTYPE:
185+
return sge.NEQ(this=column.expr, expression=shifted)
186+
else:
187+
return sge.Sub(this=column.expr, expression=shifted)
188+
else:
189+
raise TypeError(f"Cannot perform diff on type {column.dtype}")
190+
191+
154192
@UNARY_OP_REGISTRATION.register(agg_ops.MaxOp)
155193
def _(
156194
op: agg_ops.MaxOp,
@@ -240,6 +278,27 @@ def _(
240278
return apply_window_if_present(sge.func("COUNT", sge.convert(1)), window)
241279

242280

281+
@UNARY_OP_REGISTRATION.register(agg_ops.ShiftOp)
282+
def _(
283+
op: agg_ops.ShiftOp,
284+
column: typed_expr.TypedExpr,
285+
window: typing.Optional[window_spec.WindowSpec] = None,
286+
) -> sge.Expression:
287+
if op.periods == 0: # No-op
288+
return column.expr
289+
if op.periods > 0:
290+
return apply_window_if_present(
291+
sge.func("LAG", column.expr, sge.convert(op.periods)),
292+
window,
293+
include_framing_clauses=False,
294+
)
295+
return apply_window_if_present(
296+
sge.func("LEAD", column.expr, sge.convert(-op.periods)),
297+
window,
298+
include_framing_clauses=False,
299+
)
300+
301+
243302
@UNARY_OP_REGISTRATION.register(agg_ops.SumOp)
244303
def _(
245304
op: agg_ops.SumOp,
@@ -255,3 +314,20 @@ def _(
255314
# Will be null if all inputs are null. Pandas defaults to zero sum though.
256315
zero = pd.to_timedelta(0) if column.dtype == dtypes.TIMEDELTA_DTYPE else 0
257316
return sge.func("IFNULL", expr, ir._literal(zero, column.dtype))
317+
318+
319+
@UNARY_OP_REGISTRATION.register(agg_ops.TimeSeriesDiffOp)
320+
def _(
321+
op: agg_ops.TimeSeriesDiffOp,
322+
column: typed_expr.TypedExpr,
323+
window: typing.Optional[window_spec.WindowSpec] = None,
324+
) -> sge.Expression:
325+
if column.dtype != dtypes.TIMESTAMP_DTYPE:
326+
raise TypeError(f"Cannot perform time series diff on type {column.dtype}")
327+
shift_op_impl = UNARY_OP_REGISTRATION[agg_ops.ShiftOp(0)]
328+
shifted = shift_op_impl(agg_ops.ShiftOp(op.periods), column, window)
329+
return sge.TimestampDiff(
330+
this=column.expr,
331+
expression=shifted,
332+
unit=sge.Identifier(this="MICROSECOND"),
333+
)

bigframes/core/indexes/base.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -383,9 +383,16 @@ def to_series(
383383

384384
name = self.name if name is None else name
385385
if index is None:
386-
return bigframes.series.Series(data=self, index=self, name=name)
386+
return bigframes.series.Series(
387+
data=self, index=self, name=name, session=self._session
388+
)
387389
else:
388-
return bigframes.series.Series(data=self, index=Index(index), name=name)
390+
return bigframes.series.Series(
391+
data=self,
392+
index=Index(index, session=self._session),
393+
name=name,
394+
session=self._session,
395+
)
389396

390397
def get_level_values(self, level) -> Index:
391398
level_n = level if isinstance(level, int) else self.names.index(level)

0 commit comments

Comments
 (0)