Skip to content

Commit bde9ab9

Browse files
Merge branch 'main' into crosstab
2 parents 0a86384 + 10ec52f commit bde9ab9

File tree

62 files changed

+1393
-189
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+1393
-189
lines changed

CHANGELOG.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,29 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.28.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.27.0...v2.28.0) (2025-11-03)
8+
9+
10+
### Features
11+
12+
* Add bigframes.bigquery.st_simplify ([#2210](https://github.com/googleapis/python-bigquery-dataframes/issues/2210)) ([ecee2bc](https://github.com/googleapis/python-bigquery-dataframes/commit/ecee2bc6ada0bc968fc56ed7194dc8c043547e93))
13+
* Add Series.dt.day_name ([#2218](https://github.com/googleapis/python-bigquery-dataframes/issues/2218)) ([5e006e4](https://github.com/googleapis/python-bigquery-dataframes/commit/5e006e404b65c32e5b1d342ebfcfce59ee592c8c))
14+
* Polars engine supports std, var ([#2215](https://github.com/googleapis/python-bigquery-dataframes/issues/2215)) ([ef5e83a](https://github.com/googleapis/python-bigquery-dataframes/commit/ef5e83acedf005cbe1e6ad174bec523ac50517d7))
15+
* Support INFORMATION_SCHEMA views in `read_gbq` ([#1895](https://github.com/googleapis/python-bigquery-dataframes/issues/1895)) ([d97cafc](https://github.com/googleapis/python-bigquery-dataframes/commit/d97cafcb5921fca2351b18011b0e54e2631cc53d))
16+
* Support some python standard lib callables in apply/combine ([#2187](https://github.com/googleapis/python-bigquery-dataframes/issues/2187)) ([86a2756](https://github.com/googleapis/python-bigquery-dataframes/commit/86a27564b48b854a32b3d11cd2105aa0fa496279))
17+
18+
19+
### Bug Fixes
20+
21+
* Correct connection normalization in blob system tests ([#2222](https://github.com/googleapis/python-bigquery-dataframes/issues/2222)) ([a0e1e50](https://github.com/googleapis/python-bigquery-dataframes/commit/a0e1e50e47c758bdceb54d04180ed36b35cf2e35))
22+
* Improve error handling in blob operations ([#2194](https://github.com/googleapis/python-bigquery-dataframes/issues/2194)) ([d410046](https://github.com/googleapis/python-bigquery-dataframes/commit/d4100466612df0523d01ed01ca1e115dabd6ef45))
23+
* Resolve AttributeError in TableWidget and improve initialization ([#1937](https://github.com/googleapis/python-bigquery-dataframes/issues/1937)) ([4c4c9b1](https://github.com/googleapis/python-bigquery-dataframes/commit/4c4c9b14657b7cda1940ef39e7d4db20a9ff5308))
24+
25+
26+
### Documentation
27+
28+
* Update bq_dataframes_llm_output_schema.ipynb ([#2004](https://github.com/googleapis/python-bigquery-dataframes/issues/2004)) ([316ba9f](https://github.com/googleapis/python-bigquery-dataframes/commit/316ba9f557d792117d5a7845d7567498f78dd513))
29+
730
## [2.27.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.26.0...v2.27.0) (2025-10-24)
831

932

bigframes/bigquery/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
st_intersection,
4141
st_isclosed,
4242
st_length,
43+
st_regionstats,
4344
st_simplify,
4445
)
4546
from bigframes.bigquery._operations.json import (
@@ -81,6 +82,7 @@
8182
st_intersection,
8283
st_isclosed,
8384
st_length,
85+
st_regionstats,
8486
st_simplify,
8587
# json ops
8688
json_extract,

bigframes/bigquery/_operations/geo.py

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@
1414

1515
from __future__ import annotations
1616

17-
from typing import Union
17+
import json
18+
from typing import Mapping, Optional, Union
1819

1920
import shapely # type: ignore
2021

2122
from bigframes import operations as ops
23+
import bigframes.dataframe
2224
import bigframes.geopandas
2325
import bigframes.series
2426

@@ -677,6 +679,65 @@ def st_length(
677679
return series
678680

679681

682+
def st_regionstats(
    geography: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
    raster_id: str,
    band: Optional[str] = None,
    include: Optional[str] = None,
    options: Optional[Mapping[str, Union[str, int, float]]] = None,
) -> bigframes.series.Series:
    """Summarizes the raster pixels that intersect each geography.

    For the raster image referenced by ``raster_id``, computes statistics
    over the valid pixels of the selected band that intersect with
    ``geography``: count, minimum, maximum, sum, standard deviation, mean,
    and area. Google Earth Engine computes the results of the function call.

    See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_regionstats

    Args:
        geography (bigframes.series.Series | bigframes.geopandas.GeoSeries):
            A series of geography objects to intersect with the raster image.
        raster_id (str):
            A string that identifies a raster image. Supported formats are:
            a URI from an image table provided by Google Earth Engine in
            BigQuery sharing (formerly Analytics Hub); a URI for a readable
            GeoTIFF raster file; or a Google Earth Engine asset path that
            references public catalog data or project-owned assets with read
            access.
        band (Optional[str]):
            Either the name of a single band within the raster image
            specified by ``raster_id``, or a formula, written in the Google
            Earth Engine image expression syntax, that computes a value from
            the available bands (bands are referenced by name inside the
            expression). If you don't specify a band, the first band of the
            image is used.
        include (Optional[str]):
            An optional formula, written in the Google Earth Engine image
            expression syntax, that computes a pixel weight. The formula
            should return values from 0 to 1; values outside this range are
            set to the nearest limit, either 0 or 1. A value of 0 means the
            pixel is invalid and it's excluded from analysis. A positive
            value means the pixel is valid. Values between 0 and 1 represent
            proportional weights for calculations, such as weighted means.
        options (Mapping[str, Union[str, int, float]], optional):
            A dictionary of options to pass to the function. See the BigQuery
            documentation for a list of available options.

    Returns:
        bigframes.pandas.Series:
            A STRUCT Series containing the computed statistics.
    """
    # The op stores options as a JSON string; a missing or empty mapping is
    # forwarded as None rather than as an empty JSON object.
    serialized_options = None
    if options:
        serialized_options = json.dumps(options)

    regionstats_op = ops.GeoStRegionStatsOp(
        raster_id=raster_id,
        band=band,
        include=include,
        options=serialized_options,
    )
    return geography._apply_unary_op(regionstats_op)
739+
740+
680741
def st_simplify(
681742
geography: "bigframes.series.Series",
682743
tolerance_meters: float,

bigframes/core/blocks.py

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
import bigframes.operations.aggregations as agg_ops
6969
from bigframes.session import dry_runs, execution_spec
7070
from bigframes.session import executor as executors
71+
from bigframes.session._io import pandas as io_pandas
7172

7273
# Type constraint for wherever column labels are used
7374
Label = typing.Hashable
@@ -711,12 +712,15 @@ def to_pandas_batches(
711712
# To reduce the number of edge cases to consider when working with the
712713
# results of this, always return at least one DataFrame. See:
713714
# b/428918844.
714-
empty_val = pd.DataFrame(
715-
{
716-
col: pd.Series([], dtype=self.expr.get_column_type(col))
717-
for col in itertools.chain(self.value_columns, self.index_columns)
718-
}
719-
)
715+
try:
716+
empty_arrow_table = self.expr.schema.to_pyarrow().empty_table()
717+
except pa.ArrowNotImplementedError:
718+
# Bug with some pyarrow versions (https://github.com/apache/arrow/issues/45262),
719+
# empty_table only supports base storage types, not extension types.
720+
empty_arrow_table = self.expr.schema.to_pyarrow(
721+
use_storage_types=True
722+
).empty_table()
723+
empty_val = io_pandas.arrow_to_pandas(empty_arrow_table, self.expr.schema)
720724
dfs = map(
721725
lambda a: a[0],
722726
itertools.zip_longest(
@@ -1992,6 +1996,31 @@ def _generate_resample_label(
19921996
Literal["epoch", "start", "start_day", "end", "end_day"],
19931997
] = "start_day",
19941998
) -> Block:
1999+
if not isinstance(rule, str):
2000+
raise NotImplementedError(
2001+
f"Only offset strings are currently supported for rule, but got {repr(rule)}. {constants.FEEDBACK_LINK}"
2002+
)
2003+
2004+
if rule in ("ME", "YE", "QE", "BME", "BA", "BQE", "W"):
2005+
raise NotImplementedError(
2006+
f"Offset strings 'ME', 'YE', 'QE', 'BME', 'BA', 'BQE', 'W' are not currently supported for rule, but got {repr(rule)}. {constants.FEEDBACK_LINK}"
2007+
)
2008+
2009+
if closed == "right":
2010+
raise NotImplementedError(
2011+
f"Only closed='left' is currently supported. {constants.FEEDBACK_LINK}",
2012+
)
2013+
2014+
if label == "right":
2015+
raise NotImplementedError(
2016+
f"Only label='left' is currently supported. {constants.FEEDBACK_LINK}",
2017+
)
2018+
2019+
if origin not in ("epoch", "start", "start_day"):
2020+
raise NotImplementedError(
2021+
f"Only origin='epoch', 'start', 'start_day' are currently supported, but got {repr(origin)}. {constants.FEEDBACK_LINK}"
2022+
)
2023+
19952024
# Validate and resolve the index or column to use for grouping
19962025
if on is None:
19972026
if len(self.index_columns) == 0:

bigframes/core/compile/ibis_compiler/operations/geo_ops.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@
1616

1717
from typing import cast
1818

19+
from bigframes_vendored import ibis
1920
from bigframes_vendored.ibis.expr import types as ibis_types
2021
import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes
22+
import bigframes_vendored.ibis.expr.operations.geospatial as ibis_geo
2123
import bigframes_vendored.ibis.expr.operations.udf as ibis_udf
2224

2325
from bigframes.core.compile.ibis_compiler import scalar_op_compiler
@@ -101,6 +103,35 @@ def geo_st_isclosed_op_impl(x: ibis_types.Value):
101103
return st_isclosed(x)
102104

103105

106+
@register_unary_op(ops.GeoStRegionStatsOp, pass_op=True)
def geo_st_regionstats_op_impl(
    geography: ibis_types.Value,
    op: ops.GeoStRegionStatsOp,
):
    """Builds the ``GeoRegionStats`` ibis expression for ``GeoStRegionStatsOp``.

    ``raster_id`` is always passed as a string literal. ``band`` and
    ``include`` become string literals and ``options`` becomes a JSON literal
    only when the corresponding op field is truthy; otherwise ``None`` is
    passed for that argument.
    """
    band = ibis.literal(op.band, type=ibis_dtypes.string()) if op.band else None
    include = (
        ibis.literal(op.include, type=ibis_dtypes.string()) if op.include else None
    )
    options = (
        ibis.literal(op.options, type=ibis_dtypes.json()) if op.options else None
    )
    raster_id = ibis.literal(op.raster_id, type=ibis_dtypes.string())

    node = ibis_geo.GeoRegionStats(
        arg=geography,  # type: ignore
        raster_id=raster_id,  # type: ignore
        band=band,  # type: ignore
        include=include,  # type: ignore
        options=options,  # type: ignore
    )
    return node.to_expr()
133+
134+
104135
@register_unary_op(ops.GeoStSimplifyOp, pass_op=True)
105136
def st_simplify_op_impl(x: ibis_types.Value, op: ops.GeoStSimplifyOp):
106137
x = cast(ibis_types.GeoSpatialValue, x)

bigframes/core/compile/sqlglot/aggregations/op_registration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,5 +52,5 @@ def arg_checker(*args, **kwargs):
5252
def __getitem__(self, op: str | agg_ops.WindowOp) -> CompilationFunc:
5353
key = op if isinstance(op, type) else type(op)
5454
if str(key) not in self._registered_ops:
55-
raise ValueError(f"{key} is already not registered")
55+
raise ValueError(f"{key} is not registered")
5656
return self._registered_ops[str(key)]

bigframes/core/compile/sqlglot/aggregations/unary_compiler.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,20 @@ def _(
239239
return apply_window_if_present(sge.func("MIN", column.expr), window)
240240

241241

242+
@UNARY_OP_REGISTRATION.register(agg_ops.PopVarOp)
def _(
    op: agg_ops.PopVarOp,
    column: typed_expr.TypedExpr,
    window: typing.Optional[window_spec.WindowSpec] = None,
) -> sge.Expression:
    """Compiles ``PopVarOp`` to a ``VAR_POP`` call, windowed if requested.

    Boolean columns are cast to INT64 before being aggregated.
    """
    is_bool = column.dtype == dtypes.BOOL_DTYPE
    operand = sge.Cast(this=column.expr, to="INT64") if is_bool else column.expr
    return apply_window_if_present(sge.func("VAR_POP", operand), window)
254+
255+
242256
@UNARY_OP_REGISTRATION.register(agg_ops.QuantileOp)
243257
def _(
244258
op: agg_ops.QuantileOp,
@@ -278,6 +292,22 @@ def _(
278292
return apply_window_if_present(sge.func("COUNT", sge.convert(1)), window)
279293

280294

295+
@UNARY_OP_REGISTRATION.register(agg_ops.StdOp)
def _(
    op: agg_ops.StdOp,
    column: typed_expr.TypedExpr,
    window: typing.Optional[window_spec.WindowSpec] = None,
) -> sge.Expression:
    """Compiles ``StdOp`` to a ``STDDEV`` call, windowed if requested.

    Boolean columns are cast to INT64 before being aggregated. When
    ``op.should_floor_result`` is set, or the column is a timedelta, the
    result is floored and cast to INT64.
    """
    expr = column.expr
    if column.dtype == dtypes.BOOL_DTYPE:
        expr = sge.Cast(this=expr, to="INT64")

    # Window the aggregate call itself and floor/cast afterwards. Flooring
    # first would hand a CAST(...) node to the window helper, and an OVER
    # clause is only valid directly on a function call.
    # NOTE(review): assumes apply_window_if_present wraps its argument in a
    # window (OVER) expression and returns it unchanged when window is None
    # -- confirm against its definition.
    expr = apply_window_if_present(sge.func("STDDEV", expr), window)
    if op.should_floor_result or column.dtype == dtypes.TIMEDELTA_DTYPE:
        expr = sge.Cast(this=sge.func("FLOOR", expr), to="INT64")
    return expr
309+
310+
281311
@UNARY_OP_REGISTRATION.register(agg_ops.ShiftOp)
282312
def _(
283313
op: agg_ops.ShiftOp,
@@ -331,3 +361,17 @@ def _(
331361
expression=shifted,
332362
unit=sge.Identifier(this="MICROSECOND"),
333363
)
364+
365+
366+
@UNARY_OP_REGISTRATION.register(agg_ops.VarOp)
def _(
    op: agg_ops.VarOp,
    column: typed_expr.TypedExpr,
    window: typing.Optional[window_spec.WindowSpec] = None,
) -> sge.Expression:
    """Compiles ``VarOp`` to a ``VAR_SAMP`` call, windowed if requested.

    Boolean columns are cast to INT64 before being aggregated.
    """
    operand = column.expr
    if column.dtype == dtypes.BOOL_DTYPE:
        operand = sge.Cast(this=operand, to="INT64")

    variance = sge.func("VAR_SAMP", operand)
    return apply_window_if_present(variance, window)

bigframes/core/compile/sqlglot/expressions/comparison_ops.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
109109
return sge.LTE(this=left_expr, expression=right_expr)
110110

111111

112+
@register_binary_op(ops.maximum_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
    """Compiles the binary maximum of two operands to ``GREATEST(left, right)``."""
    # sqlglot's Greatest takes its first operand as the required `this`
    # argument and any further operands in the `expressions` list; supplying
    # only `expressions` leaves the node without its required argument.
    return sge.Greatest(this=left.expr, expressions=[right.expr])
115+
116+
112117
@register_binary_op(ops.minimum_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
    """Compiles the binary minimum of two operands to ``LEAST(left, right)``."""
    # `expressions` is sqlglot's variable-length argument slot, so it is
    # given a list rather than a bare expression.
    return sge.Least(this=left.expr, expressions=[right.expr])

bigframes/core/compile/sqlglot/expressions/generic_ops.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
2525

2626
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
27+
register_binary_op = scalar_compiler.scalar_op_compiler.register_binary_op
2728
register_nary_op = scalar_compiler.scalar_op_compiler.register_nary_op
2829
register_ternary_op = scalar_compiler.scalar_op_compiler.register_ternary_op
2930

@@ -159,6 +160,13 @@ def _(*cases_and_outputs: TypedExpr) -> sge.Expression:
159160
)
160161

161162

163+
@register_binary_op(ops.coalesce_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
    """Compiles ``coalesce_op`` to ``COALESCE(left, right)``.

    When both operands are the same expression the COALESCE is skipped and
    the left expression is returned as is.
    """
    lhs, rhs = left.expr, right.expr
    return lhs if lhs == rhs else sge.Coalesce(this=lhs, expressions=[rhs])
168+
169+
162170
@register_nary_op(ops.RowKey)
163171
def _(*values: TypedExpr) -> sge.Expression:
164172
# All inputs into hash must be non-null or resulting hash will be null

bigframes/core/compile/sqlglot/expressions/geo_ops.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,32 @@ def _(expr: TypedExpr, op: ops.GeoStLengthOp) -> sge.Expression:
7474
return sge.func("ST_LENGTH", expr.expr)
7575

7676

77+
@register_unary_op(ops.GeoStRegionStatsOp, pass_op=True)
def _(
    geography: TypedExpr,
    op: ops.GeoStRegionStatsOp,
):
    """Compiles ``GeoStRegionStatsOp`` to an ``ST_REGIONSTATS`` call.

    The geography expression and the raster id are passed positionally.
    ``band``, ``include`` and ``options`` are appended as named arguments
    only when the corresponding op field is truthy; ``options`` is wrapped
    in a JSON expression.
    """
    # (argument name, op value, builder for the argument's SQL expression)
    optional_args = (
        ("band", op.band, sge.convert),
        ("include", op.include, sge.convert),
        ("options", op.options, lambda value: sge.JSON(this=sge.convert(value))),
    )
    named_args = [
        sge.Kwarg(this=arg_name, expression=build(value))
        for arg_name, value, build in optional_args
        if value
    ]
    return sge.func(
        "ST_REGIONSTATS",
        geography.expr,
        sge.convert(op.raster_id),
        *named_args,
    )
92+
93+
94+
@register_unary_op(ops.GeoStSimplifyOp, pass_op=True)
def _(expr: TypedExpr, op: ops.GeoStSimplifyOp) -> sge.Expression:
    """Compiles ``GeoStSimplifyOp`` to ``ST_SIMPLIFY(geography, tolerance_meters)``."""
    geography = expr.expr
    tolerance = sge.convert(op.tolerance_meters)
    return sge.func("ST_SIMPLIFY", geography, tolerance)
101+
102+
77103
@register_unary_op(ops.geo_x_op)
78104
def _(expr: TypedExpr) -> sge.Expression:
79105
return sge.func("SAFE.ST_X", expr.expr)

0 commit comments

Comments
 (0)