Skip to content

Commit 03a153c

Browse files
feat: Add bigframes.bigquery.st_regionstats function
This commit adds the `bigframes.bigquery.st_regionstats` function, which allows users to compute statistics for a raster band within a given geography. The implementation includes: - A new `StRegionStatsOp` in `bigframes/operations/geo_ops.py`. - Compiler implementations for both the SQLGlot and Ibis backends. - A unit test with a SQL snapshot. - A code sample in `samples/snippets/wildfire_risk.py` that demonstrates the use of the new function.
1 parent dc46b3c commit 03a153c

File tree

11 files changed

+296
-6
lines changed

11 files changed

+296
-6
lines changed

bigframes/bigquery/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
st_intersection,
4141
st_isclosed,
4242
st_length,
43+
st_regionstats,
4344
)
4445
from bigframes.bigquery._operations.json import (
4546
json_extract,
@@ -80,6 +81,7 @@
8081
st_intersection,
8182
st_isclosed,
8283
st_length,
84+
st_regionstats,
8385
# json ops
8486
json_extract,
8587
json_extract_array,

bigframes/bigquery/_operations/geo.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@
1414

1515
from __future__ import annotations
1616

17-
from typing import Union
17+
import json
18+
from typing import Mapping, Union
1819

1920
import shapely # type: ignore
2021

2122
from bigframes import operations as ops
23+
import bigframes.dataframe
2224
import bigframes.geopandas
2325
import bigframes.series
2426

@@ -675,3 +677,38 @@ def st_length(
675677
series = series._apply_unary_op(ops.GeoStLengthOp(use_spheroid=use_spheroid))
676678
series.name = None
677679
return series
680+
681+
682+
def st_regionstats(
    geography: bigframes.geopandas.GeoSeries,
    raster: bigframes.series.Series,
    band: str,
    *,
    options: Union[Mapping[str, Union[str, int, float]], None] = None,
) -> bigframes.dataframe.DataFrame:
    """Computes statistics for a raster band within a given geography.

    See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_regionstats

    .. warning::
        This function requires the Earth Engine API to be enabled.

    Args:
        geography (bigframes.geopandas.GeoSeries):
            A series of geography objects.
        raster (bigframes.series.Series):
            A series of raster URIs. This can be a Google Cloud Storage URI,
            or an Earth Engine asset ID.
        band (str):
            The name of the raster band to compute statistics for.
        options (Mapping[str, Union[str, int, float]], optional):
            A dictionary of options to pass to the function. See the BigQuery
            documentation for a list of available options. ``None`` (the
            default) and an empty mapping both mean that no options are
            passed.

    Returns:
        bigframes.dataframe.DataFrame:
            A dataframe containing the computed statistics.
    """
    # The op stores its options as a string, so the mapping is JSON-encoded
    # here; an absent or empty mapping is encoded as None.
    serialized_options = json.dumps(options) if options else None
    op = ops.StRegionStatsOp(options=serialized_options)
    df = geography._apply_ternary_op(raster, band, op)
    # The result has a single struct column; expand its fields into columns.
    return df[df.columns[0]].struct.explode()

bigframes/core/compile/ibis_compiler/scalar_op_compiler.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from bigframes.core import agg_expressions, ordering
2727
import bigframes.core.compile.ibis_types
2828
import bigframes.core.expression as ex
29-
from bigframes.operations import numeric_ops
29+
from bigframes.operations import geo_ops, numeric_ops
3030

3131
if TYPE_CHECKING:
3232
import bigframes.operations as ops
@@ -159,7 +159,9 @@ def normalized_impl(args: typing.Sequence[ibis_types.Value], op: ops.RowOp):
159159
return decorator
160160

161161
def register_ternary_op(
162-
self, op_ref: typing.Union[ops.TernaryOp, type[ops.TernaryOp]]
162+
self,
163+
op_ref: typing.Union[ops.TernaryOp, type[ops.TernaryOp]],
164+
pass_op: bool = False,
163165
):
164166
"""
165167
Decorator to register a ternary op implementation.
@@ -172,7 +174,10 @@ def register_ternary_op(
172174

173175
def decorator(impl: typing.Callable[..., ibis_types.Value]):
174176
def normalized_impl(args: typing.Sequence[ibis_types.Value], op: ops.RowOp):
175-
return impl(args[0], args[1], args[2])
177+
if pass_op:
178+
return impl(args[0], args[1], args[2], op)
179+
else:
180+
return impl(args[0], args[1], args[2])
176181

177182
self._register(key, normalized_impl)
178183
return impl
@@ -278,3 +283,18 @@ def isnanornull(arg):
278283
@scalar_op_compiler.register_unary_op(numeric_ops.isfinite_op)
279284
def isfinite(arg):
280285
return arg.isinf().negate() & arg.isnan().negate()
286+
287+
288+
@scalar_op_compiler.register_ternary_op(geo_ops.StRegionStatsOp, pass_op=True)
def st_regionstats(
    geography: ibis_types.Value,
    raster: ibis_types.Value,
    band: ibis_types.Value,
    op: geo_ops.StRegionStatsOp,
):
    """Compile ``StRegionStatsOp`` into a call to ``st_regionstats``.

    The call receives the geography, raster and band values, followed by the
    op's options as a JSON literal when ``op.options`` is set.
    """
    # Zero or one trailing argument, depending on whether options were given.
    options_literal = (
        [bigframes_vendored.ibis.literal(op.options, type="json")]
        if op.options
        else []
    )
    args = [geography, raster, band, *options_literal]
    return bigframes_vendored.ibis.remote_function(
        "st_regionstats", args, output_type="struct<min: float, max: float, sum: float, count: int, mean: float>"  # type: ignore
    )

bigframes/core/compile/sqlglot/scalar_compiler.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import bigframes.core.compile.sqlglot.sqlglot_ir as ir
2323
import bigframes.core.expression as ex
2424
import bigframes.operations as ops
25+
import bigframes.operations.geo_ops as geo_ops
2526

2627

2728
class ScalarOpCompiler:
@@ -121,20 +122,28 @@ def normalized_impl(args: typing.Sequence[TypedExpr], op: ops.RowOp):
121122
return decorator
122123

123124
def register_ternary_op(
124-
self, op_ref: typing.Union[ops.TernaryOp, type[ops.TernaryOp]]
125+
self,
126+
op_ref: typing.Union[ops.TernaryOp, type[ops.TernaryOp]],
127+
pass_op: bool = False,
125128
):
126129
"""
127130
Decorator to register a ternary op implementation.
128131
129132
Args:
130133
op_ref (TernaryOp or TernaryOp type):
131134
Class or instance of operator that is implemented by the decorated function.
135+
pass_op (bool):
136+
Set to true if implementation takes the operator object as the last argument.
137+
This is needed for parameterized ops where parameters are part of op object.
132138
"""
133139
key = typing.cast(str, op_ref.name)
134140

135141
def decorator(impl: typing.Callable[..., sge.Expression]):
136142
def normalized_impl(args: typing.Sequence[TypedExpr], op: ops.RowOp):
137-
return impl(args[0], args[1], args[2])
143+
if pass_op:
144+
return impl(args[0], args[1], args[2], op)
145+
else:
146+
return impl(args[0], args[1], args[2])
138147

139148
self._register(key, normalized_impl)
140149
return impl
@@ -180,3 +189,23 @@ def _register(
180189

181190
# Singleton compiler
182191
scalar_op_compiler = ScalarOpCompiler()
192+
193+
194+
@scalar_op_compiler.register_ternary_op(geo_ops.StRegionStatsOp, pass_op=True)
def compile_st_regionstats(
    geography: TypedExpr,
    raster: TypedExpr,
    band: TypedExpr,
    op: geo_ops.StRegionStatsOp,
):
    """Compile ``StRegionStatsOp`` into an ``ST_REGIONSTATS`` function call.

    Args:
        geography: Compiled geography argument.
        raster: Compiled raster argument.
        band: Compiled band argument.
        op: The operator being compiled; ``op.options``, when set, is added
            to the call as the named ``OPTIONS`` argument.

    Returns:
        The SQLGlot expression for the ``ST_REGIONSTATS`` call.
    """
    args = [geography.expr, raster.expr, band.expr]
    if op.options:
        # Named arguments are written ``name => value``. ``Kwarg`` renders
        # that form, whereas ``EQ`` would render the comparison
        # ``OPTIONS = ...`` and pass a boolean as a fourth positional argument.
        # NOTE(review): confirm that BigQuery accepts ``JSON(<string>)`` here;
        # the documented JSON literal form is ``JSON '<string>'``.
        args.append(
            sge.Kwarg(
                this=sge.Identifier(this="OPTIONS"),
                expression=sge.Anonymous(
                    this="JSON", expressions=[sge.convert(op.options)]
                ),
            )
        )
    return sge.func("ST_REGIONSTATS", *args)

bigframes/operations/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@
121121
GeoStBufferOp,
122122
GeoStDistanceOp,
123123
GeoStLengthOp,
124+
StRegionStatsOp,
124125
)
125126
from bigframes.operations.json_ops import (
126127
JSONExtract,
@@ -419,6 +420,7 @@
419420
"geo_x_op",
420421
"geo_y_op",
421422
"GeoStDistanceOp",
423+
"StRegionStatsOp",
422424
# AI ops
423425
"AIClassify",
424426
"AIGenerate",

bigframes/operations/geo_ops.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
import dataclasses
16+
import typing
1617

1718
from bigframes import dtypes
1819
from bigframes.operations import base_ops
@@ -126,6 +127,25 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
126127
return dtypes.FLOAT_DTYPE
127128

128129

130+
@dataclasses.dataclass(frozen=True)
class StRegionStatsOp(base_ops.TernaryOp):
    """Ternary operator for the BigQuery ``ST_REGIONSTATS`` function.

    See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_regionstats
    """

    name = "st_regionstats"
    # Options for the function, held as a string; None when there are none.
    options: typing.Optional[str] = None

    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
        """Return the struct type of the result; the input types are not consulted."""
        # Field order is part of the struct type, so it is spelled out here.
        field_dtypes = {
            "min": dtypes.FLOAT_DTYPE,
            "max": dtypes.FLOAT_DTYPE,
            "sum": dtypes.FLOAT_DTYPE,
            "count": dtypes.INT_DTYPE,
            "mean": dtypes.FLOAT_DTYPE,
        }
        return dtypes.struct_type(list(field_dtypes.items()))
147+
148+
129149
@dataclasses.dataclass(frozen=True)
130150
class GeoStLengthOp(base_ops.UnaryOp):
131151
name = "geo_st_length"

bigframes/series.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2722,6 +2722,20 @@ def _apply_binary_op(
27222722
block, result_id = self._block.project_expr(expr, name)
27232723
return Series(block.select_column(result_id))
27242724

2725+
def _apply_ternary_op(
    self,
    other1: typing.Any,
    other2: typing.Any,
    op: ops.TernaryOp,
) -> bigframes.dataframe.DataFrame:
    """Apply a ternary operator to this series and two other operands.

    The three operands are aligned first; the operator expression is then
    projected onto the aligned block under this series' name.

    Returns:
        A DataFrame holding only the projected result column.
    """
    self_col, other1_col, other2_col, aligned_block = self._align3(other1, other2)
    expr = op.as_expr(self_col, other1_col, other2_col)
    result_block, result_id = aligned_block.project_expr(expr, self._name)
    return bigframes.dataframe.DataFrame(result_block.select_column(result_id))
2738+
27252739
def _apply_nary_op(
27262740
self,
27272741
op: ops.NaryOp,

samples/snippets/wildfire_risk.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import bigframes.bigquery as bbq
16+
import bigframes.pandas as bpd
17+
18+
# Step 1: Select inputs from datasets that we've subscribed to
fire_table = bpd.read_gbq("wildfire_risk_to_community_v0_mosaic.fire")
wildfire_raster = fire_table["assets.image.href"]

colorado_places = bpd.read_gbq(
    "bigquery-public-data.geo_us_census_places.places_colorado"
)
places = colorado_places[["place_id", "place_name", "place_geom"]].rename(
    columns={"place_geom": "geo"}
)

# Step 2: Compute the weather forecast using WeatherNext Graph forecast data
weather_forecast = bpd.read_gbq("weathernext_graph_forecasts.59572747_4_0")
is_selected_run = weather_forecast["init_time"] == "2025-04-28 00:00:00+00:00"
weather_forecast = weather_forecast[is_selected_run].explode("forecast")

# Wind speed is the magnitude of the (u, v) wind vector.
u_wind = weather_forecast["forecast"]["10m_u_component_of_wind"]
v_wind = weather_forecast["forecast"]["10m_v_component_of_wind"]
wind_speed = (u_wind**2 + v_wind**2) ** 0.5
weather_forecast = weather_forecast.assign(wind_speed=wind_speed)

# Keep only forecast entries whose "hours" value is below 24.
weather_forecast = weather_forecast[weather_forecast["forecast"]["hours"] < 24]

# Attach place attributes, then aggregate the wind speed per place.
weather_forecast = (
    weather_forecast.merge(
        places, how="inner", left_on="geography_polygon", right_on="geo"
    )
    .groupby("place_id")
    .agg(
        place_name=("place_name", "first"),
        geo=("geo", "first"),
        average_wind_speed=("wind_speed", "mean"),
        maximum_wind_speed=("wind_speed", "max"),
    )
)

# Step 3: Combine with wildfire risk for each community
likelihood_stats = bbq.st_regionstats(
    weather_forecast["geo"],
    wildfire_raster,
    "BP",
    options={"scale": 1000},
)
consequence_stats = bbq.st_regionstats(
    weather_forecast["geo"],
    wildfire_raster,
    "CRPS",
    options={"scale": 1000},
)
wildfire_risk = weather_forecast.assign(
    wildfire_likelihood=likelihood_stats["mean"],
    wildfire_consequence=consequence_stats["mean"],
)

# Step 4: Compute a simple composite index of relative wildfire risk.
likelihood_rank = wildfire_risk["wildfire_likelihood"].rank(pct=True)
consequence_rank = wildfire_risk["wildfire_consequence"].rank(pct=True)
wind_rank = wildfire_risk["average_wind_speed"].rank(pct=True)
relative_risk = (likelihood_rank + consequence_rank + wind_rank) / 3 * 100
wildfire_risk = wildfire_risk.assign(relative_risk=relative_risk)

tests/system/small/geopandas/test_geoseries.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,30 @@ def test_geo_centroid(session: bigframes.session.Session):
542542
)
543543

544544

545+
def test_st_regionstats(session: bigframes.session.Session):
    """Check that st_regionstats returns a positive mean for a forest-loss raster."""
    # This raster is a global image of forest cover.
    # See: https://developers.google.com/earth-engine/datasets/catalog/UMD_hansen_global_forest_change_2022_v1_10
    raster_uri = "projects/earthengine-public/assets/images/UMD/hansen/global_forest_change_2022_v1_10"

    # A small polygon over a forested area in Brazil, given as a closed ring.
    west, east = -49.8, -49.5
    south, north = -10.3, -10.0
    ring = [
        (west, south),
        (west, north),
        (east, north),
        (east, south),
        (west, south),
    ]
    geos = bigframes.geopandas.GeoSeries([Polygon(ring)], session=session)
    rasters = bigframes.pandas.Series([raster_uri], dtype="string", session=session)

    stats = bigframes.bigquery.st_regionstats(
        geos, rasters, "loss", options={"scale": 1000}
    )
    result = stats.to_pandas()

    assert result is not None
    assert "mean" in result.columns
    assert result["mean"][0] > 0
567+
568+
545569
def test_geo_convex_hull(session: bigframes.session.Session):
546570
bf_s = bigframes.series.Series(
547571
[

0 commit comments

Comments
 (0)