Skip to content

Commit 745211e

Browse files
committed
add arguments to st_regionstats and make it into a unary op
1 parent 6a4b1c9 commit 745211e

File tree

4 files changed

+53
-27
lines changed

4 files changed

+53
-27
lines changed

bigframes/bigquery/_operations/geo.py

Lines changed: 42 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from __future__ import annotations
1616

1717
import json
18-
from typing import Mapping, Union
18+
from typing import Mapping, Optional, Union
1919

2020
import shapely # type: ignore
2121

@@ -681,25 +681,46 @@ def st_length(
681681

682682
def st_regionstats(
683683
geography: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
684-
raster: bigframes.series.Series,
685-
band: str,
686-
options: Mapping[str, Union[str, int, float]] = {},
684+
raster_id: str,
685+
band: Optional[str] = None,
686+
include: Optional[str] = None,
687+
options: Optional[Mapping[str, Union[str, int, float]]] = None,
687688
) -> bigframes.dataframe.DataFrame:
688-
"""Computes statistics for a raster band within a given geography.
689+
"""Returns statistics summarizing the pixel values of the raster image
690+
referenced by raster_id that intersect with geography.
689691
690-
See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_regionstats
692+
The statistics include the count, minimum, maximum, sum, standard
693+
deviation, mean, and area of the valid pixels of the raster band named
694+
band_name. Google Earth Engine computes the results of the function call.
691695
692-
.. warning::
693-
This function requires the Earth Engine API to be enabled.
696+
See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_regionstats
694697
695698
Args:
696699
geography (bigframes.series.Series | bigframes.geopandas.GeoSeries):
697-
A series of geography objects.
698-
raster (bigframes.series.Series):
699-
A series of raster URIs. This can be a Google Cloud Storage URI,
700-
or an Earth Engine asset ID.
701-
band (str):
702-
The name of the raster band to compute statistics for.
700+
A series of geography objects to intersect with the raster image.
701+
raster_id (str):
702+
A string that identifies a raster image. The following formats are
703+
supported. A URI from an image table provided by Google Earth Engine
704+
in BigQuery sharing (formerly Analytics Hub). A URI for a readable
705+
GeoTIFF raster file. A Google Earth Engine asset path that
706+
references public catalog data or project-owned assets with read
707+
access.
708+
band (Optional[str]):
709+
A string in one of the following formats:
710+
A single band within the raster image specified by raster_id. A
711+
formula to compute a value from the available bands in the raster
712+
image. The formula uses the Google Earth Engine image expression
713+
syntax. Bands can be referenced by their name, band_name, in
714+
expressions. If you don't specify a band, the first band of the
715+
image is used.
716+
include (Optional[str]):
717+
An optional string formula that uses the Google Earth Engine image
718+
expression syntax to compute a pixel weight. The formula should
719+
return values from 0 to 1. Values outside this range are set to the
720+
nearest limit, either 0 or 1. A value of 0 means that the pixel is
721+
invalid and it's excluded from analysis. A positive value means that
722+
a pixel is valid. Values between 0 and 1 represent proportional
723+
weights for calculations, such as weighted means.
703724
options (Mapping[str, Union[str, int, float]], optional):
704725
A dictionary of options to pass to the function. See the BigQuery
705726
documentation for a list of available options.
@@ -708,6 +729,11 @@ def st_regionstats(
708729
bigframes.dataframe.DataFrame:
709730
A dataframe containing the computed statistics.
710731
"""
711-
op = ops.StRegionStatsOp(options=json.dumps(options) if options else None)
712-
df = geography._apply_ternary_op(raster, band, op)
732+
op = ops.StRegionStatsOp(
733+
raster_id=raster_id,
734+
band=band,
735+
include=include,
736+
options=json.dumps(options) if options else None,
737+
)
738+
df = geography._apply_unary_op(op)
713739
return df[df.columns[0]].struct.explode()

bigframes/core/compile/ibis_compiler/scalar_op_compiler.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -288,16 +288,15 @@ def isfinite(arg):
288288
return arg.isinf().negate() & arg.isnan().negate()
289289

290290

291-
@scalar_op_compiler.register_ternary_op(geo_ops.StRegionStatsOp, pass_op=True)
291+
@scalar_op_compiler.register_unary_op(geo_ops.StRegionStatsOp, pass_op=True)
292292
def st_regionstats(
293293
geography: ibis_types.Value,
294-
raster: ibis_types.Value,
295-
band: ibis_types.Value,
296294
op: geo_ops.StRegionStatsOp,
297295
):
298-
args = [geography, raster, band]
296+
args = [geography] # TODO: get band, include, and other properties from op.
299297
if op.options:
300298
args.append(bigframes_vendored.ibis.literal(op.options, type="json"))
299+
# TODO: We may need a custom ibis op so that we can pass arguments by name instead of position.
301300
return bigframes_vendored.ibis.remote_function(
302301
"st_regionstats",
303302
args,

bigframes/core/compile/sqlglot/scalar_compiler.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -230,14 +230,12 @@ def _add_parentheses(cls, expr: TypedExpr) -> TypedExpr:
230230
scalar_op_compiler = ScalarOpCompiler()
231231

232232

233-
@scalar_op_compiler.register_ternary_op(geo_ops.StRegionStatsOp, pass_op=True)
233+
@scalar_op_compiler.register_unary_op(geo_ops.StRegionStatsOp, pass_op=True)
234234
def compile_st_regionstats(
235235
geography: TypedExpr,
236-
raster: TypedExpr,
237-
band: TypedExpr,
238236
op: geo_ops.StRegionStatsOp,
239237
):
240-
args = [geography.expr, raster.expr, band.expr]
238+
args = [geography.expr] # TODO: get raster, band, include from op.
241239
if op.options:
242240
args.append(
243241
sge.Anonymous(

bigframes/operations/geo_ops.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414

1515
import dataclasses
16-
import typing
16+
from typing import Optional
1717

1818
from bigframes import dtypes
1919
from bigframes.operations import base_ops
@@ -128,11 +128,14 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
128128

129129

130130
@dataclasses.dataclass(frozen=True)
131-
class StRegionStatsOp(base_ops.TernaryOp):
131+
class StRegionStatsOp(base_ops.UnaryOp):
132132
"""See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_regionstats"""
133133

134134
name = "st_regionstats"
135-
options: typing.Optional[str] = None
135+
raster_id: str
136+
band: Optional[str]
137+
include: Optional[str]
138+
options: Optional[str]
136139

137140
def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
138141
return dtypes.struct_type(

0 commit comments

Comments
 (0)