Skip to content

Commit a680526

Browse files
feat: Add bigframes.bigquery.st_regionstats function
This commit introduces the `st_regionstats` function in `bigframes.bigquery`, allowing users to compute statistics for a raster band within a given geography.

Key changes:
- Added `StRegionStatsOp` in `bigframes/operations/geo_ops.py`.
- Implemented compilation logic for the operation in both Ibis and SQLGlot compilers.
- Exposed the `st_regionstats` function in `bigframes/bigquery/_operations/geo.py` and the public API.
- Added a new `_apply_ternary_op` method to `bigframes.series.Series`.
- Included a unit test with a snapshot to verify the generated SQL.
- Added a system test that demonstrates the functionality by converting a complex wildfire risk analysis query from SQL to BigFrames.
- Refactored compiler registries to support `pass_op=True` for ternary operations, enabling access to operator parameters during compilation.
1 parent b4db49c commit a680526

File tree

14 files changed

+301
-607
lines changed

14 files changed

+301
-607
lines changed

bigframes/bigquery/_operations/ai.py

Lines changed: 2 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,12 @@
1919
from __future__ import annotations
2020

2121
import json
22-
from typing import Any, Iterable, List, Literal, Mapping, Tuple, Union
22+
from typing import Any, List, Literal, Mapping, Tuple, Union
2323

2424
import pandas as pd
2525

26-
from bigframes import clients, dataframe, dtypes
27-
from bigframes import pandas as bpd
28-
from bigframes import series, session
26+
from bigframes import clients, dtypes, series, session
2927
from bigframes.core import convert, log_adapter
30-
from bigframes.ml import core as ml_core
3128
from bigframes.operations import ai_ops, output_schemas
3229

3330
PROMPT_TYPE = Union[
@@ -551,91 +548,6 @@ def score(
551548
return series_list[0]._apply_nary_op(operator, series_list[1:])
552549

553550

554-
@log_adapter.method_logger(custom_base_name="bigquery_ai")
555-
def forecast(
556-
df: dataframe.DataFrame | pd.DataFrame,
557-
*,
558-
data_col: str,
559-
timestamp_col: str,
560-
model: str = "TimesFM 2.0",
561-
id_cols: Iterable[str] | None = None,
562-
horizon: int = 10,
563-
confidence_level: float = 0.95,
564-
context_window: int | None = None,
565-
) -> dataframe.DataFrame:
566-
"""
567-
Forecast a time series over a future horizon using Google Research's open-source TimesFM (https://github.com/google-research/timesfm) model.
568-
569-
.. note::
570-
571-
This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
572-
Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
573-
and might have limited support. For more information, see the launch stage descriptions
574-
(https://cloud.google.com/products#product-launch-stages).
575-
576-
Args:
577-
df (DataFrame):
578-
The dataframe that contains the data that you want to forecast. It could be either a BigFrames DataFrame or
579-
a pandas DataFrame. If it's a pandas DataFrame, the global BigQuery session will be used to load the data.
580-
data_col (str):
581-
A str value that specifies the name of the data column. The data column contains the data to forecast.
582-
The data column must use one of the following data types: INT64, NUMERIC and FLOAT64
583-
timestamp_col (str):
584-
A str value that specifies the name of the time points column.
585-
The time points column provides the time points used to generate the forecast.
586-
The time points column must use one of the following data types: TIMESTAMP, DATE and DATETIME
587-
model (str, default "TimesFM 2.0"):
588-
A str value that specifies the name of the model. TimesFM 2.0 is the only supported value, and is the default value.
589-
id_cols (Iterable[str], optional):
590-
An iterable of str values that specifies the names of one or more ID columns. Each ID identifies a unique time series to forecast.
591-
Specify one or more values for this argument in order to forecast multiple time series using a single query.
592-
The columns that you specify must use one of the following data types: STRING, INT64, ARRAY<STRING> and ARRAY<INT64>
593-
horizon (int, default 10):
594-
An int value that specifies the number of time points to forecast. The default value is 10. The valid input range is [1, 10,000].
595-
confidence_level (float, default 0.95):
596-
A FLOAT64 value that specifies the percentage of the future values that fall in the prediction interval.
597-
The default value is 0.95. The valid input range is [0, 1).
598-
context_window (int, optional):
599-
An int value that specifies the context window length used by BigQuery ML's built-in TimesFM model.
600-
The context window length determines how many of the most recent data points from the input time series are used by the model.
601-
If you don't specify a value, the AI.FORECAST function automatically chooses the smallest possible context window length to use
602-
that is still large enough to cover the number of time series data points in your input data.
603-
604-
Returns:
605-
DataFrame:
606-
The forecast dataframe matches that of the BigQuery AI.FORECAST function.
607-
See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-ai-forecast
608-
609-
Raises:
610-
ValueError: when any column ID does not exist in the dataframe.
611-
"""
612-
613-
if isinstance(df, pd.DataFrame):
614-
# Load the pandas DataFrame with global session
615-
df = bpd.read_pandas(df)
616-
617-
columns = [timestamp_col, data_col]
618-
if id_cols:
619-
columns += id_cols
620-
for column in columns:
621-
if column not in df.columns:
622-
raise ValueError(f"Column `{column}` not found")
623-
624-
options: dict[str, Union[int, float, str, Iterable[str]]] = {
625-
"data_col": data_col,
626-
"timestamp_col": timestamp_col,
627-
"model": model,
628-
"horizon": horizon,
629-
"confidence_level": confidence_level,
630-
}
631-
if id_cols:
632-
options["id_cols"] = id_cols
633-
if context_window:
634-
options["context_window"] = context_window
635-
636-
return ml_core.BaseBqml(df._session).ai_forecast(input_data=df, options=options)
637-
638-
639551
def _separate_context_and_series(
640552
prompt: PROMPT_TYPE,
641553
) -> Tuple[List[str | None], List[series.Series]]:

bigframes/bigquery/_operations/geo.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -680,7 +680,7 @@ def st_length(
680680

681681

682682
def st_regionstats(
683-
geography: bigframes.geopandas.GeoSeries,
683+
geography: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
684684
raster: bigframes.series.Series,
685685
band: str,
686686
options: Mapping[str, Union[str, int, float]] = {},
@@ -693,7 +693,7 @@ def st_regionstats(
693693
This function requires the Earth Engine API to be enabled.
694694
695695
Args:
696-
geography (bigframes.geopandas.GeoSeries):
696+
geography (bigframes.series.Series | bigframes.geopandas.GeoSeries):
697697
A series of geography objects.
698698
raster (bigframes.series.Series):
699699
A series of raster URIs. This can be a Google Cloud Storage URI,

bigframes/core/compile/sqlglot/scalar_compiler.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,9 @@ def compile_st_regionstats(
205205
this="_",
206206
expressions=[
207207
sge.Identifier(this="OPTIONS"),
208-
sge.Anonymous(this="JSON", expressions=[sge.convert(op.options)]),
208+
sge.Anonymous(
209+
this="JSON", expressions=[sge.convert(op.options)]
210+
),
209211
],
210212
)
211213
)

bigframes/dataframe.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5328,16 +5328,12 @@ def _throw_if_null_index(self, opname: str):
53285328
@property
53295329
def semantics(self):
53305330
msg = bfe.format_message(
5331-
"The 'semantics' property will be removed. Please use 'bigframes.bigquery.ai' instead."
5331+
"The 'semantics' property will be removed. Please use 'ai' instead."
53325332
)
53335333
warnings.warn(msg, category=FutureWarning)
53345334
return bigframes.operations.semantics.Semantics(self)
53355335

53365336
@property
53375337
def ai(self):
53385338
"""Returns the accessor for AI operators."""
5339-
msg = bfe.format_message(
5340-
"The 'ai' property will be removed. Please use 'bigframes.bigquery.ai' instead."
5341-
)
5342-
warnings.warn(msg, category=FutureWarning)
53435339
return bigframes.operations.ai.AIAccessor(self)

bigframes/display/anywidget.py

Lines changed: 23 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,13 @@
2323
import pandas as pd
2424

2525
import bigframes
26-
from bigframes.core import blocks
2726
import bigframes.dataframe
2827
import bigframes.display.html
2928

30-
# anywidget and traitlets are optional dependencies. We don't want the import of
31-
# this module to fail if they aren't installed, though. Instead, we try to
32-
# limit the surface that these packages could affect. This makes unit testing
33-
# easier and ensures we don't accidentally make these required packages.
29+
# anywidget and traitlets are optional dependencies. We don't want the import of this
30+
# module to fail if they aren't installed, though. Instead, we try to limit the surface that
31+
# these packages could affect. This makes unit testing easier and ensures we don't
32+
# accidentally make these required packages.
3433
try:
3534
import anywidget
3635
import traitlets
@@ -47,21 +46,9 @@
4746

4847

4948
class TableWidget(WIDGET_BASE):
50-
"""An interactive, paginated table widget for BigFrames DataFrames.
51-
52-
This widget provides a user-friendly way to display and navigate through
53-
large BigQuery DataFrames within a Jupyter environment.
5449
"""
55-
56-
page = traitlets.Int(0).tag(sync=True)
57-
page_size = traitlets.Int(0).tag(sync=True)
58-
row_count = traitlets.Int(0).tag(sync=True)
59-
table_html = traitlets.Unicode().tag(sync=True)
60-
_initial_load_complete = traitlets.Bool(False).tag(sync=True)
61-
_batches: Optional[blocks.PandasBatches] = None
62-
_error_message = traitlets.Unicode(allow_none=True, default_value=None).tag(
63-
sync=True
64-
)
50+
An interactive, paginated table widget for BigFrames DataFrames.
51+
"""
6552

6653
def __init__(self, dataframe: bigframes.dataframe.DataFrame):
6754
"""Initialize the TableWidget.
@@ -74,11 +61,10 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
7461
"Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
7562
)
7663

77-
self._dataframe = dataframe
78-
7964
super().__init__()
65+
self._dataframe = dataframe
8066

81-
# Initialize attributes that might be needed by observers first
67+
# Initialize attributes that might be needed by observers FIRST
8268
self._table_id = str(uuid.uuid4())
8369
self._all_data_loaded = False
8470
self._batch_iter: Optional[Iterator[pd.DataFrame]] = None
@@ -87,6 +73,9 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
8773
# respect display options for initial page size
8874
initial_page_size = bigframes.options.display.max_rows
8975

76+
# Initialize data fetching attributes.
77+
self._batches = dataframe._to_pandas_batches(page_size=initial_page_size)
78+
9079
# set traitlets properties that trigger observers
9180
self.page_size = initial_page_size
9281

@@ -95,21 +84,12 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
9584
# TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
9685
# before we get here so that the count might already be cached.
9786
# TODO(b/452747934): Allow row_count to be None and check to see if
98-
# there are multiple pages and show "page 1 of many" in this case
99-
self._reset_batches_for_new_page_size()
100-
if self._batches is None or self._batches.total_rows is None:
101-
self._error_message = "Could not determine total row count. Data might be unavailable or an error occurred."
102-
self.row_count = 0
103-
else:
104-
self.row_count = self._batches.total_rows
87+
# there are multiple pages and show "page 1 of many" in this case.
88+
self.row_count = self._batches.total_rows or 0
10589

10690
# get the initial page
10791
self._set_table_html()
10892

109-
# Signals to the frontend that the initial data load is complete.
110-
# Also used as a guard to prevent observers from firing during initialization.
111-
self._initial_load_complete = True
112-
11393
@functools.cached_property
11494
def _esm(self):
11595
"""Load JavaScript code from external file."""
@@ -120,6 +100,11 @@ def _css(self):
120100
"""Load CSS code from external file."""
121101
return resources.read_text(bigframes.display, "table_widget.css")
122102

103+
page = traitlets.Int(0).tag(sync=True)
104+
page_size = traitlets.Int(25).tag(sync=True)
105+
row_count = traitlets.Int(0).tag(sync=True)
106+
table_html = traitlets.Unicode().tag(sync=True)
107+
123108
@traitlets.validate("page")
124109
def _validate_page(self, proposal: Dict[str, Any]) -> int:
125110
"""Validate and clamp the page number to a valid range.
@@ -186,10 +171,7 @@ def _get_next_batch(self) -> bool:
186171
def _batch_iterator(self) -> Iterator[pd.DataFrame]:
187172
"""Lazily initializes and returns the batch iterator."""
188173
if self._batch_iter is None:
189-
if self._batches is None:
190-
self._batch_iter = iter([])
191-
else:
192-
self._batch_iter = iter(self._batches)
174+
self._batch_iter = iter(self._batches)
193175
return self._batch_iter
194176

195177
@property
@@ -199,22 +181,15 @@ def _cached_data(self) -> pd.DataFrame:
199181
return pd.DataFrame(columns=self._dataframe.columns)
200182
return pd.concat(self._cached_batches, ignore_index=True)
201183

202-
def _reset_batches_for_new_page_size(self) -> None:
184+
def _reset_batches_for_new_page_size(self):
203185
"""Reset the batch iterator when page size changes."""
204186
self._batches = self._dataframe._to_pandas_batches(page_size=self.page_size)
205-
206187
self._cached_batches = []
207188
self._batch_iter = None
208189
self._all_data_loaded = False
209190

210-
def _set_table_html(self) -> None:
191+
def _set_table_html(self):
211192
"""Sets the current html data based on the current page and page size."""
212-
if self._error_message:
213-
self.table_html = (
214-
f"<div class='bigframes-error-message'>{self._error_message}</div>"
215-
)
216-
return
217-
218193
start = self.page * self.page_size
219194
end = start + self.page_size
220195

@@ -236,17 +211,13 @@ def _set_table_html(self) -> None:
236211
)
237212

238213
@traitlets.observe("page")
239-
def _page_changed(self, _change: Dict[str, Any]) -> None:
214+
def _page_changed(self, _change: Dict[str, Any]):
240215
"""Handler for when the page number is changed from the frontend."""
241-
if not self._initial_load_complete:
242-
return
243216
self._set_table_html()
244217

245218
@traitlets.observe("page_size")
246-
def _page_size_changed(self, _change: Dict[str, Any]) -> None:
219+
def _page_size_changed(self, _change: Dict[str, Any]):
247220
"""Handler for when the page size is changed from the frontend."""
248-
if not self._initial_load_complete:
249-
return
250221
# Reset the page to 0 when page size changes to avoid invalid page states
251222
self.page = 0
252223

bigframes/display/table_widget.js

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -137,12 +137,6 @@ function render({ model, el }) {
137137
}
138138
});
139139
model.on(Event.CHANGE_TABLE_HTML, handleTableHTMLChange);
140-
model.on(`change:${ModelProperty.ROW_COUNT}`, updateButtonStates);
141-
model.on(`change:_initial_load_complete`, (val) => {
142-
if (val) {
143-
updateButtonStates();
144-
}
145-
});
146140

147141
// Assemble the DOM
148142
paginationContainer.appendChild(prevPage);

docs/templates/toc.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@
4545
uid: bigframes.operations.plotting.PlotAccessor
4646
- name: StructAccessor
4747
uid: bigframes.operations.structs.StructFrameAccessor
48+
- name: AI
49+
uid: bigframes.operations.ai.AIAccessor
50+
status: beta
4851
name: DataFrame
4952
- items:
5053
- name: DataFrameGroupBy

0 commit comments

Comments
 (0)