Skip to content

Commit 459c5b2

Browse files
Merge branch 'feat-st-regionstats' of https://github.com/googleapis/python-bigquery-dataframes into feat-st-regionstats
2 parents 21243e4 + 8b681c0 commit 459c5b2

File tree

11 files changed

+594
-285
lines changed

11 files changed

+594
-285
lines changed

bigframes/bigquery/_operations/ai.py

Lines changed: 90 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,15 @@
1919
from __future__ import annotations
2020

2121
import json
22-
from typing import Any, List, Literal, Mapping, Tuple, Union
22+
from typing import Any, Iterable, List, Literal, Mapping, Tuple, Union
2323

2424
import pandas as pd
2525

26-
from bigframes import clients, dtypes, series, session
26+
from bigframes import clients, dataframe, dtypes
27+
from bigframes import pandas as bpd
28+
from bigframes import series, session
2729
from bigframes.core import convert, log_adapter
30+
from bigframes.ml import core as ml_core
2831
from bigframes.operations import ai_ops, output_schemas
2932

3033
PROMPT_TYPE = Union[
@@ -548,6 +551,91 @@ def score(
548551
return series_list[0]._apply_nary_op(operator, series_list[1:])
549552

550553

554+
@log_adapter.method_logger(custom_base_name="bigquery_ai")
def forecast(
    df: dataframe.DataFrame | pd.DataFrame,
    *,
    data_col: str,
    timestamp_col: str,
    model: str = "TimesFM 2.0",
    id_cols: Iterable[str] | None = None,
    horizon: int = 10,
    confidence_level: float = 0.95,
    context_window: int | None = None,
) -> dataframe.DataFrame:
    """
    Forecast time series at future horizon, using Google Research's open source
    TimesFM (https://github.com/google-research/timesfm) model.

    .. note::

        This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
        Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
        and might have limited support. For more information, see the launch stage descriptions
        (https://cloud.google.com/products#product-launch-stages).

    Args:
        df (DataFrame):
            The dataframe that contains the data that you want to forecast. It could be either a BigFrames DataFrame or
            a pandas DataFrame. If it's a pandas DataFrame, the global BigQuery session will be used to load the data.
        data_col (str):
            A str value that specifies the name of the data column. The data column contains the data to forecast.
            The data column must use one of the following data types: INT64, NUMERIC and FLOAT64
        timestamp_col (str):
            A str value that specifies the name of the time points column.
            The time points column provides the time points used to generate the forecast.
            The time points column must use one of the following data types: TIMESTAMP, DATE and DATETIME
        model (str, default "TimesFM 2.0"):
            A str value that specifies the name of the model. TimesFM 2.0 is the only supported value, and is the default value.
        id_cols (Iterable[str], optional):
            An iterable of str values that specifies the names of one or more ID columns. Each ID identifies a unique time series to forecast.
            Specify one or more values for this argument in order to forecast multiple time series using a single query.
            The columns that you specify must use one of the following data types: STRING, INT64, ARRAY<STRING> and ARRAY<INT64>.
            Any iterable (including a one-shot generator) is accepted; a bare str is treated as a single column name.
        horizon (int, default 10):
            An int value that specifies the number of time points to forecast. The default value is 10. The valid input range is [1, 10,000].
        confidence_level (float, default 0.95):
            A FLOAT64 value that specifies the percentage of the future values that fall in the prediction interval.
            The default value is 0.95. The valid input range is [0, 1).
        context_window (int, optional):
            An int value that specifies the context window length used by BigQuery ML's built-in TimesFM model.
            The context window length determines how many of the most recent data points from the input time series are used by the model.
            If you don't specify a value, the AI.FORECAST function automatically chooses the smallest possible context window length to use
            that is still large enough to cover the number of time series data points in your input data.

    Returns:
        DataFrame:
            The forecast dataframe matches that of the BigQuery AI.FORECAST function.
            See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-ai-forecast

    Raises:
        ValueError: when any column ID does not exist in the dataframe.
    """

    if isinstance(df, pd.DataFrame):
        # Load the pandas DataFrame with global session
        df = bpd.read_pandas(df)

    # Materialize id_cols exactly once. The value is needed twice (validation
    # below and the options payload), so a one-shot iterable such as a
    # generator would otherwise arrive at the backend already exhausted, and a
    # bare string would be split into single characters.
    if id_cols is None:
        id_col_list: list[str] = []
    elif isinstance(id_cols, str):
        id_col_list = [id_cols]
    else:
        id_col_list = list(id_cols)

    for column in [timestamp_col, data_col, *id_col_list]:
        if column not in df.columns:
            raise ValueError(f"Column `{column}` not found")

    options: dict[str, Union[int, float, str, Iterable[str]]] = {
        "data_col": data_col,
        "timestamp_col": timestamp_col,
        "model": model,
        "horizon": horizon,
        "confidence_level": confidence_level,
    }
    # Optional arguments are only forwarded when they carry a value, so that
    # AI.FORECAST applies its own defaults otherwise.
    if id_col_list:
        options["id_cols"] = id_col_list
    if context_window:
        options["context_window"] = context_window

    return ml_core.BaseBqml(df._session).ai_forecast(input_data=df, options=options)
637+
638+
551639
def _separate_context_and_series(
552640
prompt: PROMPT_TYPE,
553641
) -> Tuple[List[str | None], List[series.Series]]:

bigframes/dataframe.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5328,12 +5328,16 @@ def _throw_if_null_index(self, opname: str):
53285328
@property
def semantics(self):
    """Returns the ``semantics`` accessor, which is slated for removal.

    Accessing this property emits a ``FutureWarning`` that points users to
    ``bigframes.bigquery.ai``.
    """
    msg = bfe.format_message(
        "The 'semantics' property will be removed. Please use 'bigframes.bigquery.ai' instead."
    )
    # stacklevel=2 attributes the warning to the code that accessed the
    # property instead of to this line inside the library.
    warnings.warn(msg, category=FutureWarning, stacklevel=2)
    return bigframes.operations.semantics.Semantics(self)
53355335

53365336
@property
def ai(self):
    """Returns the accessor for AI operators.

    Accessing this property emits a ``FutureWarning`` that points users to
    ``bigframes.bigquery.ai``.
    """
    msg = bfe.format_message(
        "The 'ai' property will be removed. Please use 'bigframes.bigquery.ai' instead."
    )
    # stacklevel=2 attributes the warning to the code that accessed the
    # property instead of to this line inside the library.
    warnings.warn(msg, category=FutureWarning, stacklevel=2)
    return bigframes.operations.ai.AIAccessor(self)

bigframes/display/anywidget.py

Lines changed: 52 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,14 @@
2323
import pandas as pd
2424

2525
import bigframes
26+
from bigframes.core import blocks
2627
import bigframes.dataframe
2728
import bigframes.display.html
2829

29-
# anywidget and traitlets are optional dependencies. We don't want the import of this
30-
# module to fail if they aren't installed, though. Instead, we try to limit the surface that
31-
# these packages could affect. This makes unit testing easier and ensures we don't
32-
# accidentally make these required packages.
30+
# anywidget and traitlets are optional dependencies. We don't want the import of
31+
# this module to fail if they aren't installed, though. Instead, we try to
32+
# limit the surface that these packages could affect. This makes unit testing
33+
# easier and ensures we don't accidentally make these required packages.
3334
try:
3435
import anywidget
3536
import traitlets
@@ -46,9 +47,21 @@
4647

4748

4849
class TableWidget(WIDGET_BASE):
50+
"""An interactive, paginated table widget for BigFrames DataFrames.
51+
52+
This widget provides a user-friendly way to display and navigate through
53+
large BigQuery DataFrames within a Jupyter environment.
4954
"""
50-
An interactive, paginated table widget for BigFrames DataFrames.
51-
"""
55+
56+
page = traitlets.Int(0).tag(sync=True)
57+
page_size = traitlets.Int(0).tag(sync=True)
58+
row_count = traitlets.Int(0).tag(sync=True)
59+
table_html = traitlets.Unicode().tag(sync=True)
60+
_initial_load_complete = traitlets.Bool(False).tag(sync=True)
61+
_batches: Optional[blocks.PandasBatches] = None
62+
_error_message = traitlets.Unicode(allow_none=True, default_value=None).tag(
63+
sync=True
64+
)
5265

5366
def __init__(self, dataframe: bigframes.dataframe.DataFrame):
5467
"""Initialize the TableWidget.
@@ -61,10 +74,11 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
6174
"Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
6275
)
6376

64-
super().__init__()
6577
self._dataframe = dataframe
6678

67-
# Initialize attributes that might be needed by observers FIRST
79+
super().__init__()
80+
81+
# Initialize attributes that might be needed by observers first
6882
self._table_id = str(uuid.uuid4())
6983
self._all_data_loaded = False
7084
self._batch_iter: Optional[Iterator[pd.DataFrame]] = None
@@ -73,9 +87,6 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
7387
# respect display options for initial page size
7488
initial_page_size = bigframes.options.display.max_rows
7589

76-
# Initialize data fetching attributes.
77-
self._batches = dataframe._to_pandas_batches(page_size=initial_page_size)
78-
7990
# set traitlets properties that trigger observers
8091
self.page_size = initial_page_size
8192

@@ -84,12 +95,21 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
8495
# TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
8596
# before we get here so that the count might already be cached.
8697
# TODO(b/452747934): Allow row_count to be None and check to see if
87-
# there are multiple pages and show "page 1 of many" in this case.
88-
self.row_count = self._batches.total_rows or 0
98+
# there are multiple pages and show "page 1 of many" in this case
99+
self._reset_batches_for_new_page_size()
100+
if self._batches is None or self._batches.total_rows is None:
101+
self._error_message = "Could not determine total row count. Data might be unavailable or an error occurred."
102+
self.row_count = 0
103+
else:
104+
self.row_count = self._batches.total_rows
89105

90106
# get the initial page
91107
self._set_table_html()
92108

109+
# Signals to the frontend that the initial data load is complete.
110+
# Also used as a guard to prevent observers from firing during initialization.
111+
self._initial_load_complete = True
112+
93113
@functools.cached_property
94114
def _esm(self):
95115
"""Load JavaScript code from external file."""
@@ -100,11 +120,6 @@ def _css(self):
100120
"""Load CSS code from external file."""
101121
return resources.read_text(bigframes.display, "table_widget.css")
102122

103-
page = traitlets.Int(0).tag(sync=True)
104-
page_size = traitlets.Int(25).tag(sync=True)
105-
row_count = traitlets.Int(0).tag(sync=True)
106-
table_html = traitlets.Unicode().tag(sync=True)
107-
108123
@traitlets.validate("page")
109124
def _validate_page(self, proposal: Dict[str, Any]) -> int:
110125
"""Validate and clamp the page number to a valid range.
@@ -171,7 +186,10 @@ def _get_next_batch(self) -> bool:
171186
def _batch_iterator(self) -> Iterator[pd.DataFrame]:
172187
"""Lazily initializes and returns the batch iterator."""
173188
if self._batch_iter is None:
174-
self._batch_iter = iter(self._batches)
189+
if self._batches is None:
190+
self._batch_iter = iter([])
191+
else:
192+
self._batch_iter = iter(self._batches)
175193
return self._batch_iter
176194

177195
@property
@@ -181,15 +199,22 @@ def _cached_data(self) -> pd.DataFrame:
181199
return pd.DataFrame(columns=self._dataframe.columns)
182200
return pd.concat(self._cached_batches, ignore_index=True)
183201

184-
def _reset_batches_for_new_page_size(self) -> None:
    """Restart batch retrieval using the current ``page_size``.

    Requests a new batch source from the wrapped dataframe and discards the
    cached batches, the iterator and the "all data loaded" flag that belonged
    to the previous source.
    """
    # Request the new source first; the cached state is only cleared once
    # this call has returned.
    fresh_batches = self._dataframe._to_pandas_batches(page_size=self.page_size)
    self._batches = fresh_batches

    self._cached_batches, self._batch_iter, self._all_data_loaded = [], None, False

191-
def _set_table_html(self):
210+
def _set_table_html(self) -> None:
192211
"""Sets the current html data based on the current page and page size."""
212+
if self._error_message:
213+
self.table_html = (
214+
f"<div class='bigframes-error-message'>{self._error_message}</div>"
215+
)
216+
return
217+
193218
start = self.page * self.page_size
194219
end = start + self.page_size
195220

@@ -211,13 +236,17 @@ def _set_table_html(self):
211236
)
212237

213238
@traitlets.observe("page")
def _page_changed(self, _change: Dict[str, Any]) -> None:
    """Re-render the table when the ``page`` trait changes.

    Does nothing while ``_initial_load_complete`` is still false.
    """
    if self._initial_load_complete:
        self._set_table_html()
217244

218245
@traitlets.observe("page_size")
219-
def _page_size_changed(self, _change: Dict[str, Any]):
246+
def _page_size_changed(self, _change: Dict[str, Any]) -> None:
220247
"""Handler for when the page size is changed from the frontend."""
248+
if not self._initial_load_complete:
249+
return
221250
# Reset the page to 0 when page size changes to avoid invalid page states
222251
self.page = 0
223252

bigframes/display/table_widget.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,12 @@ function render({ model, el }) {
137137
}
138138
});
139139
model.on(Event.CHANGE_TABLE_HTML, handleTableHTMLChange);
140+
model.on(`change:${ModelProperty.ROW_COUNT}`, updateButtonStates);
141+
model.on(`change:_initial_load_complete`, (val) => {
142+
if (val) {
143+
updateButtonStates();
144+
}
145+
});
140146

141147
// Assemble the DOM
142148
paginationContainer.appendChild(prevPage);

docs/templates/toc.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,6 @@
4545
uid: bigframes.operations.plotting.PlotAccessor
4646
- name: StructAccessor
4747
uid: bigframes.operations.structs.StructFrameAccessor
48-
- name: AI
49-
uid: bigframes.operations.ai.AIAccessor
50-
status: beta
5148
name: DataFrame
5249
- items:
5350
- name: DataFrameGroupBy

0 commit comments

Comments
 (0)