Skip to content

Commit c0d03e8

Browse files
authored
Merge branch 'main' into routine-cleanup
2 parents 93b873f + ecee2bc commit c0d03e8

File tree

63 files changed

+2775
-800
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+2775
-800
lines changed

bigframes/bigquery/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
st_intersection,
4141
st_isclosed,
4242
st_length,
43+
st_simplify,
4344
)
4445
from bigframes.bigquery._operations.json import (
4546
json_extract,
@@ -80,6 +81,7 @@
8081
st_intersection,
8182
st_isclosed,
8283
st_length,
84+
st_simplify,
8385
# json ops
8486
json_extract,
8587
json_extract_array,

bigframes/bigquery/_operations/ai.py

Lines changed: 90 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,15 @@
1919
from __future__ import annotations
2020

2121
import json
22-
from typing import Any, List, Literal, Mapping, Tuple, Union
22+
from typing import Any, Iterable, List, Literal, Mapping, Tuple, Union
2323

2424
import pandas as pd
2525

26-
from bigframes import clients, dtypes, series, session
26+
from bigframes import clients, dataframe, dtypes
27+
from bigframes import pandas as bpd
28+
from bigframes import series, session
2729
from bigframes.core import convert, log_adapter
30+
from bigframes.ml import core as ml_core
2831
from bigframes.operations import ai_ops, output_schemas
2932

3033
PROMPT_TYPE = Union[
@@ -548,6 +551,91 @@ def score(
548551
return series_list[0]._apply_nary_op(operator, series_list[1:])
549552

550553

554+
@log_adapter.method_logger(custom_base_name="bigquery_ai")
def forecast(
    df: dataframe.DataFrame | pd.DataFrame,
    *,
    data_col: str,
    timestamp_col: str,
    model: str = "TimesFM 2.0",
    id_cols: Iterable[str] | None = None,
    horizon: int = 10,
    confidence_level: float = 0.95,
    context_window: int | None = None,
) -> dataframe.DataFrame:
    """
    Forecast time series at future horizon. Using Google Research's open source TimesFM (https://github.com/google-research/timesfm) model.

    .. note::

        This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
        Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
        and might have limited support. For more information, see the launch stage descriptions
        (https://cloud.google.com/products#product-launch-stages).

    Args:
        df (DataFrame):
            The dataframe that contains the data that you want to forecast. It could be either a BigFrames DataFrame or
            a pandas DataFrame. If it's a pandas DataFrame, the global BigQuery session will be used to load the data.
        data_col (str):
            A str value that specifies the name of the data column. The data column contains the data to forecast.
            The data column must use one of the following data types: INT64, NUMERIC and FLOAT64
        timestamp_col (str):
            A str value that specifies the name of the time points column.
            The time points column provides the time points used to generate the forecast.
            The time points column must use one of the following data types: TIMESTAMP, DATE and DATETIME
        model (str, default "TimesFM 2.0"):
            A str value that specifies the name of the model. TimesFM 2.0 is the only supported value, and is the default value.
        id_cols (Iterable[str], optional):
            An iterable of str values that specifies the names of one or more ID columns. Each ID identifies a unique time series to forecast.
            Specify one or more values for this argument in order to forecast multiple time series using a single query.
            The columns that you specify must use one of the following data types: STRING, INT64, ARRAY<STRING> and ARRAY<INT64>
            The iterable is consumed exactly once, so one-shot iterators and generators are accepted.
        horizon (int, default 10):
            An int value that specifies the number of time points to forecast. The default value is 10. The valid input range is [1, 10,000].
        confidence_level (float, default 0.95):
            A FLOAT64 value that specifies the percentage of the future values that fall in the prediction interval.
            The default value is 0.95. The valid input range is [0, 1).
        context_window (int, optional):
            An int value that specifies the context window length used by BigQuery ML's built-in TimesFM model.
            The context window length determines how many of the most recent data points from the input time series are used by the model.
            If you don't specify a value, the AI.FORECAST function automatically chooses the smallest possible context window length to use
            that is still large enough to cover the number of time series data points in your input data.

    Returns:
        DataFrame:
            The forecast dataframe matches that of the BigQuery AI.FORECAST function.
            See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-ai-forecast

    Raises:
        ValueError: when any column ID does not exist in the dataframe.
    """

    if isinstance(df, pd.DataFrame):
        # Load the pandas DataFrame with global session
        df = bpd.read_pandas(df)

    # Materialize id_cols exactly once. It is typed as a generic Iterable, so it
    # may be a one-shot iterator; iterating it for validation and then handing
    # the same object to the options would forward an exhausted iterator and
    # silently drop the ID columns.
    id_col_names = list(id_cols) if id_cols is not None else []

    for column in (timestamp_col, data_col, *id_col_names):
        if column not in df.columns:
            raise ValueError(f"Column `{column}` not found")

    options: dict[str, Union[int, float, str, Iterable[str]]] = {
        "data_col": data_col,
        "timestamp_col": timestamp_col,
        "model": model,
        "horizon": horizon,
        "confidence_level": confidence_level,
    }
    # Optional arguments are only forwarded when they carry a value, so that
    # AI.FORECAST applies its own defaults otherwise.
    if id_col_names:
        options["id_cols"] = id_col_names
    if context_window:
        options["context_window"] = context_window

    return ml_core.BaseBqml(df._session).ai_forecast(input_data=df, options=options)
637+
638+
551639
def _separate_context_and_series(
552640
prompt: PROMPT_TYPE,
553641
) -> Tuple[List[str | None], List[series.Series]]:

bigframes/bigquery/_operations/geo.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,3 +675,23 @@ def st_length(
675675
series = series._apply_unary_op(ops.GeoStLengthOp(use_spheroid=use_spheroid))
676676
series.name = None
677677
return series
678+
679+
680+
def st_simplify(
    geography: "bigframes.series.Series",
    tolerance_meters: float,
) -> "bigframes.series.Series":
    """Returns a simplified version of the input geography.

    The simplification is expressed as a ``GeoStSimplifyOp`` configured with
    the given tolerance and applied to the Series as a unary operation.

    Args:
        geography (bigframes.series.Series):
            A Series containing GEOGRAPHY data.
        tolerance_meters (float):
            A float64 value indicating the tolerance in meters.

    Returns:
        a Series containing the simplified GEOGRAPHY data.
    """
    simplify_op = ops.GeoStSimplifyOp(tolerance_meters=tolerance_meters)
    return geography._apply_unary_op(simplify_op)

0 commit comments

Comments
 (0)