fix struct options

tswast · tswast · commit b4e31ef764ff · 2025-12-02T20:31:31.000Z
diff --git a/bigframes/bigquery/_operations/ml.py b/bigframes/bigquery/_operations/ml.py
@@ -151,7 +151,9 @@ def evaluate(
     model: Union[bigframes.ml.base.BaseEstimator, str],
     input_: Optional[Union[dataframe.DataFrame, str]] = None,
     *,
-    options: Optional[Mapping[str, Union[str, int, float, bool, list]]] = None,
+    perform_aggregation: Optional[bool] = None,
+    horizon: Optional[int] = None,
+    confidence_level: Optional[float] = None,
 ) -> dataframe.DataFrame:
     """
     Evaluates a BigQuery ML model.
@@ -166,8 +168,22 @@ def evaluate(
         input_ (Union[bigframes.pandas.DataFrame, str], optional):
             The DataFrame or query to use for evaluation. If not provided, the
             evaluation data from training is used.
-        options (Mapping[str, Union[str, int, float, bool, list]], optional):
-            The OPTIONS clause, which specifies the model options.
+        perform_aggregation (bool, optional):
+            A BOOL value that indicates the level of evaluation for forecasting
+            accuracy. If you specify TRUE, then the forecasting accuracy is on
+            the time series level. If you specify FALSE, the forecasting
+            accuracy is on the timestamp level. The default value is TRUE.
+        horizon (int, optional):
+            An INT64 value that specifies the number of forecasted time points
+            against which the evaluation metrics are computed. The default value
+            is the horizon value specified in the CREATE MODEL statement for the
+            time series model, or 1000 if unspecified. When evaluating multiple
+            time series at the same time, this parameter applies to each time
+            series.
+        confidence_level (float, optional):
+            A FLOAT64 value that specifies the percentage of the future values
+            that fall in the prediction interval. The default value is 0.95. The
+            valid input range is ``[0, 1)``.
 
     Returns:
         bigframes.pandas.DataFrame:
@@ -179,7 +195,9 @@ def evaluate(
     sql = bigframes.core.sql.ml.evaluate(
         model_name=model_name,
         table=table_sql,
-        options=options,
+        perform_aggregation=perform_aggregation,
+        horizon=horizon,
+        confidence_level=confidence_level,
     )
 
     return session.read_gbq(sql)
@@ -190,7 +208,9 @@ def predict(
     model: Union[bigframes.ml.base.BaseEstimator, str],
     input_: Union[dataframe.DataFrame, str],
     *,
-    options: Optional[Mapping[str, Union[str, int, float, bool, list]]] = None,
+    threshold: Optional[float] = None,
+    keep_original_columns: Optional[bool] = None,
+    trial_id: Optional[int] = None,
 ) -> dataframe.DataFrame:
     """
     Runs prediction on a BigQuery ML model.
@@ -204,8 +224,15 @@ def predict(
             The model to use for prediction.
         input_ (Union[bigframes.pandas.DataFrame, str]):
             The DataFrame or query to use for prediction.
-        options (Mapping[str, Union[str, int, float, bool, list]], optional):
-            The OPTIONS clause, which specifies the model options.
+        threshold (float, optional):
+            The threshold to use for classification models.
+        keep_original_columns (bool, optional):
+            Whether to keep the original columns in the output.
+        trial_id (int, optional):
+            An INT64 value that identifies the hyperparameter tuning trial that
+            you want the function to evaluate. The function uses the optimal
+            trial by default. Only specify this argument if you ran
+            hyperparameter tuning when creating the model.
 
     Returns:
         bigframes.pandas.DataFrame:
@@ -217,7 +244,9 @@ def predict(
     sql = bigframes.core.sql.ml.predict(
         model_name=model_name,
         table=table_sql,
-        options=options,
+        threshold=threshold,
+        keep_original_columns=keep_original_columns,
+        trial_id=trial_id,
     )
 
     return session.read_gbq(sql)
@@ -228,7 +257,10 @@ def explain_predict(
     model: Union[bigframes.ml.base.BaseEstimator, str],
     input_: Union[dataframe.DataFrame, str],
     *,
-    options: Optional[Mapping[str, Union[str, int, float, bool, list]]] = None,
+    top_k_features: Optional[int] = None,
+    threshold: Optional[float] = None,
+    integrated_gradients_num_steps: Optional[int] = None,
+    approx_feature_contrib: Optional[bool] = None,
 ) -> dataframe.DataFrame:
     """
     Runs explainable prediction on a BigQuery ML model.
@@ -242,8 +274,19 @@ def explain_predict(
             The model to use for prediction.
         input_ (Union[bigframes.pandas.DataFrame, str]):
             The DataFrame or query to use for prediction.
-        options (Mapping[str, Union[str, int, float, bool, list]], optional):
-            The OPTIONS clause, which specifies the model options.
+        top_k_features (int, optional):
+            The number of top features to return.
+        threshold (float, optional):
+            The threshold for binary classification models.
+        integrated_gradients_num_steps (int, optional):
+            An INT64 value that specifies the number of steps to sample between
+            the example being explained and its baseline. This value is used to
+            approximate the integral in integrated gradients attribution
+            methods. Increasing the value improves the precision of feature
+            attributions, but can be slower and more computationally expensive.
+        approx_feature_contrib (bool, optional):
+            A BOOL value that indicates whether to use an approximate feature
+            contribution method in the XGBoost model explanation.
 
     Returns:
         bigframes.pandas.DataFrame:
@@ -255,7 +298,10 @@ def explain_predict(
     sql = bigframes.core.sql.ml.explain_predict(
         model_name=model_name,
         table=table_sql,
-        options=options,
+        top_k_features=top_k_features,
+        threshold=threshold,
+        integrated_gradients_num_steps=integrated_gradients_num_steps,
+        approx_feature_contrib=approx_feature_contrib,
     )
 
     return session.read_gbq(sql)
@@ -265,7 +311,7 @@ def explain_predict(
 def global_explain(
     model: Union[bigframes.ml.base.BaseEstimator, str],
     *,
-    options: Optional[Mapping[str, Union[str, int, float, bool, list]]] = None,
+    class_level_explain: Optional[bool] = None,
 ) -> dataframe.DataFrame:
     """
     Gets global explanations for a BigQuery ML model.
@@ -277,8 +323,8 @@ def global_explain(
     Args:
         model (bigframes.ml.base.BaseEstimator or str):
             The model to get explanations from.
-        options (Mapping[str, Union[str, int, float, bool, list]], optional):
-            The OPTIONS clause, which specifies the model options.
+        class_level_explain (bool, optional):
+            Whether to return class-level explanations.
 
     Returns:
         bigframes.pandas.DataFrame:
@@ -287,7 +333,7 @@ def global_explain(
     model_name, session = _get_model_name_and_session(model)
     sql = bigframes.core.sql.ml.global_explain(
         model_name=model_name,
-        options=options,
+        class_level_explain=class_level_explain,
     )
 
     return session.read_gbq(sql)
diff --git a/bigframes/core/sql/ml.py b/bigframes/core/sql/ml.py
@@ -14,7 +14,7 @@
 
 from __future__ import annotations
 
-from typing import Mapping, Optional, Union
+from typing import Dict, Mapping, Optional, Union
 
 import bigframes.core.compile.googlesql as googlesql
 import bigframes.core.sql
@@ -94,33 +94,48 @@ def create_model_ddl(
             ddl += f"AS (\n  {', '.join(parts)}\n)"
         else:
             # Just training_data is treated as the query_statement
-            ddl += f"AS {training_data}"
+            ddl += f"AS {training_data}\n"
 
     return ddl
 
 
+def _build_struct_sql(
+    struct_options: Mapping[str, Union[str, int, float, bool]]
+) -> str:
+    if not struct_options:
+        return ""
+
+    rendered_options = []
+    for option_name, option_value in struct_options.items():
+        rendered_val = bigframes.core.sql.simple_literal(option_value)
+        rendered_options.append(f"{rendered_val} AS {option_name}")
+    return f", STRUCT({', '.join(rendered_options)})"
+
+
 def evaluate(
     model_name: str,
     *,
     table: Optional[str] = None,
-    options: Optional[Mapping[str, Union[str, int, float, bool, list]]] = None,
+    perform_aggregation: Optional[bool] = None,
+    horizon: Optional[int] = None,
+    confidence_level: Optional[float] = None,
 ) -> str:
-    """Encode the ML.EVALUATE statement.
-
+    """Encode the ML.EVALUATE statement.
     See https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate for reference.
     """
+    struct_options: Dict[str, Union[str, int, float, bool]] = {}
+    if perform_aggregation is not None:
+        struct_options["perform_aggregation"] = perform_aggregation
+    if horizon is not None:
+        struct_options["horizon"] = horizon
+    if confidence_level is not None:
+        struct_options["confidence_level"] = confidence_level
+
     sql = f"SELECT * FROM ML.EVALUATE(MODEL {googlesql.identifier(model_name)}"
     if table:
         sql += f", ({table})"
-    if options:
-        rendered_options = []
-        for option_name, option_value in options.items():
-            if isinstance(option_value, (list, tuple)):
-                rendered_val = bigframes.core.sql.simple_literal(list(option_value))
-            else:
-                rendered_val = bigframes.core.sql.simple_literal(option_value)
-            rendered_options.append(f"{option_name} = {rendered_val}")
-        sql += f", OPTIONS({', '.join(rendered_options)})"
+
+    sql += _build_struct_sql(struct_options)
     sql += ")\n"
     return sql
 
@@ -129,24 +144,25 @@ def predict(
     model_name: str,
     table: str,
     *,
-    options: Optional[Mapping[str, Union[str, int, float, bool, list]]] = None,
+    threshold: Optional[float] = None,
+    keep_original_columns: Optional[bool] = None,
+    trial_id: Optional[int] = None,
 ) -> str:
     """Encode the ML.PREDICT statement.
-
     See https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-predict for reference.
     """
+    struct_options = {}
+    if threshold is not None:
+        struct_options["threshold"] = threshold
+    if keep_original_columns is not None:
+        struct_options["keep_original_columns"] = keep_original_columns
+    if trial_id is not None:
+        struct_options["trial_id"] = trial_id
+
     sql = (
         f"SELECT * FROM ML.PREDICT(MODEL {googlesql.identifier(model_name)}, ({table})"
     )
-    if options:
-        rendered_options = []
-        for option_name, option_value in options.items():
-            if isinstance(option_value, (list, tuple)):
-                rendered_val = bigframes.core.sql.simple_literal(list(option_value))
-            else:
-                rendered_val = bigframes.core.sql.simple_literal(option_value)
-            rendered_options.append(f"{option_name} = {rendered_val}")
-        sql += f", OPTIONS({', '.join(rendered_options)})"
+    sql += _build_struct_sql(struct_options)
     sql += ")\n"
     return sql
 
@@ -155,44 +171,45 @@ def explain_predict(
     model_name: str,
     table: str,
     *,
-    options: Optional[Mapping[str, Union[str, int, float, bool, list]]] = None,
+    top_k_features: Optional[int] = None,
+    threshold: Optional[float] = None,
+    integrated_gradients_num_steps: Optional[int] = None,
+    approx_feature_contrib: Optional[bool] = None,
 ) -> str:
     """Encode the ML.EXPLAIN_PREDICT statement.
-
     See https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-explain-predict for reference.
     """
+    struct_options: Dict[str, Union[str, int, float, bool]] = {}
+    if top_k_features is not None:
+        struct_options["top_k_features"] = top_k_features
+    if threshold is not None:
+        struct_options["threshold"] = threshold
+    if integrated_gradients_num_steps is not None:
+        struct_options[
+            "integrated_gradients_num_steps"
+        ] = integrated_gradients_num_steps
+    if approx_feature_contrib is not None:
+        struct_options["approx_feature_contrib"] = approx_feature_contrib
+
     sql = f"SELECT * FROM ML.EXPLAIN_PREDICT(MODEL {googlesql.identifier(model_name)}, ({table})"
-    if options:
-        rendered_options = []
-        for option_name, option_value in options.items():
-            if isinstance(option_value, (list, tuple)):
-                rendered_val = bigframes.core.sql.simple_literal(list(option_value))
-            else:
-                rendered_val = bigframes.core.sql.simple_literal(option_value)
-            rendered_options.append(f"{option_name} = {rendered_val}")
-        sql += f", OPTIONS({', '.join(rendered_options)})"
+    sql += _build_struct_sql(struct_options)
     sql += ")\n"
     return sql
 
 
 def global_explain(
     model_name: str,
     *,
-    options: Optional[Mapping[str, Union[str, int, float, bool, list]]] = None,
+    class_level_explain: Optional[bool] = None,
 ) -> str:
     """Encode the ML.GLOBAL_EXPLAIN statement.
-
     See https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-global-explain for reference.
     """
+    struct_options = {}
+    if class_level_explain is not None:
+        struct_options["class_level_explain"] = class_level_explain
+
     sql = f"SELECT * FROM ML.GLOBAL_EXPLAIN(MODEL {googlesql.identifier(model_name)}"
-    if options:
-        rendered_options = []
-        for option_name, option_value in options.items():
-            if isinstance(option_value, (list, tuple)):
-                rendered_val = bigframes.core.sql.simple_literal(list(option_value))
-            else:
-                rendered_val = bigframes.core.sql.simple_literal(option_value)
-            rendered_options.append(f"{option_name} = {rendered_val}")
-        sql += f", OPTIONS({', '.join(rendered_options)})"
+    sql += _build_struct_sql(struct_options)
     sql += ")\n"
     return sql
diff --git a/tests/unit/core/sql/snapshots/test_ml/test_create_model_basic/create_model_basic.sql b/tests/unit/core/sql/snapshots/test_ml/test_create_model_basic/create_model_basic.sql
@@ -1,3 +1,3 @@
 CREATE MODEL `my_project.my_dataset.my_model`
 OPTIONS(model_type = 'LINEAR_REG', input_label_cols = ['label'])
-AS SELECT * FROM my_table
+AS SELECT * FROM my_table
diff --git a/tests/unit/core/sql/snapshots/test_ml/test_create_model_if_not_exists/create_model_if_not_exists.sql b/tests/unit/core/sql/snapshots/test_ml/test_create_model_if_not_exists/create_model_if_not_exists.sql
@@ -1,3 +1,3 @@
 CREATE MODEL IF NOT EXISTS `my_model`
 OPTIONS(model_type = 'KMEANS')
-AS SELECT * FROM t
+AS SELECT * FROM t
diff --git a/tests/unit/core/sql/snapshots/test_ml/test_create_model_list_option/create_model_list_option.sql b/tests/unit/core/sql/snapshots/test_ml/test_create_model_list_option/create_model_list_option.sql
@@ -1,3 +1,3 @@
 CREATE MODEL `my_model`
 OPTIONS(hidden_units = [32, 16], dropout = 0.2)
-AS SELECT * FROM t
+AS SELECT * FROM t
diff --git a/tests/unit/core/sql/snapshots/test_ml/test_create_model_replace/create_model_replace.sql b/tests/unit/core/sql/snapshots/test_ml/test_create_model_replace/create_model_replace.sql
@@ -1,3 +1,3 @@
 CREATE OR REPLACE MODEL `my_model`
 OPTIONS(model_type = 'LOGISTIC_REG')
-AS SELECT * FROM t
+AS SELECT * FROM t
diff --git a/tests/unit/core/sql/snapshots/test_ml/test_create_model_transform/create_model_transform.sql b/tests/unit/core/sql/snapshots/test_ml/test_create_model_transform/create_model_transform.sql
@@ -1,4 +1,4 @@
 CREATE MODEL `my_model`
 TRANSFORM (ML.STANDARD_SCALER(c1) OVER() AS c1_scaled, c2)
 OPTIONS(model_type = 'LINEAR_REG')
-AS SELECT c1, c2, label FROM t
+AS SELECT c1, c2, label FROM t
diff --git a/tests/unit/core/sql/snapshots/test_ml/test_evaluate_model_with_options/evaluate_model_with_options.sql b/tests/unit/core/sql/snapshots/test_ml/test_evaluate_model_with_options/evaluate_model_with_options.sql
@@ -1 +1 @@
-SELECT * FROM ML.EVALUATE(MODEL `my_model`, OPTIONS(threshold = 0.5))
+SELECT * FROM ML.EVALUATE(MODEL `my_model`, STRUCT(False AS perform_aggregation, 10 AS horizon, 0.95 AS confidence_level))
diff --git a/tests/unit/core/sql/snapshots/test_ml/test_explain_predict_model_with_options/explain_predict_model_with_options.sql b/tests/unit/core/sql/snapshots/test_ml/test_explain_predict_model_with_options/explain_predict_model_with_options.sql
@@ -1 +1 @@
-SELECT * FROM ML.EXPLAIN_PREDICT(MODEL `my_model`, (SELECT * FROM new_data), OPTIONS(top_k_features = 5))
+SELECT * FROM ML.EXPLAIN_PREDICT(MODEL `my_model`, (SELECT * FROM new_data), STRUCT(5 AS top_k_features))
diff --git a/tests/unit/core/sql/snapshots/test_ml/test_global_explain_model_with_options/global_explain_model_with_options.sql b/tests/unit/core/sql/snapshots/test_ml/test_global_explain_model_with_options/global_explain_model_with_options.sql
@@ -1 +1 @@
-SELECT * FROM ML.GLOBAL_EXPLAIN(MODEL `my_model`, OPTIONS(num_features = 10))
+SELECT * FROM ML.GLOBAL_EXPLAIN(MODEL `my_model`, STRUCT(True AS class_level_explain))
diff --git a/tests/unit/core/sql/snapshots/test_ml/test_predict_model_with_options/predict_model_with_options.sql b/tests/unit/core/sql/snapshots/test_ml/test_predict_model_with_options/predict_model_with_options.sql
@@ -1 +1 @@
-SELECT * FROM ML.PREDICT(MODEL `my_model`, (SELECT * FROM new_data), OPTIONS(quantiles = [0.25, 0.75]))
+SELECT * FROM ML.PREDICT(MODEL `my_model`, (SELECT * FROM new_data), STRUCT(True AS keep_original_columns))
diff --git a/tests/unit/core/sql/test_ml.py b/tests/unit/core/sql/test_ml.py

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		-SELECT * FROM ML.EVALUATE(MODEL `my_model`, OPTIONS(threshold = 0.5))
	`1`	+SELECT * FROM ML.EVALUATE(MODEL `my_model`, STRUCT(False AS perform_aggregation, 10 AS horizon, 0.95 AS confidence_level))
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		-SELECT * FROM ML.EXPLAIN_PREDICT(MODEL `my_model`, (SELECT * FROM new_data), OPTIONS(top_k_features = 5))
	`1`	+SELECT * FROM ML.EXPLAIN_PREDICT(MODEL `my_model`, (SELECT * FROM new_data), STRUCT(5 AS top_k_features))
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		-SELECT * FROM ML.GLOBAL_EXPLAIN(MODEL `my_model`, OPTIONS(num_features = 10))
	`1`	+SELECT * FROM ML.GLOBAL_EXPLAIN(MODEL `my_model`, STRUCT(True AS class_level_explain))