From 622b317e0fdc0a03780473779bf12dcfb591d058 Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Wed, 10 Dec 2025 00:01:41 +0000 Subject: [PATCH 1/2] feat: add fit_predict method to ml unsupervised models --- bigframes/ml/base.py | 3 +++ .../sklearn/cluster/_kmeans.py | 20 +++++++++++++++++++ .../sklearn/decomposition/_mf.py | 20 +++++++++++++++++++ .../sklearn/decomposition/_pca.py | 20 +++++++++++++++++++ 4 files changed, 63 insertions(+) diff --git a/bigframes/ml/base.py b/bigframes/ml/base.py index fe468cb28f..4534a84aea 100644 --- a/bigframes/ml/base.py +++ b/bigframes/ml/base.py @@ -248,6 +248,9 @@ def fit( ) -> _T: return self._fit(X, y) + def fit_predict(self, X: utils.ArrayType, y=None) -> bpd.DataFrame: # ignored + return self.fit(X).predict(X) + class RetriableRemotePredictor(BaseEstimator): def _predict_and_retry( diff --git a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py index 44eefeddd7..2b1778eec8 100644 --- a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py +++ b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py @@ -115,6 +115,26 @@ def predict( """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def fit_predict( + self, + X, + y=None, + ): + """Compute cluster centers and predict cluster index for each sample. + + Convenience method; equivalent to calling fit(X) followed by predict(X). + + Args: + X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series): + DataFrame of shape (n_samples, n_features). Training data. + y (default None): + Not used, present here for API consistency by convention. + + Returns: + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted labels. 
+ """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def score( self, X, diff --git a/third_party/bigframes_vendored/sklearn/decomposition/_mf.py b/third_party/bigframes_vendored/sklearn/decomposition/_mf.py index e487a2e7c1..7dad196237 100644 --- a/third_party/bigframes_vendored/sklearn/decomposition/_mf.py +++ b/third_party/bigframes_vendored/sklearn/decomposition/_mf.py @@ -94,3 +94,23 @@ def predict(self, X): Returns: bigframes.dataframe.DataFrame: Predicted DataFrames.""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def fit_predict( + self, + X, + y=None, + ): + """Fit the matrix factorization model with X and generate a predicted rating for every user-item row combination in X. + + Convenience method; equivalent to calling fit(X) followed by predict(X). + + Args: + X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series): + DataFrame of shape (n_samples, n_features). Training data. + y (default None): + Not used, present here for API consistency by convention. + + Returns: + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted ratings. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) diff --git a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py index 3535edc8f9..f90e193064 100644 --- a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py +++ b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py @@ -101,6 +101,26 @@ def predict(self, X): bigframes.dataframe.DataFrame: Predicted DataFrames.""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def fit_predict( + self, + X, + y=None, + ): + """Fit the model with X and apply the dimensionality reduction on X. + + Convenience method; equivalent to calling fit(X) followed by predict(X).
+ + Args: + X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series): + DataFrame of shape (n_samples, n_features). Training data. + y (default None): + Not used, present here for API consistency by convention. + + Returns: + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns the result of the dimensionality reduction. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + @property def components_(self): """Principal axes in feature space, representing the directions of maximum variance in the data. From bb696159566a9160f401c89c3f612657db61e495 Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Wed, 10 Dec 2025 00:05:21 +0000 Subject: [PATCH 2/2] fix --- bigframes/ml/base.py | 6 +++++- notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/bigframes/ml/base.py b/bigframes/ml/base.py index 4534a84aea..9b38702cce 100644 --- a/bigframes/ml/base.py +++ b/bigframes/ml/base.py @@ -248,7 +248,11 @@ def fit( ) -> _T: return self._fit(X, y) - def fit_predict(self, X: utils.ArrayType, y=None) -> bpd.DataFrame: # ignored + def fit_predict( + self: _T, + X: utils.ArrayType, + y: Optional[utils.ArrayType] = None, + ) -> _T: return self.fit(X).predict(X) diff --git a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb index bc55096942..08891d2b44 100644 --- a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb +++ b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb @@ -1736,7 +1736,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "venv (3.10.14)", "language": "python", "name": "python3" }, @@ -1750,7 +1750,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.10.14" } }, "nbformat": 4,