diff --git a/bigframes/ml/base.py b/bigframes/ml/base.py index fe468cb28f..9b38702cce 100644 --- a/bigframes/ml/base.py +++ b/bigframes/ml/base.py @@ -248,6 +248,13 @@ def fit( ) -> _T: return self._fit(X, y) + def fit_predict( + self: _T, + X: utils.ArrayType, + y: Optional[utils.ArrayType] = None, + ) -> _T: + return self.fit(X, y).predict(X) + class RetriableRemotePredictor(BaseEstimator): def _predict_and_retry( diff --git a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb index bc55096942..08891d2b44 100644 --- a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb +++ b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb @@ -1736,7 +1736,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "venv (3.10.14)", "language": "python", "name": "python3" }, @@ -1750,7 +1750,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py index 44eefeddd7..2b1778eec8 100644 --- a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py +++ b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py @@ -115,6 +115,26 @@ def predict( """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def fit_predict( + self, + X, + y=None, + ): + """Compute cluster centers and predict cluster index for each sample. + + Convenience method; equivalent to calling fit(X) followed by predict(X). + + Args: + X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series): + DataFrame of shape (n_samples, n_features). Training data. + y (default None): + Not used, present here for API consistency by convention.
+ + Returns: + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted labels. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def score( self, X, diff --git a/third_party/bigframes_vendored/sklearn/decomposition/_mf.py b/third_party/bigframes_vendored/sklearn/decomposition/_mf.py index e487a2e7c1..7dad196237 100644 --- a/third_party/bigframes_vendored/sklearn/decomposition/_mf.py +++ b/third_party/bigframes_vendored/sklearn/decomposition/_mf.py @@ -94,3 +94,23 @@ def predict(self, X): Returns: bigframes.dataframe.DataFrame: Predicted DataFrames.""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def fit_predict( + self, + X, + y=None, + ): + """Fit the matrix factorization model with X and generate a predicted rating for every user-item row combination in X. + + Convenience method; equivalent to calling fit(X) followed by predict(X). + + Args: + X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series): + DataFrame of shape (n_samples, n_features). Training data. + y (default None): + Not used, present here for API consistency by convention. + + Returns: + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted ratings.
+ """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) diff --git a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py index 3535edc8f9..f90e193064 100644 --- a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py +++ b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py @@ -101,6 +101,26 @@ def predict(self, X): bigframes.dataframe.DataFrame: Predicted DataFrames.""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def fit_predict( + self, + X, + y=None, + ): + """Fit the model with X and apply the dimensionality reduction on X. + + Convenience method; equivalent to calling fit(X) followed by predict(X). + + Args: + X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series): + DataFrame of shape (n_samples, n_features). Training data. + y (default None): + Not used, present here for API consistency by convention. + + Returns: + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns the result of applying the dimensionality reduction to X. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + @property def components_(self): """Principal axes in feature space, representing the directions of maximum variance in the data.