Skip to content

Commit bb691b0

Browse files
committed
Merge branch 'main' into shuowei-time-series-bike
2 parents 3bec4b3 + 2526448 commit bb691b0

File tree

7 files changed

+89
-2
lines changed

7 files changed

+89
-2
lines changed

bigframes/core/compile/polars/compiler.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,11 @@ def _(
152152
value = None
153153
if expression.dtype is None:
154154
return pl.lit(None)
155+
156+
# As of Polars v1.36, pl.lit does not handle pandas.Timedelta well, so convert it to a stdlib timedelta first.
157+
if isinstance(value, pd.Timedelta):
158+
value = value.to_pytimedelta()
159+
155160
return pl.lit(value, _bigframes_dtype_to_polars_dtype(expression.dtype))
156161

157162
@compile_expression.register

bigframes/ml/base.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,13 @@ def fit(
248248
) -> _T:
249249
return self._fit(X, y)
250250

251+
def fit_predict(
252+
self: _T,
253+
X: utils.ArrayType,
254+
y: Optional[utils.ArrayType] = None,
255+
) -> _T:
256+
return self.fit(X).predict(X)
257+
251258

252259
class RetriableRemotePredictor(BaseEstimator):
253260
def _predict_and_retry(

notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1736,7 +1736,7 @@
17361736
"provenance": []
17371737
},
17381738
"kernelspec": {
1739-
"display_name": "Python 3 (ipykernel)",
1739+
"display_name": "venv (3.10.14)",
17401740
"language": "python",
17411741
"name": "python3"
17421742
},
@@ -1750,7 +1750,7 @@
17501750
"name": "python",
17511751
"nbconvert_exporter": "python",
17521752
"pygments_lexer": "ipython3",
1753-
"version": "3.10.9"
1753+
"version": "3.10.14"
17541754
}
17551755
},
17561756
"nbformat": 4,

tests/unit/test_series_polars.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5109,3 +5109,18 @@ def test_series_item_with_empty(session):
51095109

51105110
with pytest.raises(ValueError, match=re.escape(expected_message)):
51115111
bf_s_empty.item()
5112+
5113+
5114+
def test_series_dt_total_seconds(scalars_df_index, scalars_pandas_df_index):
5115+
bf_result = scalars_df_index["duration_col"].dt.total_seconds().to_pandas()
5116+
5117+
pd_result = scalars_pandas_df_index["duration_col"].dt.total_seconds()
5118+
5119+
# Index will be object type in pandas, string type in bigframes, but same values
5120+
pd.testing.assert_series_equal(
5121+
bf_result,
5122+
pd_result,
5123+
check_index_type=False,
5124+
# bigframes uses Float64, newer pandas may use double[pyarrow]
5125+
check_dtype=False,
5126+
)

third_party/bigframes_vendored/sklearn/cluster/_kmeans.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,26 @@ def predict(
115115
"""
116116
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
117117

118+
def fit_predict(
119+
self,
120+
X,
121+
y=None,
122+
):
123+
"""Compute cluster centers and predict cluster index for each sample.
124+
125+
Convenience method; equivalent to calling fit(X) followed by predict(X).
126+
127+
Args:
128+
X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series):
129+
DataFrame of shape (n_samples, n_features). Training data.
130+
y (default None):
131+
Not used, present here for API consistency by convention.
132+
133+
Returns:
134+
bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted labels.
135+
"""
136+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
137+
118138
def score(
119139
self,
120140
X,

third_party/bigframes_vendored/sklearn/decomposition/_mf.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,3 +94,23 @@ def predict(self, X):
9494
Returns:
9595
bigframes.dataframe.DataFrame: Predicted DataFrames."""
9696
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
97+
98+
def fit_predict(
99+
self,
100+
X,
101+
y=None,
102+
):
103+
"""Fit the model with X and generate a predicted rating for every user-item row combination in X.
104+
105+
Convenience method; equivalent to calling fit(X) followed by predict(X).
106+
107+
Args:
108+
X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series):
109+
DataFrame of shape (n_samples, n_features). Training data.
110+
y (default None):
111+
Not used, present here for API consistency by convention.
112+
113+
Returns:
114+
bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted ratings.
115+
"""
116+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

third_party/bigframes_vendored/sklearn/decomposition/_pca.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,26 @@ def predict(self, X):
101101
bigframes.dataframe.DataFrame: Predicted DataFrames."""
102102
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
103103

104+
def fit_predict(
105+
self,
106+
X,
107+
y=None,
108+
):
109+
"""Fit the model with X and apply the dimensionality reduction on X.
110+
111+
Convenience method; equivalent to calling fit(X) followed by predict(X).
112+
113+
Args:
114+
X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series):
115+
DataFrame of shape (n_samples, n_features). Training data.
116+
y (default None):
117+
Not used, present here for API consistency by convention.
118+
119+
Returns:
120+
bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns the data after dimensionality reduction.
121+
"""
122+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
123+
104124
@property
105125
def components_(self):
106126
"""Principal axes in feature space, representing the directions of maximum variance in the data.

0 commit comments

Comments
 (0)