Skip to content

Commit 5df828b

Browse files
committed
add tests for session scoped methods
1 parent dc02baf commit 5df828b

File tree

12 files changed

+171
-5072
lines changed

12 files changed

+171
-5072
lines changed

bigframes/core/blocks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2471,7 +2471,7 @@ def _align_series_block_axis_1(
24712471
def _align_pd_series_axis_1(
24722472
self, other: pd.Series, how: str
24732473
) -> Tuple[Block, pd.Index, Sequence[Tuple[ex.RefOrConstant, ex.RefOrConstant]]]:
2474-
if self.column_labels.astype("object").equals(other.index.astype("object")):
2474+
if self.column_labels.equals(other.index):
24752475
columns, lcol_indexer, rcol_indexer = self.column_labels, None, None
24762476
else:
24772477
if not (self.column_labels.is_unique and other.index.is_unique):

bigframes/core/compile/polars/compiler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -493,9 +493,9 @@ def compile_agg_op(
493493
if isinstance(op, agg_ops.MedianOp):
494494
return pl.median(*inputs)
495495
if isinstance(op, agg_ops.AllOp):
496-
return pl.col(inputs).cast(pl.Boolean).all()
496+
return pl.all(*inputs)
497497
if isinstance(op, agg_ops.AnyOp):
498-
return pl.col(inputs).cast(pl.Boolean).any()
498+
return pl.any(*inputs) # type: ignore
499499
if isinstance(op, agg_ops.NuniqueOp):
500500
return pl.col(*inputs).drop_nulls().n_unique()
501501
if isinstance(op, agg_ops.MinOp):

bigframes/dataframe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -688,7 +688,7 @@ def _getitem_label(self, key: blocks.Label):
688688
return DataFrame(block)
689689

690690
if len(col_ids) == 1:
691-
return bigframes.series.Series(block, name=key)
691+
return bigframes.series.Series(block)
692692
return DataFrame(block)
693693

694694
# Bool Series selects rows

bigframes/operations/base.py

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
from __future__ import annotations
1616

17-
import enum
1817
import typing
1918
from typing import List, Sequence, Union
2019

@@ -36,18 +35,6 @@
3635
import bigframes.session
3736

3837

39-
class Default(enum.Enum):
40-
"""Sentinel that can disambiguate explicit None from missing.
41-
42-
See https://stackoverflow.com/a/76606310/101923
43-
"""
44-
45-
token = 0
46-
47-
48-
DEFAULT = Default.token
49-
50-
5138
class SeriesMethods:
5239
def __init__(
5340
self,
@@ -56,7 +43,7 @@ def __init__(
5643
dtype: typing.Optional[
5744
bigframes.dtypes.DtypeString | bigframes.dtypes.Dtype
5845
] = None,
59-
name: str | None | Default = DEFAULT,
46+
name: str | None = None,
6047
copy: typing.Optional[bool] = None,
6148
*,
6249
session: typing.Optional[bigframes.session.Session] = None,
@@ -120,7 +107,6 @@ def __init__(
120107
block = data_block
121108

122109
if block:
123-
# Data was a bigframes object.
124110
assert len(block.value_columns) == 1
125111
assert len(block.column_labels) == 1
126112
if index is not None: # reindexing operation
@@ -129,27 +115,23 @@ def __init__(
129115
idx_cols = idx_block.index_columns
130116
block, _ = idx_block.join(block, how="left")
131117
block = block.with_index_labels(bf_index.names)
132-
if name is not DEFAULT:
118+
if name:
133119
block = block.with_column_labels([name])
134120
if dtype:
135121
bf_dtype = bigframes.dtypes.bigframes_type(dtype)
136122
block = block.multi_apply_unary_op(ops.AsTypeOp(to_type=bf_dtype))
137123
else:
138-
# Data was local.
139124
if isinstance(dtype, str) and dtype.lower() == "json":
140125
dtype = bigframes.dtypes.JSON_DTYPE
141126
pd_series = pd.Series(
142127
data=data,
143128
index=index, # type:ignore
144129
dtype=dtype, # type:ignore
145-
name=name if name is not DEFAULT else None,
130+
name=name,
146131
)
147-
name = pd_series.name # type: ignore
148132
block = read_pandas_func(pd_series)._get_block() # type:ignore
149-
block = block.with_column_labels([name])
150133

151134
assert block is not None
152-
153135
self._block: blocks.Block = block
154136

155137
@property
@@ -178,8 +160,7 @@ def _apply_unary_op(
178160
block, result_id = self._block.apply_unary_op(
179161
self._value_column, op, result_label=self._name
180162
)
181-
result = series.Series(block.select_column(result_id), name=self._name)
182-
return result
163+
return series.Series(block.select_column(result_id))
183164

184165
def _apply_binary_op(
185166
self,

bigframes/pandas/core/tools/timedeltas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def to_timedelta(
3535
return arg._apply_unary_op(ops.ToTimedeltaOp(canonical_unit))
3636

3737
if pdtypes.is_list_like(arg):
38-
return to_timedelta(series.Series(arg), unit, session=session)
38+
return to_timedelta(series.Series(arg, session=session), unit, session=session)
3939

4040
return pd.to_timedelta(arg, unit)
4141

bigframes/testing/polars_session.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -95,17 +95,10 @@ def __init__(self):
9595

9696
def read_pandas(self, pandas_dataframe, write_engine="default"):
9797
# override read_pandas to always keep data local-only
98-
if isinstance(pandas_dataframe, (pandas.Series, pandas.Index)):
98+
if isinstance(pandas_dataframe, pandas.Series):
9999
pandas_dataframe = pandas_dataframe.to_frame()
100100
local_block = bigframes.core.blocks.Block.from_local(pandas_dataframe, self)
101-
bf_df = bigframes.dataframe.DataFrame(local_block)
102-
if isinstance(pandas_dataframe, pandas.Series):
103-
series = bf_df[bf_df.columns[0]]
104-
series.name = pandas_dataframe.name
105-
return series
106-
if isinstance(pandas_dataframe, pandas.Index):
107-
return bf_df.index
108-
return bf_df
101+
return bigframes.dataframe.DataFrame(local_block)
109102

110103
@property
111104
def bqclient(self):

noxfile.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@
4646
"3.11",
4747
]
4848

49-
PYTEST_VERSION = "pytest==8.4.2"
49+
# pytest-retry is not yet compatible with pytest 8.x.
50+
# https://github.com/str0zzapreti/pytest-retry/issues/32
51+
PYTEST_VERSION = "pytest<8.0.0dev"
5052
SPHINX_VERSION = "sphinx==4.5.0"
5153
LINT_PATHS = [
5254
"docs",
@@ -113,7 +115,7 @@
113115
# Make sure we leave some versions without "extras" so we know those
114116
# dependencies are actually optional.
115117
"3.10": ["tests", "scikit-learn", "anywidget"],
116-
LATEST_FULLY_SUPPORTED_PYTHON: ["tests", "scikit-learn", "polars", "anywidget"],
118+
"3.11": ["tests", "scikit-learn", "polars", "anywidget"],
117119
"3.13": ["tests", "polars", "anywidget"],
118120
}
119121

scripts/publish_api_coverage.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,9 +204,6 @@ def generate_pandas_api_coverage():
204204
def generate_sklearn_api_coverage():
205205
"""Explore all SKLearn modules, and for each item contained generate a
206206
regex to detect it being imported, and record whether we implement it"""
207-
208-
import sklearn # noqa
209-
210207
sklearn_modules = [
211208
"sklearn",
212209
"sklearn.model_selection",
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Check that bpd and Session can be used interchangeably."""
16+
17+
from __future__ import annotations
18+
19+
from typing import cast
20+
21+
import numpy as np
22+
import pandas.testing
23+
24+
import bigframes.pandas as bpd
25+
import bigframes.session
26+
27+
28+
def test_cut(session: bigframes.session.Session):
29+
sc = [30, 80, 40, 90, 60, 45, 95, 75, 55, 100, 65, 85]
30+
x = [20, 40, 60, 80, 100]
31+
32+
bpd_result = bpd.cut(sc, x)
33+
session_result = session.cut(sc, x)
34+
35+
global_session = bpd.get_global_session()
36+
assert global_session is not session
37+
assert bpd_result._session is global_session
38+
assert session_result._session is session
39+
40+
bpd_pd = bpd_result.to_pandas()
41+
session_pd = session_result.to_pandas()
42+
pandas.testing.assert_series_equal(bpd_pd, session_pd)
43+
44+
45+
def test_dataframe(session: bigframes.session.Session):
46+
data = {"col": ["local", None, "data"]}
47+
48+
bpd_result = bpd.DataFrame(data)
49+
session_result = session.DataFrame(data)
50+
51+
global_session = bpd.get_global_session()
52+
assert global_session is not session
53+
assert bpd_result._session is global_session
54+
assert session_result._session is session
55+
56+
bpd_pd = bpd_result.to_pandas()
57+
session_pd = session_result.to_pandas()
58+
pandas.testing.assert_frame_equal(bpd_pd, session_pd)
59+
60+
61+
def test_multiindex_from_arrays(session: bigframes.session.Session):
62+
arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
63+
64+
bpd_result = bpd.MultiIndex.from_arrays(arrays, names=("number", "color"))
65+
session_result = session.MultiIndex.from_arrays(arrays, names=("number", "color"))
66+
67+
global_session = bpd.get_global_session()
68+
assert global_session is not session
69+
assert bpd_result._session is global_session
70+
assert session_result._session is session
71+
72+
bpd_pd = bpd_result.to_pandas()
73+
session_pd = session_result.to_pandas()
74+
pandas.testing.assert_index_equal(bpd_pd, session_pd)
75+
76+
77+
def test_multiindex_from_tuples(session: bigframes.session.Session):
78+
tuples = [(1, "red"), (1, "blue"), (2, "red"), (2, "blue")]
79+
80+
bpd_result = bpd.MultiIndex.from_tuples(tuples, names=("number", "color"))
81+
session_result = session.MultiIndex.from_tuples(tuples, names=("number", "color"))
82+
83+
global_session = bpd.get_global_session()
84+
assert global_session is not session
85+
assert bpd_result._session is global_session
86+
assert session_result._session is session
87+
88+
bpd_pd = bpd_result.to_pandas()
89+
session_pd = session_result.to_pandas()
90+
pandas.testing.assert_index_equal(bpd_pd, session_pd)
91+
92+
93+
def test_index(session: bigframes.session.Session):
94+
index = [1, 2, 3]
95+
96+
bpd_result = bpd.Index(index)
97+
session_result = session.Index(index)
98+
99+
global_session = bpd.get_global_session()
100+
assert global_session is not session
101+
assert bpd_result._session is global_session
102+
assert session_result._session is session
103+
104+
bpd_pd = bpd_result.to_pandas()
105+
session_pd = session_result.to_pandas()
106+
pandas.testing.assert_index_equal(bpd_pd, session_pd)
107+
108+
109+
def test_series(session: bigframes.session.Session):
110+
series = [1, 2, 3]
111+
112+
bpd_result = bpd.Series(series)
113+
session_result = session.Series(series)
114+
115+
global_session = bpd.get_global_session()
116+
assert global_session is not session
117+
assert bpd_result._session is global_session
118+
assert session_result._session is session
119+
120+
bpd_pd = bpd_result.to_pandas()
121+
session_pd = session_result.to_pandas()
122+
pandas.testing.assert_series_equal(bpd_pd, session_pd)
123+
124+
125+
def test_to_datetime(session: bigframes.session.Session):
126+
datetimes = ["2018-10-26 12:00:00", "2018-10-26 13:00:15"]
127+
128+
bpd_result = bpd.to_datetime(datetimes)
129+
session_result = cast(bpd.Series, session.to_datetime(datetimes))
130+
131+
global_session = bpd.get_global_session()
132+
assert global_session is not session
133+
assert bpd_result._session is global_session
134+
assert session_result._session is session
135+
136+
bpd_pd = bpd_result.to_pandas()
137+
session_pd = session_result.to_pandas()
138+
pandas.testing.assert_series_equal(bpd_pd, session_pd)
139+
140+
141+
def test_to_timedelta(session: bigframes.session.Session):
142+
offsets = np.arange(5)
143+
unit = "s"
144+
145+
bpd_result = bpd.to_timedelta(offsets, unit=unit)
146+
session_result = session.to_timedelta(offsets, unit=unit)
147+
148+
global_session = bpd.get_global_session()
149+
assert global_session is not session
150+
assert bpd_result._session is global_session
151+
assert session_result._session is session
152+
153+
bpd_pd = bpd_result.to_pandas()
154+
session_pd = session_result.to_pandas()
155+
pandas.testing.assert_series_equal(bpd_pd, session_pd)

tests/unit/test_dataframe_polars.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2319,8 +2319,7 @@ def test_binop_with_self_aggregate(session, scalars_dfs):
23192319
df_columns = ["int64_col", "float64_col", "int64_too"]
23202320

23212321
bf_df = scalars_df[df_columns]
2322-
bf_deviation = bf_df - bf_df.mean()
2323-
bf_result = bf_deviation.to_pandas()
2322+
bf_result = (bf_df - bf_df.mean()).to_pandas()
23242323

23252324
pd_df = scalars_pandas_df[df_columns]
23262325
pd_result = pd_df - pd_df.mean()

0 commit comments

Comments
 (0)