Skip to content

Commit 44bc5ee

Browse files
feat: Add DataFrame/Series.squeeze
1 parent 956a5b0 commit 44bc5ee

File tree

5 files changed

+193
-0
lines changed

5 files changed

+193
-0
lines changed

bigframes/dataframe.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2033,6 +2033,22 @@ def nsmallest(
20332033
column_ids = self._sql_names(columns)
20342034
return DataFrame(block_ops.nsmallest(self._block, n, column_ids, keep=keep))
20352035

2036+
def squeeze(self, axis: typing.Optional[typing.Union[int, str]] = None):
    """Collapse length-1 axes of this DataFrame.

    Args:
        axis:
            Axis to squeeze, resolved through ``utils.get_axis_number``.
            When ``None``, both the row and the column axis are candidates.

    Returns:
        A scalar when the frame is 1x1 and both axes may be squeezed, a
        Series when exactly one eligible axis has length one, otherwise
        this DataFrame itself.
    """
    row_count, col_count = self.shape

    if axis is None:
        drop_rows = drop_cols = True
    else:
        axis_index = utils.get_axis_number(axis)
        drop_cols = axis_index == 1
        drop_rows = axis_index == 0

    single_row = row_count == 1
    single_col = col_count == 1

    # 1x1 frame with both axes eligible: materialize and pull out the cell.
    if single_col and single_row and drop_rows and drop_cols:
        return self.to_pandas().iloc[0, 0]
    # One column: the existing block is reused as the Series' block.
    if single_col and drop_cols:
        return bigframes.series.Series(self._block)
    # One row: transpose so the former column labels index the Series.
    if single_row and drop_rows:
        return bigframes.series.Series(self._block.transpose(single_row_mode=True))
    return self
2051+
20362052
def insert(
20372053
self,
20382054
loc: int,

bigframes/series.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1133,6 +1133,11 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series:
11331133
block_ops.nsmallest(self._block, n, [self._value_column], keep=keep)
11341134
)
11351135

1136+
def squeeze(self, axis=None):
    """Return the sole element as a scalar, or this Series unchanged.

    Args:
        axis: Accepted but not read by this implementation.

    Returns:
        The single value (fetched through ``to_pandas()``) when the Series
        has exactly one element; otherwise this Series itself.
    """
    if len(self) != 1:
        return self
    return self.to_pandas().iloc[0]
1140+
11361141
def isin(self, values) -> "Series":
11371142
if isinstance(values, Series):
11381143
return Series(self._block.isin(values._block))

tests/unit/test_dataframe_polars.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1785,6 +1785,57 @@ def test_dataframe_sort_index_inplace(scalars_dfs):
17851785
pandas.testing.assert_frame_equal(bf_result, pd_result)
17861786

17871787

1788+
@pytest.mark.parametrize("axis", [0, "columns", None])
def test_dataframe_squeeze_noop(scalars_dfs, axis):
    """squeeze() on the full fixture frame should still match pandas as a frame."""
    bf_df, pd_df = scalars_dfs

    expected = pd_df.squeeze(axis=axis)
    actual = bf_df.squeeze(axis=axis).to_pandas()

    pandas.testing.assert_frame_equal(actual, expected)
1799+
1800+
1801+
@pytest.mark.parametrize("axis", [1, None])
def test_dataframe_squeeze_cols(scalars_dfs, axis):
    """A single-column frame should squeeze to the same Series as pandas."""
    bf_df, pd_df = scalars_dfs
    selection = ["int64_col"]

    expected = pd_df[selection].squeeze(axis)
    actual = bf_df[selection].squeeze(axis).to_pandas()

    pandas.testing.assert_series_equal(actual, expected)
1812+
1813+
1814+
@pytest.mark.parametrize("axis", [0, None])
def test_dataframe_squeeze_rows(scalars_dfs, axis):
    """A single-row frame should squeeze to the same Series as pandas."""
    bf_df, pd_df = scalars_dfs
    # Squeezing the row axis transposes implicitly, so the selected columns
    # need compatible types.
    selection = ["int64_col", "int64_too"]

    expected = pd_df[selection].head(1).squeeze(axis)
    actual = bf_df[selection].head(1).squeeze(axis).to_pandas()

    pandas.testing.assert_series_equal(actual, expected, check_index_type=False)
1826+
1827+
1828+
def test_dataframe_squeeze_both_axes(scalars_dfs):
    """One column limited to its first row should squeeze to an equal value."""
    bf_df, pd_df = scalars_dfs
    selection = ["int64_col"]

    expected = pd_df[selection].head(1).squeeze()
    actual = bf_df[selection].head(1).squeeze()

    assert expected == actual
1837+
1838+
17881839
def test_df_abs(scalars_dfs):
17891840
scalars_df, scalars_pandas_df = scalars_dfs
17901841
columns = ["int64_col", "int64_too", "float64_col"]

tests/unit/test_series_polars.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,25 @@ def test_series_equals_different_values(scalars_df_index, scalars_pandas_df_inde
544544
assert pd_result == bf_result
545545

546546

547+
def test_series_squeeze_noop(scalars_dfs):
    """squeeze() on the full fixture column should still match pandas as a Series."""
    bf_df, pd_df = scalars_dfs
    column = "int64_too"

    expected = pd_df[column].squeeze()
    actual = bf_df[column].squeeze().to_pandas()

    assert_series_equal(actual, expected)
554+
555+
556+
def test_series_squeeze_squeezes(scalars_dfs):
    """A Series limited to its first row should squeeze to the same value as pandas."""
    scalars_df, scalars_pandas_df = scalars_dfs

    # head(1) keeps at most one element (one, assuming a non-empty fixture),
    # so squeeze() is expected to yield a plain value on both sides. Unlike
    # the DataFrame row squeeze, no transpose is involved here.
    pd_result = scalars_pandas_df["int64_too"].head(1).squeeze()
    bf_result = scalars_df["int64_too"].head(1).squeeze()

    assert pd_result == bf_result
564+
565+
547566
def test_series_get_with_default_index(scalars_dfs):
548567
col_name = "float64_col"
549568
key = 2

third_party/bigframes_vendored/pandas/core/generic.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,108 @@ def __iter__(self) -> Iterator:
8282
"""
8383
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
8484

85+
def squeeze(self, axis=None):
    """
    Squeeze 1 dimensional axis objects into scalars.

    Series or DataFrames with a single element are squeezed to a scalar.
    DataFrames with a single column or a single row are squeezed to a
    Series. Otherwise the object is unchanged.

    This method is most useful when you don't know if your
    object is a Series or DataFrame, but you do know it has just a single
    column. In that case you can safely call `squeeze` to ensure you have a
    Series.

    **Examples:**
        >>> primes = bpd.Series([2, 3, 5, 7])

    Slicing might produce a Series with a single value:

        >>> even_primes = primes[primes % 2 == 0]
        >>> even_primes
        0    2
        dtype: Int64

        >>> even_primes.squeeze()
        np.int64(2)

    Squeezing objects with more than one value in every axis does nothing:

        >>> odd_primes = primes[primes % 2 == 1]
        >>> odd_primes
        1    3
        2    5
        3    7
        dtype: Int64

        >>> odd_primes.squeeze()
        1    3
        2    5
        3    7
        dtype: Int64

    Squeezing is even more effective when used with DataFrames.

        >>> df = bpd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
        >>> df
           a  b
        0  1  2
        1  3  4
        <BLANKLINE>
        [2 rows x 2 columns]

    Slicing a single column will produce a DataFrame whose columns axis
    has only one entry:

        >>> df_a = df[['a']]
        >>> df_a
           a
        0  1
        1  3
        <BLANKLINE>
        [2 rows x 1 columns]

    So the columns can be squeezed down, resulting in a Series:

        >>> df_a.squeeze('columns')
        0    1
        1    3
        Name: a, dtype: Int64

    Slicing a single row from a single column will produce a single
    scalar DataFrame:

        >>> df_0a = df.loc[[0], ['a']]
        >>> df_0a
           a
        0  1
        <BLANKLINE>
        [1 rows x 1 columns]

    Squeezing the rows produces a single scalar Series:

        >>> df_0a.squeeze('rows')
        a    1
        Name: 0, dtype: Int64

    Squeezing all axes will project directly into a scalar:

        >>> df_0a.squeeze()
        np.int64(1)

    Args:
        axis ({0 or 'index', 1 or 'columns', None}, default None):
            A specific axis to squeeze. By default, all length-1 axes are
            squeezed. For `Series` this parameter is unused and defaults to `None`.

    Returns:
        DataFrame, Series, or scalar:
            The projection after squeezing `axis` or all the axes.

    """
    # Documentation-only stub: this definition always raises. Presumably the
    # concrete DataFrame/Series classes provide the real implementation.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
186+
85187
# -------------------------------------------------------------------------
86188
# Unary Methods
87189

0 commit comments

Comments
 (0)