Skip to content

Commit 0a1ec39

Browse files
committed
feat: add GroupBy.__iter__
1 parent 4c3548f commit 0a1ec39

File tree

2 files changed

+68
-3
lines changed

2 files changed

+68
-3
lines changed

bigframes/core/groupby/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from __future__ import annotations
1616

1717
import typing
18-
from typing import Sequence, Union
18+
from typing import Iterable, Sequence, Tuple, Union
1919

2020
import bigframes_vendored.constants as constants
2121
import bigframes_vendored.pandas.core.groupby as vendored_pandas_groupby
@@ -142,6 +142,10 @@ def head(self, n: int = 5) -> df.DataFrame:
142142
)
143143
)
144144

145+
def __iter__(self) -> Iterable[Tuple[blocks.Label, pd.DataFrame]]:
    """Iterate over the groups as ``(label, data)`` pairs.

    Per-group iteration is not implemented yet (see the TODO below), so the
    iterator returned here is always empty.

    Returns:
        An iterator that yields no items.
    """
    # TODO: make a struct of all columns and then array_agg that.
    # ``__iter__`` has to hand back an *iterator*; returning a bare tuple
    # makes ``iter(obj)`` / ``for ... in obj`` fail with
    # "TypeError: iter() returned non-iterator of type 'tuple'".
    return iter(())
148+
145149
def size(self) -> typing.Union[df.DataFrame, series.Series]:
146150
agg_block, _ = self._block.aggregate_size(
147151
by_column_ids=self._by_col_ids,

third_party/bigframes_vendored/pandas/core/groupby/__init__.py

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1032,11 +1032,11 @@ def size(self):
10321032
10331033
**Examples:**
10341034
1035-
For SeriesGroupBy:
1036-
10371035
>>> import bigframes.pandas as bpd
10381036
>>> bpd.options.display.progress_bar = None
10391037
1038+
For SeriesGroupBy:
1039+
10401040
>>> lst = ['a', 'a', 'b']
10411041
>>> ser = bpd.Series([1, 2, 3], index=lst)
10421042
>>> ser
@@ -1074,6 +1074,67 @@ def size(self):
10741074
"""
10751075
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
10761076

1077+
def __iter__(self):
    """
    Iterate over the groups, one ``(label, data)`` pair at a time.

    Looping over the result of a ``groupby`` or ``resample`` operation
    produces one tuple per group or resampled bin. The first item of each
    tuple is the label (group key) of that group or bin; the second item is
    the subset of the data that falls within it.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None

    For SeriesGroupBy:

        >>> lst = ["a", "a", "b"]
        >>> ser = bpd.Series([1, 2, 3], index=lst)
        >>> ser
        a    1
        a    2
        b    3
        dtype: Int64
        >>> for x, y in ser.groupby(level=0):
        ...     print(f"{x}\\n{y}\\n")
        a
        a    1
        a    2
        dtype: Int64
        b
        b    3
        dtype: Int64

    For DataFrameGroupBy:

        >>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]]
        >>> df = bpd.DataFrame(data, columns=["a", "b", "c"])
        >>> df
           a  b  c
        0  1  2  3
        1  1  5  6
        2  7  8  9
        >>> for x, y in df.groupby(by=["a"]):
        ...     print(f"{x}\\n{y}\\n")
        (1,)
           a  b  c
        0  1  2  3
        1  1  5  6
        (7,)
           a  b  c
        2  7  8  9


    Returns:
        Iterator of tuples:
            For each group, a (``name``, data) pair whose data is a
            downloaded ``pandas.DataFrame`` or ``pandas.Series``.
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
1137+
10771138

10781139
class SeriesGroupBy(GroupBy):
10791140
def agg(self, func):

0 commit comments

Comments
 (0)