Skip to content

Commit 1975c8a

Browse files
committed
iterate over keys
1 parent 96fb73b commit 1975c8a

File tree

2 files changed

+139
-25
lines changed

2 files changed

+139
-25
lines changed

bigframes/core/groupby/__init__.py

Lines changed: 25 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -144,8 +144,31 @@ def head(self, n: int = 5) -> df.DataFrame:
144144
)
145145

146146
def __iter__(self) -> Iterable[Tuple[blocks.Label, pd.DataFrame]]:
147-
# TODO: make a struct of all columns and then array_agg that.
148-
return ()
147+
# TODO: cache original block, clustered by column ids
148+
block = self._block.set_index(
149+
self._by_col_ids,
150+
# TODO: do we need to keep the original index?
151+
drop=False,
152+
index_labels=self._block._get_labels_for_columns(
153+
self._by_col_ids
154+
).to_list(),
155+
)
156+
block.cached(force=True)
157+
158+
keys_block, _ = block.aggregate(
159+
by_column_ids=self._by_col_ids,
160+
dropna=self._dropna,
161+
)
162+
for batch in keys_block.to_pandas_batches():
163+
for key in batch.index:
164+
# group_block = block
165+
# for col in self._by_col_ids: # TODO: can't loop through key if only one by_col_id.
166+
167+
#
168+
# = block.project_expr(bigframes.core.expression.const(key, dtype=self._block._column_type(self._by_col_ids))
169+
# ops.eq_op( ex.const(key)
170+
# )
171+
yield key, batch # TODO: filter clustered block by row
149172

150173
def size(self) -> typing.Union[df.DataFrame, series.Series]:
151174
agg_block, _ = self._block.aggregate_size(

0 commit comments

Comments
 (0)