1 file changed: +9 -11 lines changed
Original file line number | Diff line number | Diff line change
@@ -2164,11 +2164,10 @@ def _factorize_multiple(
21642164 for by_ , expect in zip (by , expected_groups ):
21652165 if expect is None :
21662166 if is_duck_dask_array (by_ ):
2167- raise ValueError (
2168- "Please provide expected_groups when grouping by a dask array."
2169- )
2170-
2171- found_group = pd .unique (by_ .reshape (- 1 ))
2167+ # could be remote dataset, execute remotely in that case
2168+ found_group = np .unique (by_ .reshape (- 1 )).compute ()
2169+ else :
2170+ found_group = pd .unique (by_ .reshape (- 1 ))
21722171 else :
21732172 found_group = expect .to_numpy ()
21742173
@@ -2475,15 +2474,14 @@ def groupby_reduce(
24752474
24762475 # Don't factorize early only when
24772476 # grouping by dask arrays, and not having expected_groups
2477+ # except for cohorts
24782478 factorize_early = not (
24792479 # can't do it if we are grouping by dask array but don't have expected_groups
2480- any (is_dask and ex_ is None for is_dask , ex_ in zip (by_is_dask , expected_groups ))
2481- )
2482-
2483- if method == "cohorts" and not factorize_early :
2484- raise ValueError (
2485- "method='cohorts' can only be used when grouping by dask arrays if `expected_groups` is provided."
2480+ any (
2481+ is_dask and ex_ is None and method != "cohorts"
2482+ for is_dask , ex_ in zip (by_is_dask , expected_groups )
24862483 )
2484+ )
24872485
24882486 expected_ : pd .RangeIndex | None
24892487 if factorize_early :
You can’t perform that action at this time.
0 commit comments