@@ -135,11 +135,11 @@ def test_groupby_reduce(
135135 by = da .from_array (by , chunks = (3 ,) if by .ndim == 1 else (1 , 3 ))
136136
137137 if func == "mean" or func == "nanmean" :
138- expected_result = np .array (expected , dtype = float )
138+ expected_result = np .array (expected , dtype = np . float64 )
139139 elif func == "sum" :
140140 expected_result = np .array (expected , dtype = dtype )
141141 elif func == "count" :
142- expected_result = np .array (expected , dtype = int )
142+ expected_result = np .array (expected , dtype = np . int64 )
143143
144144 result , groups , = groupby_reduce (
145145 array ,
@@ -149,7 +149,9 @@ def test_groupby_reduce(
149149 fill_value = 123 ,
150150 engine = engine ,
151151 )
152- g_dtype = by .dtype if expected_groups is None else np .asarray (expected_groups ).dtype
152+ # we use pd.Index(expected_groups).to_numpy() which is always int64
153+ # for the values in this test
154+ g_dtype = by .dtype if expected_groups is None else np .int64
153155
154156 assert_equal (groups , np .array ([0 , 1 , 2 ], g_dtype ))
155157 assert_equal (expected_result , result )
@@ -274,7 +276,7 @@ def test_groupby_reduce_count():
274276 array = np .array ([0 , 0 , np .nan , np .nan , np .nan , 1 , 1 ])
275277 labels = np .array (["a" , "b" , "b" , "b" , "c" , "c" , "c" ])
276278 result , _ = groupby_reduce (array , labels , func = "count" )
277- assert_equal (result , [1 , 1 , 2 ])
279+ assert_equal (result , np . array ( [1 , 1 , 2 ], dtype = np . int64 ) )
278280
279281
280282def test_func_is_aggregation ():
@@ -383,53 +385,52 @@ def test_groupby_agg_dask(func, shape, array_chunks, group_chunks, add_nan, dtyp
383385 kwargs ["expected_groups" ] = [0 , 2 , 1 ]
384386 with raise_if_dask_computes ():
385387 actual , groups = groupby_reduce (array , by , engine = engine , ** kwargs , sort = False )
386- assert_equal (groups , [0 , 2 , 1 ])
388+ assert_equal (groups , np . array ( [0 , 2 , 1 ], dtype = np . intp ) )
387389 assert_equal (expected , actual [..., [0 , 2 , 1 ]])
388390
389- kwargs ["expected_groups" ] = [0 , 2 , 1 ]
390391 with raise_if_dask_computes ():
391392 actual , groups = groupby_reduce (array , by , engine = engine , ** kwargs , sort = True )
392- assert_equal (groups , [0 , 1 , 2 ])
393+ assert_equal (groups , np . array ( [0 , 1 , 2 ], np . intp ) )
393394 assert_equal (expected , actual )
394395
395396
396397def test_numpy_reduce_axis_subset (engine ):
397398 # TODO: add NaNs
398399 by = labels2d
399- array = np .ones_like (by )
400+ array = np .ones_like (by , dtype = np . int64 )
400401 kwargs = dict (func = "count" , engine = engine , fill_value = 0 )
401402 result , _ = groupby_reduce (array , by , ** kwargs , axis = 1 )
402- assert_equal (result , [[2 , 3 ], [2 , 3 ]])
403+ assert_equal (result , np . array ( [[2 , 3 ], [2 , 3 ]], dtype = np . int64 ) )
403404
404405 by = np .broadcast_to (labels2d , (3 , * labels2d .shape ))
405406 array = np .ones_like (by )
406407 result , _ = groupby_reduce (array , by , ** kwargs , axis = 1 )
407- subarr = np .array ([[1 , 1 ], [1 , 1 ], [0 , 2 ], [1 , 1 ], [1 , 1 ]])
408+ subarr = np .array ([[1 , 1 ], [1 , 1 ], [0 , 2 ], [1 , 1 ], [1 , 1 ]], dtype = np . int64 )
408409 expected = np .tile (subarr , (3 , 1 , 1 ))
409410 assert_equal (result , expected )
410411
411412 result , _ = groupby_reduce (array , by , ** kwargs , axis = 2 )
412- subarr = np .array ([[2 , 3 ], [2 , 3 ]])
413+ subarr = np .array ([[2 , 3 ], [2 , 3 ]], dtype = np . int64 )
413414 expected = np .tile (subarr , (3 , 1 , 1 ))
414415 assert_equal (result , expected )
415416
416417 result , _ = groupby_reduce (array , by , ** kwargs , axis = (1 , 2 ))
417- expected = np .array ([[4 , 6 ], [4 , 6 ], [4 , 6 ]])
418+ expected = np .array ([[4 , 6 ], [4 , 6 ], [4 , 6 ]], dtype = np . int64 )
418419 assert_equal (result , expected )
419420
420421 result , _ = groupby_reduce (array , by , ** kwargs , axis = (2 , 1 ))
421422 assert_equal (result , expected )
422423
423424 result , _ = groupby_reduce (array , by [0 , ...], ** kwargs , axis = (1 , 2 ))
424- expected = np .array ([[4 , 6 ], [4 , 6 ], [4 , 6 ]])
425+ expected = np .array ([[4 , 6 ], [4 , 6 ], [4 , 6 ]], dtype = np . int64 )
425426 assert_equal (result , expected )
426427
427428
428429@requires_dask
429430def test_dask_reduce_axis_subset ():
430431
431432 by = labels2d
432- array = np .ones_like (by )
433+ array = np .ones_like (by , dtype = np . int64 )
433434 with raise_if_dask_computes ():
434435 result , _ = groupby_reduce (
435436 da .from_array (array , chunks = (2 , 3 )),
@@ -438,11 +439,11 @@ def test_dask_reduce_axis_subset():
438439 axis = 1 ,
439440 expected_groups = [0 , 2 ],
440441 )
441- assert_equal (result , [[2 , 3 ], [2 , 3 ]])
442+ assert_equal (result , np . array ( [[2 , 3 ], [2 , 3 ]], dtype = np . int64 ) )
442443
443444 by = np .broadcast_to (labels2d , (3 , * labels2d .shape ))
444445 array = np .ones_like (by )
445- subarr = np .array ([[1 , 1 ], [1 , 1 ], [123 , 2 ], [1 , 1 ], [1 , 1 ]])
446+ subarr = np .array ([[1 , 1 ], [1 , 1 ], [123 , 2 ], [1 , 1 ], [1 , 1 ]], dtype = np . int64 )
446447 expected = np .tile (subarr , (3 , 1 , 1 ))
447448 with raise_if_dask_computes ():
448449 result , _ = groupby_reduce (
@@ -455,7 +456,7 @@ def test_dask_reduce_axis_subset():
455456 )
456457 assert_equal (result , expected )
457458
458- subarr = np .array ([[2 , 3 ], [2 , 3 ]])
459+ subarr = np .array ([[2 , 3 ], [2 , 3 ]], dtype = np . int64 )
459460 expected = np .tile (subarr , (3 , 1 , 1 ))
460461 with raise_if_dask_computes ():
461462 result , _ = groupby_reduce (
@@ -663,7 +664,7 @@ def test_groupby_bins(chunk_labels, chunks, engine, method) -> None:
663664 engine = engine ,
664665 method = method ,
665666 )
666- expected = np .array ([3 , 1 , 0 ])
667+ expected = np .array ([3 , 1 , 0 ], dtype = np . int64 )
667668 for left , right in zip (groups , pd .IntervalIndex .from_arrays ([1 , 2 , 4 ], [2 , 4 , 5 ]).to_numpy ()):
668669 assert left == right
669670 assert_equal (actual , expected )
@@ -780,15 +781,23 @@ def test_dtype_preservation(dtype, func, engine):
780781
781782
782783@requires_dask
783- @pytest .mark .parametrize ("method" , ["split-reduce" , "map-reduce" , "cohorts" ])
784- def test_cohorts (method ):
785- repeats = [4 , 4 , 12 , 2 , 3 , 4 ]
786- labels = np .repeat (np .arange (6 ), repeats )
787- array = dask .array .from_array (labels , chunks = (4 , 8 , 4 , 9 , 4 ))
784+ @pytest .mark .parametrize ("dtype" , [np .int32 , np .int64 ])
785+ @pytest .mark .parametrize (
786+ "labels_dtype" , [pytest .param (np .int32 , marks = pytest .mark .xfail ), np .int64 ]
787+ )
788+ @pytest .mark .parametrize ("method" , ["map-reduce" , "cohorts" ])
789+ def test_cohorts_map_reduce_consistent_dtypes (method , dtype , labels_dtype ):
790+ repeats = np .array ([4 , 4 , 12 , 2 , 3 , 4 ], dtype = np .int32 )
791+ labels = np .repeat (np .arange (6 , dtype = labels_dtype ), repeats )
792+ array = dask .array .from_array (labels .astype (dtype ), chunks = (4 , 8 , 4 , 9 , 4 ))
788793
789794 actual , actual_groups = groupby_reduce (array , labels , func = "count" , method = method )
790- assert_equal (actual_groups , np .arange (6 ))
791- assert_equal (actual , repeats )
795+ assert_equal (actual_groups , np .arange (6 , dtype = labels .dtype ))
796+ assert_equal (actual , repeats .astype (np .int64 ))
797+
798+ actual , actual_groups = groupby_reduce (array , labels , func = "sum" , method = method )
799+ assert_equal (actual_groups , np .arange (6 , dtype = labels .dtype ))
800+ assert_equal (actual , np .array ([0 , 4 , 24 , 6 , 12 , 20 ], dtype ))
792801
793802
794803@requires_dask
@@ -800,7 +809,7 @@ def test_cohorts_nd_by(func, method, axis, engine):
800809 o2 = dask .array .ones ((2 , 3 ), chunks = - 1 )
801810
802811 array = dask .array .block ([[o , 2 * o ], [3 * o2 , 4 * o2 ]])
803- by = array .compute ().astype (int )
812+ by = array .compute ().astype (np . int64 )
804813 by [0 , 1 ] = 30
805814 by [2 , 1 ] = 40
806815 by [0 , 4 ] = 31
@@ -825,9 +834,9 @@ def test_cohorts_nd_by(func, method, axis, engine):
825834
826835 actual , groups = groupby_reduce (array , by , sort = False , ** kwargs )
827836 if method == "map-reduce" :
828- assert_equal (groups , [1 , 30 , 2 , 31 , 3 , 4 , 40 ])
837+ assert_equal (groups , np . array ( [1 , 30 , 2 , 31 , 3 , 4 , 40 ], dtype = np . int64 ) )
829838 else :
830- assert_equal (groups , [1 , 30 , 2 , 31 , 3 , 40 , 4 ])
839+ assert_equal (groups , np . array ( [1 , 30 , 2 , 31 , 3 , 40 , 4 ], dtype = np . int64 ) )
831840 reindexed = reindex_ (actual , groups , pd .Index (sorted_groups ))
832841 assert_equal (reindexed , expected )
833842
@@ -950,7 +959,7 @@ def test_factorize_values_outside_bins():
950959 fastpath = True ,
951960 )
952961 actual = vals [0 ]
953- expected = np .array ([[- 1 , - 1 ], [- 1 , 0 ], [6 , 12 ], [18 , 24 ], [- 1 , - 1 ]])
962+ expected = np .array ([[- 1 , - 1 ], [- 1 , 0 ], [6 , 12 ], [18 , 24 ], [- 1 , - 1 ]], np . int64 )
954963 assert_equal (expected , actual )
955964
956965
@@ -967,7 +976,7 @@ def test_multiple_groupers() -> None:
967976 reindex = True ,
968977 func = "count" ,
969978 )
970- expected = np .eye (5 , 5 , dtype = int )
979+ expected = np .eye (5 , 5 , dtype = np . int64 )
971980 assert_equal (expected , actual )
972981
973982
@@ -979,38 +988,38 @@ def test_factorize_reindex_sorting_strings():
979988 )
980989
981990 expected = factorize_ (** kwargs , reindex = True , sort = True )[0 ]
982- assert_equal (expected , [0 , 1 , 4 , 2 ])
991+ assert_equal (expected , np . array ( [0 , 1 , 4 , 2 ], dtype = np . int64 ) )
983992
984993 expected = factorize_ (** kwargs , reindex = True , sort = False )[0 ]
985- assert_equal (expected , [0 , 3 , 4 , 1 ])
994+ assert_equal (expected , np . array ( [0 , 3 , 4 , 1 ], dtype = np . int64 ) )
986995
987996 expected = factorize_ (** kwargs , reindex = False , sort = False )[0 ]
988- assert_equal (expected , [0 , 1 , 2 , 3 ])
997+ assert_equal (expected , np . array ( [0 , 1 , 2 , 3 ], dtype = np . int64 ) )
989998
990999 expected = factorize_ (** kwargs , reindex = False , sort = True )[0 ]
991- assert_equal (expected , [0 , 1 , 3 , 2 ])
1000+ assert_equal (expected , np . array ( [0 , 1 , 3 , 2 ], dtype = np . int64 ) )
9921001
9931002
9941003def test_factorize_reindex_sorting_ints ():
9951004 kwargs = dict (
9961005 by = (np .array ([- 10 , 1 , 10 , 2 , 3 , 5 ]),),
9971006 axis = - 1 ,
998- expected_groups = (np .array ([0 , 1 , 2 , 3 , 4 , 5 ]),),
1007+ expected_groups = (np .array ([0 , 1 , 2 , 3 , 4 , 5 ], np . int64 ),),
9991008 )
10001009
10011010 expected = factorize_ (** kwargs , reindex = True , sort = True )[0 ]
1002- assert_equal (expected , [6 , 1 , 6 , 2 , 3 , 5 ])
1011+ assert_equal (expected , np . array ( [6 , 1 , 6 , 2 , 3 , 5 ], dtype = np . int64 ) )
10031012
10041013 expected = factorize_ (** kwargs , reindex = True , sort = False )[0 ]
1005- assert_equal (expected , [6 , 1 , 6 , 2 , 3 , 5 ])
1014+ assert_equal (expected , np . array ( [6 , 1 , 6 , 2 , 3 , 5 ], dtype = np . int64 ) )
10061015
10071016 kwargs ["expected_groups" ] = (np .arange (5 , - 1 , - 1 ),)
10081017
10091018 expected = factorize_ (** kwargs , reindex = True , sort = True )[0 ]
1010- assert_equal (expected , [6 , 1 , 6 , 2 , 3 , 5 ])
1019+ assert_equal (expected , np . array ( [6 , 1 , 6 , 2 , 3 , 5 ], dtype = np . int64 ) )
10111020
10121021 expected = factorize_ (** kwargs , reindex = True , sort = False )[0 ]
1013- assert_equal (expected , [6 , 4 , 6 , 3 , 2 , 0 ])
1022+ assert_equal (expected , np . array ( [6 , 4 , 6 , 3 , 2 , 0 ], dtype = np . int64 ) )
10141023
10151024
10161025@requires_dask
0 commit comments