@@ -82,7 +82,7 @@ def get_stats(group):
8282 assert result .index .names [0 ] == "C"
8383
8484
85- def test_basic (using_infer_string ): # TODO: split this test
85+ def test_basic ():
8686 cats = Categorical (
8787 ["a" , "a" , "a" , "b" , "b" , "b" , "c" , "c" , "c" ],
8888 categories = ["a" , "b" , "c" , "d" ],
@@ -95,17 +95,20 @@ def test_basic(using_infer_string): # TODO: split this test
9595 result = data .groupby ("b" , observed = False ).mean ()
9696 tm .assert_frame_equal (result , expected )
9797
98+
99+ def test_basic_single_grouper ():
98100 cat1 = Categorical (["a" , "a" , "b" , "b" ], categories = ["a" , "b" , "z" ], ordered = True )
99101 cat2 = Categorical (["c" , "d" , "c" , "d" ], categories = ["c" , "d" , "y" ], ordered = True )
100102 df = DataFrame ({"A" : cat1 , "B" : cat2 , "values" : [1 , 2 , 3 , 4 ]})
101103
102- # single grouper
103104 gb = df .groupby ("A" , observed = False )
104105 exp_idx = CategoricalIndex (["a" , "b" , "z" ], name = "A" , ordered = True )
105106 expected = DataFrame ({"values" : Series ([3 , 7 , 0 ], index = exp_idx )})
106107 result = gb .sum (numeric_only = True )
107108 tm .assert_frame_equal (result , expected )
108109
110+
111+ def test_basic_string (using_infer_string ):
109112 # GH 8623
110113 x = DataFrame (
111114 [[1 , "John P. Doe" ], [2 , "Jane Dove" ], [1 , "John P. Doe" ]],
@@ -133,8 +136,9 @@ def f(x):
133136 expected ["person_name" ] = expected ["person_name" ].astype (dtype )
134137 tm .assert_frame_equal (result , expected )
135138
139+
140+ def test_basic_monotonic ():
136141 # GH 9921
137- # Monotonic
138142 df = DataFrame ({"a" : [5 , 15 , 25 ]})
139143 c = pd .cut (df .a , bins = [0 , 10 , 20 , 30 , 40 ])
140144
@@ -165,7 +169,8 @@ def f(x):
165169 tm .assert_series_equal (df .a .groupby (c , observed = False ).filter (np .all ), df ["a" ])
166170 tm .assert_frame_equal (df .groupby (c , observed = False ).filter (np .all ), df )
167171
168- # Non-monotonic
172+
173+ def test_basic_non_monotonic ():
169174 df = DataFrame ({"a" : [5 , 15 , 25 , - 5 ]})
170175 c = pd .cut (df .a , bins = [- 10 , 0 , 10 , 20 , 30 , 40 ])
171176
@@ -183,6 +188,8 @@ def f(x):
183188 df .groupby (c , observed = False ).transform (lambda xs : np .sum (xs )), df [["a" ]]
184189 )
185190
191+
192+ def test_basic_cut_grouping ():
186193 # GH 9603
187194 df = DataFrame ({"a" : [1 , 0 , 0 , 0 ]})
188195 c = pd .cut (df .a , [0 , 1 , 2 , 3 , 4 ], labels = Categorical (list ("abcd" )))
@@ -193,13 +200,14 @@ def f(x):
193200 expected .index .name = "a"
194201 tm .assert_series_equal (result , expected )
195202
196- # more basic
203+
204+ def test_more_basic ():
197205 levels = ["foo" , "bar" , "baz" , "qux" ]
198- codes = np .random .default_rng (2 ).integers (0 , 4 , size = 100 )
206+ codes = np .random .default_rng (2 ).integers (0 , 4 , size = 10 )
199207
200208 cats = Categorical .from_codes (codes , levels , ordered = True )
201209
202- data = DataFrame (np .random .default_rng (2 ).standard_normal ((100 , 4 )))
210+ data = DataFrame (np .random .default_rng (2 ).standard_normal ((10 , 4 )))
203211
204212 result = data .groupby (cats , observed = False ).mean ()
205213
@@ -225,9 +233,9 @@ def f(x):
225233 # GH 10460
226234 expc = Categorical .from_codes (np .arange (4 ).repeat (8 ), levels , ordered = True )
227235 exp = CategoricalIndex (expc )
228- tm .assert_index_equal (( desc_result .stack ().index .get_level_values (0 ) ), exp )
236+ tm .assert_index_equal (desc_result .stack ().index .get_level_values (0 ), exp )
229237 exp = Index (["count" , "mean" , "std" , "min" , "25%" , "50%" , "75%" , "max" ] * 4 )
230- tm .assert_index_equal (( desc_result .stack ().index .get_level_values (1 ) ), exp )
238+ tm .assert_index_equal (desc_result .stack ().index .get_level_values (1 ), exp )
231239
232240
233241def test_level_get_group (observed ):
@@ -352,6 +360,8 @@ def test_observed(observed):
352360
353361 tm .assert_frame_equal (result , expected )
354362
363+
364+ def test_observed_single_column (observed ):
355365 # https://github.com/pandas-dev/pandas/issues/8138
356366 d = {
357367 "cat" : Categorical (
@@ -362,7 +372,6 @@ def test_observed(observed):
362372 }
363373 df = DataFrame (d )
364374
365- # Grouping on a single column
366375 groups_single_key = df .groupby ("cat" , observed = observed )
367376 result = groups_single_key .mean ()
368377
@@ -378,7 +387,17 @@ def test_observed(observed):
378387
379388 tm .assert_frame_equal (result , expected )
380389
381- # Grouping on two columns
390+
391+ def test_observed_two_columns (observed ):
392+ # https://github.com/pandas-dev/pandas/issues/8138
393+ d = {
394+ "cat" : Categorical (
395+ ["a" , "b" , "a" , "b" ], categories = ["a" , "b" , "c" ], ordered = True
396+ ),
397+ "ints" : [1 , 1 , 2 , 2 ],
398+ "val" : [10 , 20 , 30 , 40 ],
399+ }
400+ df = DataFrame (d )
382401 groups_double_key = df .groupby (["cat" , "ints" ], observed = observed )
383402 result = groups_double_key .agg ("mean" )
384403 expected = DataFrame (
@@ -404,6 +423,8 @@ def test_observed(observed):
404423 expected = df [(df .cat == c ) & (df .ints == i )]
405424 tm .assert_frame_equal (result , expected )
406425
426+
427+ def test_observed_with_as_index (observed ):
407428 # gh-8869
408429 # with as_index
409430 d = {
@@ -591,7 +612,6 @@ def test_dataframe_categorical_with_nan(observed):
591612
592613
593614@pytest .mark .parametrize ("ordered" , [True , False ])
594- @pytest .mark .parametrize ("observed" , [True , False ])
595615def test_dataframe_categorical_ordered_observed_sort (ordered , observed , sort ):
596616 # GH 25871: Fix groupby sorting on ordered Categoricals
597617 # GH 25167: Groupby with observed=True doesn't sort
@@ -627,11 +647,11 @@ def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
627647def test_datetime ():
628648 # GH9049: ensure backward compatibility
629649 levels = pd .date_range ("2014-01-01" , periods = 4 )
630- codes = np .random .default_rng (2 ).integers (0 , 4 , size = 100 )
650+ codes = np .random .default_rng (2 ).integers (0 , 4 , size = 10 )
631651
632652 cats = Categorical .from_codes (codes , levels , ordered = True )
633653
634- data = DataFrame (np .random .default_rng (2 ).standard_normal ((100 , 4 )))
654+ data = DataFrame (np .random .default_rng (2 ).standard_normal ((10 , 4 )))
635655 result = data .groupby (cats , observed = False ).mean ()
636656
637657 expected = data .groupby (np .asarray (cats ), observed = False ).mean ()
@@ -832,7 +852,10 @@ def test_preserve_categories():
832852 df .groupby ("A" , sort = False , observed = False ).first ().index , nosort_index
833853 )
834854
835- # ordered=False
855+
856+ def test_preserve_categories_ordered_false ():
857+ # GH-13179
858+ categories = list ("abc" )
836859 df = DataFrame ({"A" : Categorical (list ("ba" ), categories = categories , ordered = False )})
837860 sort_index = CategoricalIndex (categories , categories , ordered = False , name = "A" )
838861 # GH#48749 - don't change order of categories
@@ -846,7 +869,8 @@ def test_preserve_categories():
846869 )
847870
848871
849- def test_preserve_categorical_dtype ():
872+ @pytest .mark .parametrize ("col" , ["C1" , "C2" ])
873+ def test_preserve_categorical_dtype (col ):
850874 # GH13743, GH13854
851875 df = DataFrame (
852876 {
@@ -865,18 +889,15 @@ def test_preserve_categorical_dtype():
865889 "C2" : Categorical (list ("bac" ), categories = list ("bac" ), ordered = True ),
866890 }
867891 )
868- for col in ["C1" , "C2" ]:
869- result1 = df .groupby (by = col , as_index = False , observed = False ).mean (
870- numeric_only = True
871- )
872- result2 = (
873- df .groupby (by = col , as_index = True , observed = False )
874- .mean (numeric_only = True )
875- .reset_index ()
876- )
877- expected = exp_full .reindex (columns = result1 .columns )
878- tm .assert_frame_equal (result1 , expected )
879- tm .assert_frame_equal (result2 , expected )
892+ result1 = df .groupby (by = col , as_index = False , observed = False ).mean (numeric_only = True )
893+ result2 = (
894+ df .groupby (by = col , as_index = True , observed = False )
895+ .mean (numeric_only = True )
896+ .reset_index ()
897+ )
898+ expected = exp_full .reindex (columns = result1 .columns )
899+ tm .assert_frame_equal (result1 , expected )
900+ tm .assert_frame_equal (result2 , expected )
880901
881902
882903@pytest .mark .parametrize (
@@ -931,6 +952,8 @@ def test_categorical_no_compress():
931952 )
932953 tm .assert_series_equal (result , exp )
933954
955+
956+ def test_categorical_no_compress_string ():
934957 cats = Categorical (
935958 ["a" , "a" , "a" , "b" , "b" , "b" , "c" , "c" , "c" ],
936959 categories = ["a" , "b" , "c" , "d" ],
@@ -965,7 +988,7 @@ def test_sort():
965988 # has a sorted x axis
966989 # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar')
967990
968- df = DataFrame ({"value" : np .random .default_rng (2 ).integers (0 , 10000 , 100 )})
991+ df = DataFrame ({"value" : np .random .default_rng (2 ).integers (0 , 10000 , 10 )})
969992 labels = [f"{ i } - { i + 499 } " for i in range (0 , 10000 , 500 )]
970993 cat_labels = Categorical (labels , labels )
971994
0 commit comments