diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 3569a578943d4..4e0cc93e64458 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -948,7 +948,7 @@ def value_counts_internal(
             result = Series(counts, index=idx, name=name, copy=False)
 
     if sort:
-        result = result.sort_values(ascending=ascending)
+        result = result.sort_values(ascending=ascending, kind="stable")
 
     if normalize:
         result = result / counts.sum()
diff --git a/pandas/core/base.py b/pandas/core/base.py
index b417cf1487417..200b16b4b6b1a 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -993,7 +993,12 @@ def value_counts(
             If True then the object returned will contain the relative
             frequencies of the unique values.
         sort : bool, default True
-            Sort by frequencies when True. Preserve the order of the data when False.
+            Stable sort by frequencies when True. Preserve the order of the data
+            when False.
+
+            .. versionchanged:: 3.0.0
+
+                Prior to 3.0.0, the sort was unstable.
         ascending : bool, default False
             Sort in ascending order.
         bins : int, optional
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 2b9adb6230028..60141548db9f0 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7761,11 +7761,16 @@ def value_counts(
         normalize : bool, default False
             Return proportions rather than frequencies.
         sort : bool, default True
-            Sort by frequencies when True. Preserve the order of the data when False.
+            Stable sort by frequencies when True. Preserve the order of the data
+            when False.
 
             .. versionchanged:: 3.0.0
 
                 Prior to 3.0.0, ``sort=False`` would sort by the columns values.
+
+            .. versionchanged:: 3.0.0
+
+                Prior to 3.0.0, the sort was unstable.
         ascending : bool, default False
             Sort in ascending order.
         dropna : bool, default True
@@ -7875,7 +7880,7 @@ def value_counts(
         counts.name = name
 
         if sort:
-            counts = counts.sort_values(ascending=ascending)
+            counts = counts.sort_values(ascending=ascending, kind="stable")
 
         if normalize:
             counts /= counts.sum()
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 93e04fe61555e..dfa875249afff 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -2776,8 +2776,8 @@ def value_counts(
         normalize : bool, default False
             Return proportions rather than frequencies.
         sort : bool, default True
-            Sort by frequencies when True. When False, non-grouping columns will appear
-            in the order they occur in within groups.
+            Stable sort by frequencies when True. When False, non-grouping
+            columns will appear in the order they occur in within groups.
 
             .. versionchanged:: 3.0.0
 
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index c876aae6dea5e..588cd08a6b618 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -1446,6 +1446,21 @@ def test_value_counts_series(self):
         )
         tm.assert_series_equal(result, expected)
 
+    def test_value_counts_stability(self):
+        # GH 63155
+        arr = np.random.default_rng(2).integers(0, 32, 64)
+        result = algos.value_counts_internal(arr, sort=True)
+
+        value_counts = Series(arr).value_counts(sort=False)
+        # Guard: the input must contain tied counts, otherwise stability is
+        # never exercised. This is checked directly instead of expecting
+        # kind="quicksort" to yield a different order, because the tie order
+        # of an unstable sort depends on the NumPy build.
+        assert value_counts.duplicated().any()
+
+        expected = value_counts.sort_values(ascending=False, kind="stable")
+        tm.assert_series_equal(result, expected)
+
 
 class TestDuplicated:
     def test_duplicated_with_nas(self):