diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 73201fa93a8aa..ceaf3a953f046 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -152,7 +152,7 @@ Conversion
 
 Strings
 ^^^^^^^
--
+- Bug in :meth:`Series.value_counts` where ``sort=False`` was not respected for Series with ``string`` dtype (:issue:`55224`)
 -
 
 Interval
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index b73b49eca3e18..6c4413052c12d 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -542,7 +542,9 @@ def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
     def value_counts(self, dropna: bool = True) -> Series:
         from pandas.core.algorithms import value_counts_internal as value_counts
 
-        result = value_counts(self._ndarray, dropna=dropna).astype("Int64")
+        # GH#55224: count without sorting at this level, so that
+        # Series.value_counts(sort=False) no longer comes back sorted.
+        result = value_counts(self._ndarray, sort=False, dropna=dropna).astype("Int64")
         result.index = result.index.astype(self.dtype)
         return result
 
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index f67268616a021..cb324d29258c0 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -584,6 +584,21 @@ def test_value_counts_with_normalize(dtype):
     tm.assert_series_equal(result, expected)
 
 
+def test_value_counts_sort_false(dtype):
+    # GH#55224: with sort=False the counts follow first-appearance order
+    storage = getattr(dtype, "storage", "")
+    if storage == "pyarrow":
+        exp_dtype = "int64[pyarrow]"
+    elif storage == "pyarrow_numpy":
+        exp_dtype = "int64"
+    else:
+        exp_dtype = "Int64"
+    ser = pd.Series(["a", "b", "c", "b"], dtype=dtype)
+    result = ser.value_counts(sort=False)
+    expected = pd.Series([1, 2, 1], index=ser[:3], dtype=exp_dtype, name="count")
+    tm.assert_series_equal(result, expected)
+
+
 @pytest.mark.parametrize(
     "values, expected",
     [