diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7823f74b7a153..ff3fd65b9bcea 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -41,6 +41,7 @@ Other enhancements - :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`) - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`) - :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`) +- :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 8d6880fc2acb3..6a3cf4590568c 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -6,6 +6,7 @@ from shutil import get_terminal_size from typing import ( TYPE_CHECKING, + Callable, Literal, cast, overload, @@ -2508,6 +2509,28 @@ def equals(self, other: object) -> bool: return np.array_equal(self._codes, other._codes) return False + def _accumulate(self, name: str, skipna: bool = True, **kwargs) -> Self: + func: Callable + if name == "cummin": + func = np.minimum.accumulate + elif name == "cummax": + func = np.maximum.accumulate + else: + raise TypeError(f"Accumulation {name} not supported for {type(self)}") + self.check_for_ordered(name) + + codes = self.codes.copy() + mask = self.isna() + if func == np.minimum.accumulate: + codes[mask] = np.iinfo(codes.dtype.type).max + # no need to change codes for maximum because codes[mask] is already -1 + if not skipna: + mask = np.maximum.accumulate(mask) + + codes = func(codes) + codes[mask] = -1 + return self._simple_new(codes, dtype=self._dtype) + @classmethod def _concat_same_type(cls, to_concat: Sequence[Self], axis: AxisInt = 0) -> Self: from pandas.core.dtypes.concat import union_categoricals diff --git a/pandas/tests/series/test_cumulative.py b/pandas/tests/series/test_cumulative.py index 9b7b08127a550..a9d5486139b46 100644 --- a/pandas/tests/series/test_cumulative.py +++ b/pandas/tests/series/test_cumulative.py @@ -170,6 +170,58 @@ def test_cummethods_bool_in_object_dtype(self, method, expected): result = getattr(ser, method)() tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "method, order", + [ + ["cummax", "abc"], + ["cummin", "cba"], + ], + ) + def test_cummax_cummin_on_ordered_categorical(self, method, order): + # GH#52335 + cat = pd.CategoricalDtype(list(order), ordered=True) + ser = pd.Series( + list("ababcab"), + dtype=cat, + ) + result = getattr(ser, method)() + expected = pd.Series( + list("abbbccc"), + dtype=cat, + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "skip, exp", + [ + [True, ["a", np.nan, "b", "b", "c"]], + [False, ["a", np.nan, np.nan, np.nan, np.nan]], + ], + ) + @pytest.mark.parametrize( + "method, order", + [ + ["cummax", "abc"], + ["cummin", "cba"], + ], + ) + def test_cummax_cummin_ordered_categorical_nan(self, skip, exp, method, order): + # GH#52335 + cat = pd.CategoricalDtype(list(order), ordered=True) + ser = pd.Series( + ["a", np.nan, "b", "a", "c"], + dtype=cat, + ) + result = getattr(ser, method)(skipna=skip) + expected = pd.Series( + exp, + dtype=cat, + ) + tm.assert_series_equal( + result, + expected, + ) + def test_cumprod_timedelta(self): # GH#48111 ser = pd.Series([pd.Timedelta(days=1), pd.Timedelta(days=3)])