Skip to content

Commit 1657a13

Browse files
bdwzhangumichxiangchrismroeschke
authored and committed
ENH: Implement cummax and cummin in _accumulate() for ordered Categorical arrays (pandas-dev#58360)
* Added tests with and without np.nan * Added tests for cummin and cummax * Fixed series tests expected series, rewrote categorical arrays to use pd.Categorical * Fixed cat not defined error and misspelling * Implement _accumulate for Categorical * fixed misspellings in tests * fixed expected categories on tests * Updated whatsnew * Update doc/source/whatsnew/v3.0.0.rst Co-authored-by: Matthew Roeschke <[email protected]> * Removed testing for _accumulate. * Moved categorical_accumulations.py logic to categorical.py * Assigned expected results to expected variable; Added pytest.mark.parametrize to test_cummax_cummin_ordered_categorical_nan with skipna and expected data --------- Co-authored-by: Christopher Xiang <[email protected]> Co-authored-by: Matthew Roeschke <[email protected]> Co-authored-by: Chris Xiang <[email protected]>
1 parent 1e80850 commit 1657a13

File tree

3 files changed

+76
-0
lines changed

3 files changed

+76
-0
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ Other enhancements
4141
- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
4242
- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
4343
- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
44+
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
4445

4546
.. ---------------------------------------------------------------------------
4647
.. _whatsnew_300.notable_bug_fixes:

pandas/core/arrays/categorical.py

+23
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from shutil import get_terminal_size
77
from typing import (
88
TYPE_CHECKING,
9+
Callable,
910
Literal,
1011
cast,
1112
overload,
@@ -2508,6 +2509,28 @@ def equals(self, other: object) -> bool:
25082509
return np.array_equal(self._codes, other._codes)
25092510
return False
25102511

2512+
def _accumulate(self, name: str, skipna: bool = True, **kwargs) -> Self:
2513+
func: Callable
2514+
if name == "cummin":
2515+
func = np.minimum.accumulate
2516+
elif name == "cummax":
2517+
func = np.maximum.accumulate
2518+
else:
2519+
raise TypeError(f"Accumulation {name} not supported for {type(self)}")
2520+
self.check_for_ordered(name)
2521+
2522+
codes = self.codes.copy()
2523+
mask = self.isna()
2524+
if func == np.minimum.accumulate:
2525+
codes[mask] = np.iinfo(codes.dtype.type).max
2526+
# no need to change codes for maximum because codes[mask] is already -1
2527+
if not skipna:
2528+
mask = np.maximum.accumulate(mask)
2529+
2530+
codes = func(codes)
2531+
codes[mask] = -1
2532+
return self._simple_new(codes, dtype=self._dtype)
2533+
25112534
@classmethod
25122535
def _concat_same_type(cls, to_concat: Sequence[Self], axis: AxisInt = 0) -> Self:
25132536
from pandas.core.dtypes.concat import union_categoricals

pandas/tests/series/test_cumulative.py

+52
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,58 @@ def test_cummethods_bool_in_object_dtype(self, method, expected):
170170
result = getattr(ser, method)()
171171
tm.assert_series_equal(result, expected)
172172

173+
@pytest.mark.parametrize(
    "method, order",
    [
        ["cummax", "abc"],
        ["cummin", "cba"],
    ],
)
def test_cummax_cummin_on_ordered_categorical(self, method, order):
    """
    Check cummax/cummin on an ordered categorical Series without NaN.

    The category order is reversed for ``cummin`` so that both methods
    yield the same expected labels.
    """
    # GH#52335
    ordered_dtype = pd.CategoricalDtype(list(order), ordered=True)
    ser = pd.Series(list("ababcab"), dtype=ordered_dtype)
    expected = pd.Series(list("abbbccc"), dtype=ordered_dtype)

    accumulate = getattr(ser, method)
    result = accumulate()

    tm.assert_series_equal(result, expected)
193+
194+
@pytest.mark.parametrize(
    "skip, exp",
    [
        [True, ["a", np.nan, "b", "b", "c"]],
        [False, ["a", np.nan, np.nan, np.nan, np.nan]],
    ],
)
@pytest.mark.parametrize(
    "method, order",
    [
        ["cummax", "abc"],
        ["cummin", "cba"],
    ],
)
def test_cummax_cummin_ordered_categorical_nan(self, skip, exp, method, order):
    """
    Check cummax/cummin on an ordered categorical Series containing NaN.

    With ``skipna=True`` only the NaN position itself stays missing; with
    ``skipna=False`` every position from the NaN onwards is missing.
    """
    # GH#52335
    ordered_dtype = pd.CategoricalDtype(list(order), ordered=True)
    ser = pd.Series(["a", np.nan, "b", "a", "c"], dtype=ordered_dtype)
    expected = pd.Series(exp, dtype=ordered_dtype)

    accumulate = getattr(ser, method)
    result = accumulate(skipna=skip)

    tm.assert_series_equal(result, expected)
224+
173225
def test_cumprod_timedelta(self):
174226
# GH#48111
175227
ser = pd.Series([pd.Timedelta(days=1), pd.Timedelta(days=3)])

0 commit comments

Comments
 (0)