Skip to content

Commit 37526c1

Browse files
makbigc authored and jorisvandenbossche committed
API/DEPR: Change default skipna behaviour + deprecate numeric_only in Categorical.min and max (#27929)
1 parent 7e791e4 commit 37526c1

File tree

5 files changed

+85
-57
lines changed

5 files changed

+85
-57
lines changed

doc/source/whatsnew/v1.0.0.rst

+22
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,26 @@ The following methods now also correctly output values for unobserved categories
304304
df.groupby(["cat_1", "cat_2"], observed=False)["value"].count()
305305
306306
307+
By default :meth:`Categorical.min` now returns the minimum instead of np.nan
308+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
309+
310+
When :class:`Categorical` contains ``np.nan``,
311+
:meth:`Categorical.min` no longer returns ``np.nan`` by default (skipna=True) (:issue:`25303`)
312+
313+
*pandas 0.25.x*
314+
315+
.. code-block:: ipython
316+
317+
In [1]: pd.Categorical([1, 2, np.nan], ordered=True).min()
318+
Out[1]: nan
319+
320+
321+
*pandas 1.0.0*
322+
323+
.. ipython:: python
324+
325+
pd.Categorical([1, 2, np.nan], ordered=True).min()
326+
307327
.. _whatsnew_1000.api_breaking.deps:
308328

309329
Increased minimum versions for dependencies
@@ -410,6 +430,8 @@ Deprecations
410430
- :func:`is_extension_type` is deprecated, :func:`is_extension_array_dtype` should be used instead (:issue:`29457`)
411431
- :func:`eval` keyword argument "truediv" is deprecated and will be removed in a future version (:issue:`29812`)
412432
- :meth:`Categorical.take_nd` is deprecated, use :meth:`Categorical.take` instead (:issue:`27745`)
433+
- The parameter ``numeric_only`` of :meth:`Categorical.min` and :meth:`Categorical.max` is deprecated and replaced with ``skipna`` (:issue:`25303`)
434+
-
413435

414436
.. _whatsnew_1000.prior_deprecations:
415437

pandas/core/arrays/categorical.py

+20-18
Original file line numberDiff line numberDiff line change
@@ -2123,7 +2123,8 @@ def _reduce(self, name, axis=0, **kwargs):
21232123
raise TypeError(f"Categorical cannot perform the operation {name}")
21242124
return func(**kwargs)
21252125

2126-
def min(self, numeric_only=None, **kwargs):
2126+
@deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna")
2127+
def min(self, skipna=True):
21272128
"""
21282129
The minimum value of the object.
21292130
@@ -2139,17 +2140,18 @@ def min(self, numeric_only=None, **kwargs):
21392140
min : the minimum of this `Categorical`
21402141
"""
21412142
self.check_for_ordered("min")
2142-
if numeric_only:
2143-
good = self._codes != -1
2144-
pointer = self._codes[good].min(**kwargs)
2145-
else:
2146-
pointer = self._codes.min(**kwargs)
2147-
if pointer == -1:
2148-
return np.nan
2143+
good = self._codes != -1
2144+
if not good.all():
2145+
if skipna:
2146+
pointer = self._codes[good].min()
2147+
else:
2148+
return np.nan
21492149
else:
2150-
return self.categories[pointer]
2150+
pointer = self._codes.min()
2151+
return self.categories[pointer]
21512152

2152-
def max(self, numeric_only=None, **kwargs):
2153+
@deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna")
2154+
def max(self, skipna=True):
21532155
"""
21542156
The maximum value of the object.
21552157
@@ -2165,15 +2167,15 @@ def max(self, numeric_only=None, **kwargs):
21652167
max : the maximum of this `Categorical`
21662168
"""
21672169
self.check_for_ordered("max")
2168-
if numeric_only:
2169-
good = self._codes != -1
2170-
pointer = self._codes[good].max(**kwargs)
2171-
else:
2172-
pointer = self._codes.max(**kwargs)
2173-
if pointer == -1:
2174-
return np.nan
2170+
good = self._codes != -1
2171+
if not good.all():
2172+
if skipna:
2173+
pointer = self._codes[good].max()
2174+
else:
2175+
return np.nan
21752176
else:
2176-
return self.categories[pointer]
2177+
pointer = self._codes.max()
2178+
return self.categories[pointer]
21772179

21782180
def mode(self, dropna=True):
21792181
"""

pandas/core/series.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -3820,9 +3820,7 @@ def _reduce(
38203820
self._get_axis_number(axis)
38213821

38223822
if isinstance(delegate, Categorical):
3823-
# TODO deprecate numeric_only argument for Categorical and use
3824-
# skipna as well, see GH25303
3825-
return delegate._reduce(name, numeric_only=numeric_only, **kwds)
3823+
return delegate._reduce(name, skipna=skipna, **kwds)
38263824
elif isinstance(delegate, ExtensionArray):
38273825
# dispatch to ExtensionArray interface
38283826
return delegate._reduce(name, skipna=skipna, **kwds)

pandas/tests/arrays/categorical/test_analytics.py

+29-17
Original file line numberDiff line numberDiff line change
@@ -35,31 +35,43 @@ def test_min_max(self):
3535
assert _min == "d"
3636
assert _max == "a"
3737

38+
@pytest.mark.parametrize("skipna", [True, False])
39+
def test_min_max_with_nan(self, skipna):
40+
# GH 25303
3841
cat = Categorical(
3942
[np.nan, "b", "c", np.nan], categories=["d", "c", "b", "a"], ordered=True
4043
)
41-
_min = cat.min()
42-
_max = cat.max()
43-
assert np.isnan(_min)
44-
assert _max == "b"
44+
_min = cat.min(skipna=skipna)
45+
_max = cat.max(skipna=skipna)
4546

46-
_min = cat.min(numeric_only=True)
47-
assert _min == "c"
48-
_max = cat.max(numeric_only=True)
49-
assert _max == "b"
47+
if skipna is False:
48+
assert np.isnan(_min)
49+
assert np.isnan(_max)
50+
else:
51+
assert _min == "c"
52+
assert _max == "b"
5053

5154
cat = Categorical(
5255
[np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True
5356
)
54-
_min = cat.min()
55-
_max = cat.max()
56-
assert np.isnan(_min)
57-
assert _max == 1
58-
59-
_min = cat.min(numeric_only=True)
60-
assert _min == 2
61-
_max = cat.max(numeric_only=True)
62-
assert _max == 1
57+
_min = cat.min(skipna=skipna)
58+
_max = cat.max(skipna=skipna)
59+
60+
if skipna is False:
61+
assert np.isnan(_min)
62+
assert np.isnan(_max)
63+
else:
64+
assert _min == 2
65+
assert _max == 1
66+
67+
@pytest.mark.parametrize("method", ["min", "max"])
68+
def test_deprecate_numeric_only_min_max(self, method):
69+
# GH 25303
70+
cat = Categorical(
71+
[np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True
72+
)
73+
with tm.assert_produces_warning(expected_warning=FutureWarning):
74+
getattr(cat, method)(numeric_only=True)
6375

6476
@pytest.mark.parametrize(
6577
"values,categories,exp_mode",

pandas/tests/reductions/test_reductions.py

+13-19
Original file line numberDiff line numberDiff line change
@@ -1043,7 +1043,7 @@ def test_min_max(self):
10431043
)
10441044
_min = cat.min()
10451045
_max = cat.max()
1046-
assert np.isnan(_min)
1046+
assert _min == "c"
10471047
assert _max == "b"
10481048

10491049
cat = Series(
@@ -1053,30 +1053,24 @@ def test_min_max(self):
10531053
)
10541054
_min = cat.min()
10551055
_max = cat.max()
1056-
assert np.isnan(_min)
1056+
assert _min == 2
10571057
assert _max == 1
10581058

1059-
def test_min_max_numeric_only(self):
1060-
# TODO deprecate numeric_only argument for Categorical and use
1061-
# skipna as well, see GH25303
1059+
@pytest.mark.parametrize("skipna", [True, False])
1060+
def test_min_max_skipna(self, skipna):
1061+
# GH 25303
10621062
cat = Series(
10631063
Categorical(["a", "b", np.nan, "a"], categories=["b", "a"], ordered=True)
10641064
)
1065+
_min = cat.min(skipna=skipna)
1066+
_max = cat.max(skipna=skipna)
10651067

1066-
_min = cat.min()
1067-
_max = cat.max()
1068-
assert np.isnan(_min)
1069-
assert _max == "a"
1070-
1071-
_min = cat.min(numeric_only=True)
1072-
_max = cat.max(numeric_only=True)
1073-
assert _min == "b"
1074-
assert _max == "a"
1075-
1076-
_min = cat.min(numeric_only=False)
1077-
_max = cat.max(numeric_only=False)
1078-
assert np.isnan(_min)
1079-
assert _max == "a"
1068+
if skipna is True:
1069+
assert _min == "b"
1070+
assert _max == "a"
1071+
else:
1072+
assert np.isnan(_min)
1073+
assert np.isnan(_max)
10801074

10811075

10821076
class TestSeriesMode:

0 commit comments

Comments
 (0)