diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 29060a93923eb..d4f58ec3086b0 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -667,6 +667,7 @@ Categorical same type as if one used the :meth:`.str.` / :meth:`.dt.` on a :class:`Series` of that type. E.g. when accessing :meth:`Series.dt.tz_localize` on a :class:`Categorical` with duplicate entries, the accessor was skipping duplicates (:issue:`27952`) - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` that would give incorrect results on categorical data (:issue:`26988`) +- Bug where calling :meth:`Categorical.min` or :meth:`Categorical.max` on an empty Categorical would raise a numpy exception (:issue:`30227`) Datetimelike diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 7170aec3820a8..761d9907609c3 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2116,6 +2116,10 @@ def min(self, skipna=True): Only ordered `Categoricals` have a minimum! + .. versionchanged:: 1.0.0 + + Returns an NA value on empty arrays + Raises ------ TypeError @@ -2126,6 +2130,10 @@ def min(self, skipna=True): min : the minimum of this `Categorical` """ self.check_for_ordered("min") + + if not len(self._codes): + return self.dtype.na_value + good = self._codes != -1 if not good.all(): if skipna: @@ -2143,6 +2151,10 @@ def max(self, skipna=True): Only ordered `Categoricals` have a maximum! + .. 
versionchanged:: 1.0.0 + + Returns an NA value on empty arrays + Raises ------ TypeError @@ -2153,6 +2165,10 @@ def max(self, skipna=True): max : the maximum of this `Categorical` """ self.check_for_ordered("max") + + if not len(self._codes): + return self.dtype.na_value + good = self._codes != -1 if not good.all(): if skipna: diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 637a47eba0597..3c3d9d7a44aec 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -5,22 +5,23 @@ from pandas.compat import PYPY -from pandas import Categorical, Index, Series +from pandas import Categorical, Index, NaT, Series, date_range from pandas.api.types import is_scalar import pandas.util.testing as tm class TestCategoricalAnalytics: - def test_min_max(self): - + @pytest.mark.parametrize("aggregation", ["min", "max"]) + def test_min_max_not_ordered_raises(self, aggregation): # unordered cats have no min/max cat = Categorical(["a", "b", "c", "d"], ordered=False) msg = "Categorical is not ordered for operation {}" - with pytest.raises(TypeError, match=msg.format("min")): - cat.min() - with pytest.raises(TypeError, match=msg.format("max")): - cat.max() + agg_func = getattr(cat, aggregation) + + with pytest.raises(TypeError, match=msg.format(aggregation)): + agg_func() + def test_min_max_ordered(self): cat = Categorical(["a", "b", "c", "d"], ordered=True) _min = cat.min() _max = cat.max() @@ -35,6 +36,29 @@ def test_min_max(self): assert _min == "d" assert _max == "a" + @pytest.mark.parametrize( + "categories,expected", + [ + (list("ABC"), np.NaN), + ([1, 2, 3], np.NaN), + pytest.param( + Series(date_range("2020-01-01", periods=3), dtype="category"), + NaT, + marks=pytest.mark.xfail( + reason="https://github.com/pandas-dev/pandas/issues/29962" + ), + ), + ], + ) + @pytest.mark.parametrize("aggregation", ["min", "max"]) + def 
test_min_max_ordered_empty(self, categories, expected, aggregation): + # GH 30227 - build the empty Categorical from the parametrized categories + cat = Categorical([], categories=categories, ordered=True) + + agg_func = getattr(cat, aggregation) + result = agg_func() + assert result is expected + @pytest.mark.parametrize("skipna", [True, False]) def test_min_max_with_nan(self, skipna): # GH 25303