From bb357ac3e29136ba78e19cf02608cff0b180ac55 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 7 May 2020 19:40:55 +0100 Subject: [PATCH 1/4] more informative error message with np.min or np.max on unordered Categorical --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/arrays/categorical.py | 4 ++-- pandas/tests/arrays/categorical/test_analytics.py | 12 ++++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9c424f70b1ee0..52a94c9718e5a 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -778,6 +778,7 @@ Other - Bug in :meth:`DataFrame.__dir__` caused a segfault when using unicode surrogates in a column name (:issue:`25509`) - Bug in :meth:`DataFrame.plot.scatter` caused an error when plotting variable marker sizes (:issue:`32904`) - :class:`IntegerArray` now implements the ``sum`` operation (:issue:`33172`) +- more informative error message with ``np.min`` or ``np.max`` on unordered Categorical (:issue:`33115`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index efb53e5a89314..9b7d53fb91f08 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2071,7 +2071,7 @@ def _reduce(self, name, axis=0, **kwargs): return func(**kwargs) @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") - def min(self, skipna=True): + def min(self, skipna=True, **kwargs): """ The minimum value of the object. @@ -2106,7 +2106,7 @@ def min(self, skipna=True): return self.categories[pointer] @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") - def max(self, skipna=True): + def max(self, skipna=True, **kwargs): """ The maximum value of the object. diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 83ecebafbea07..ab57f089edd3c 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -1,3 +1,4 @@ +import re import sys import numpy as np @@ -93,6 +94,17 @@ def test_deprecate_numeric_only_min_max(self, method): with tm.assert_produces_warning(expected_warning=FutureWarning): getattr(cat, method)(numeric_only=True) + @pytest.mark.parametrize("method", ["min", "max"]) + def test_numpy_min_max_raises(self, method): + cat = Categorical(["a", "b", "c", "b"], ordered=False) + msg = ( + f"Categorical is not ordered for operation {method}\n" + "you can use .as_ordered() to change the Categorical to an ordered one" + ) + method = getattr(np, method) + with pytest.raises(TypeError, match=re.escape(msg)): + method(cat) + @pytest.mark.parametrize( "values,categories,exp_mode", [ From 98e23a752f73b219151c26a079d903ba48e3ca22 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 8 May 2020 10:37:36 +0100 Subject: [PATCH 2/4] address comments --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/arrays/categorical.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 52a94c9718e5a..b3015d071a247 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -778,7 +778,7 @@ Other - Bug in :meth:`DataFrame.__dir__` caused a segfault when using unicode surrogates in a column name (:issue:`25509`) - Bug in :meth:`DataFrame.plot.scatter` caused an error when plotting variable marker sizes (:issue:`32904`) - :class:`IntegerArray` now implements the ``sum`` operation (:issue:`33172`) -- more informative error message with ``np.min`` or ``np.max`` on unordered Categorical (:issue:`33115`) +- More informative error message with ``np.min`` or ``np.max`` on unordered :class:`Categorical` (:issue:`33115`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 9b7d53fb91f08..a468a5da29073 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2090,6 +2090,7 @@ def min(self, skipna=True, **kwargs): ------- min : the minimum of this `Categorical` """ + # TODO: validate kwargs self.check_for_ordered("min") if not len(self._codes): @@ -2125,6 +2126,7 @@ def max(self, skipna=True, **kwargs): ------- max : the maximum of this `Categorical` """ + # TODO: validate kwargs self.check_for_ordered("max") if not len(self._codes): From df1878a6708bcebe2ae96224cb4f0ecd87b37d2c Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 16 May 2020 11:19:13 +0100 Subject: [PATCH 3/4] validate kwargs --- pandas/core/arrays/categorical.py | 5 +++-- .../arrays/categorical/test_analytics.py | 20 +++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 91257356b64b8..a6f359e4cfa40 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -9,6 +9,7 @@ from pandas._libs import NaT, algos as libalgos, hashtable as htable from pandas._typing import ArrayLike, Dtype, Ordered, Scalar +from pandas.compat.numpy import function as nv from pandas.util._decorators import cache_readonly, deprecate_kwarg, doc from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs @@ -2092,7 +2093,7 @@ def min(self, skipna=True, **kwargs): ------- min : the minimum of this `Categorical` """ - # TODO: validate kwargs + nv.validate_min((), kwargs) self.check_for_ordered("min") if not len(self._codes): @@ -2128,7 +2129,7 @@ def max(self, skipna=True, **kwargs): ------- max : the maximum of this `Categorical` """ - # TODO: validate kwargs + nv.validate_max((), kwargs) self.check_for_ordered("max") if not len(self._codes): diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index ab57f089edd3c..92d61a84a4cd6 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -105,6 +105,26 @@ def test_numpy_min_max_raises(self, method): with pytest.raises(TypeError, match=re.escape(msg)): method(cat) + @pytest.mark.parametrize("kwarg", ["axis", "out", "keepdims"]) + @pytest.mark.parametrize("method", ["min", "max"]) + def test_numpy_min_max_unsupported_kwargs_raises(self, method, kwarg): + cat = Categorical(["a", "b", "c", "b"], ordered=True) + msg = ( + f"the '{kwarg}' parameter is not supported in the pandas implementation of" + f" {method}" + ) + kwargs = {kwarg: 42} + method = getattr(np, method) + with pytest.raises(ValueError, match=msg): + method(cat, **kwargs) + + @pytest.mark.parametrize("method, expected", [("min", "a"), ("max", "c")]) + def test_numpy_min_max_axis_equals_none(self, method, expected): + cat = Categorical(["a", "b", "c", "b"], ordered=True) + method = getattr(np, method) + result = method(cat, axis=None) + assert result == expected + @pytest.mark.parametrize( "values,categories,exp_mode", [ From 13fb3094c9d19489933b045fa6b1ddbf940ab6a2 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 16 May 2020 12:03:14 +0100 Subject: [PATCH 4/4] linting --- pandas/tests/arrays/categorical/test_analytics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 92d61a84a4cd6..4bf9b4b40d0b6 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -110,8 +110,8 @@ def test_numpy_min_max_raises(self, method): def test_numpy_min_max_unsupported_kwargs_raises(self, method, kwarg): cat = Categorical(["a", "b", "c", "b"], ordered=True) msg = ( - f"the '{kwarg}' parameter is not supported in the pandas implementation of" - f" {method}" + f"the '{kwarg}' parameter is not supported in the pandas implementation " + f"of {method}" ) kwargs = {kwarg: 42} method = getattr(np, method)