diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index cd1cb0b64f74a..0d7bb75b060e2 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -499,6 +499,7 @@ Strings ^^^^^^^ - Bug in the :meth:`~Series.astype` method when converting "string" dtype data to nullable integer dtype (:issue:`32450`). +- Fixed issue where taking ``min`` or ``max`` of a ``StringArray`` or a ``Series`` with ``StringDtype`` would raise (:issue:`31746`) - Bug in :meth:`Series.str.cat` returning ``NaN`` output when other had :class:`Index` type (:issue:`33425`) diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index 260cc69187d38..e988f5d97f7ee 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -218,7 +218,7 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name): LOGICAL_FUNC_DEFAULTS = dict(out=None, keepdims=False) validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method="kwargs") -MINMAX_DEFAULTS = dict(out=None, keepdims=False) +MINMAX_DEFAULTS = dict(axis=None, out=None, keepdims=False) validate_min = CompatValidator( MINMAX_DEFAULTS, fname="min", method="both", max_fname_arg_count=1 ) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 3058e1d6073f3..e9950e0edaffb 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -5,6 +5,7 @@ from numpy.lib.mixins import NDArrayOperatorsMixin from pandas._libs import lib +from pandas._typing import Scalar from pandas.compat.numpy import function as nv from pandas.util._decorators import doc from pandas.util._validators import validate_fillna_kwargs @@ -17,6 +18,7 @@ from pandas import compat from pandas.core import nanops from pandas.core.algorithms import searchsorted, take, unique +from pandas.core.array_algos import masked_reductions from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin from pandas.core.construction import
extract_array from pandas.core.indexers import check_array_indexer @@ -349,13 +351,19 @@ def all(self, axis=None, out=None, keepdims=False, skipna=True): nv.validate_all((), dict(out=out, keepdims=keepdims)) return nanops.nanall(self._ndarray, axis=axis, skipna=skipna) - def min(self, axis=None, out=None, keepdims=False, skipna=True): - nv.validate_min((), dict(out=out, keepdims=keepdims)) - return nanops.nanmin(self._ndarray, axis=axis, skipna=skipna) + def min(self, skipna: bool = True, **kwargs) -> Scalar: + nv.validate_min((), kwargs) + result = masked_reductions.min( + values=self.to_numpy(), mask=self.isna(), skipna=skipna + ) + return result - def max(self, axis=None, out=None, keepdims=False, skipna=True): - nv.validate_max((), dict(out=out, keepdims=keepdims)) - return nanops.nanmax(self._ndarray, axis=axis, skipna=skipna) + def max(self, skipna: bool = True, **kwargs) -> Scalar: + nv.validate_max((), kwargs) + result = masked_reductions.max( + values=self.to_numpy(), mask=self.isna(), skipna=skipna + ) + return result def sum( self, diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index dbca8e74f5e1b..51bbe182a002b 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -282,6 +282,9 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy) def _reduce(self, name, skipna=True, **kwargs): + if name in ["min", "max"]: + return getattr(self, name)(skipna=skipna) + raise TypeError(f"Cannot perform reduction '{name}' with string dtype") def value_counts(self, dropna=False): diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index fe770eed84b62..eb89798a1ad96 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -230,6 +230,32 @@ def test_reduce(skipna): assert result == "abc" +@pytest.mark.parametrize("method", ["min", "max"]) +@pytest.mark.parametrize("skipna", [True, False]) +def 
test_min_max(method, skipna): + arr = pd.Series(["a", "b", "c", None], dtype="string") + result = getattr(arr, method)(skipna=skipna) + if skipna: + expected = "a" if method == "min" else "c" + assert result == expected + else: + assert result is pd.NA + + +@pytest.mark.parametrize("method", ["min", "max"]) +@pytest.mark.parametrize( + "arr", + [ + pd.Series(["a", "b", "c", None], dtype="string"), + pd.array(["a", "b", "c", None], dtype="string"), + ], +) +def test_min_max_numpy(method, arr): + result = getattr(np, method)(arr) + expected = "a" if method == "min" else "c" + assert result == expected + + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.xfail(reason="Not implemented StringArray.sum") def test_reduce_missing(skipna): diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 86aed671f1b88..27a157d2127f6 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -77,7 +77,16 @@ class TestMissing(base.BaseMissingTests): class TestNoReduce(base.BaseNoReduceTests): - pass + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): + op_name = all_numeric_reductions + + if op_name in ["min", "max"]: + return None + + s = pd.Series(data) + with pytest.raises(TypeError): + getattr(s, op_name)(skipna=skipna) class TestMethods(base.BaseMethodsTests):