Skip to content

Commit f49269f

Browse files
authored
ENH: Implement StringArray.min / max (#33351)
1 parent 31875eb commit f49269f

File tree

6 files changed

+55
-8
lines changed

6 files changed

+55
-8
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,7 @@ Strings
500500
^^^^^^^
501501

502502
- Bug in the :meth:`~Series.astype` method when converting "string" dtype data to nullable integer dtype (:issue:`32450`).
503+
- Fixed issue where taking ``min`` or ``max`` of a ``StringArray`` or a ``Series`` with ``StringDtype`` would raise. (:issue:`31746`)
503504
- Bug in :meth:`Series.str.cat` returning ``NaN`` output when other had :class:`Index` type (:issue:`33425`)
504505

505506

pandas/compat/numpy/function.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name):
218218
LOGICAL_FUNC_DEFAULTS = dict(out=None, keepdims=False)
219219
validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method="kwargs")
220220

221-
MINMAX_DEFAULTS = dict(out=None, keepdims=False)
221+
MINMAX_DEFAULTS = dict(axis=None, out=None, keepdims=False)
222222
validate_min = CompatValidator(
223223
MINMAX_DEFAULTS, fname="min", method="both", max_fname_arg_count=1
224224
)

pandas/core/arrays/numpy_.py

+14-6
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from numpy.lib.mixins import NDArrayOperatorsMixin
66

77
from pandas._libs import lib
8+
from pandas._typing import Scalar
89
from pandas.compat.numpy import function as nv
910
from pandas.util._decorators import doc
1011
from pandas.util._validators import validate_fillna_kwargs
@@ -17,6 +18,7 @@
1718
from pandas import compat
1819
from pandas.core import nanops
1920
from pandas.core.algorithms import searchsorted, take, unique
21+
from pandas.core.array_algos import masked_reductions
2022
from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
2123
from pandas.core.construction import extract_array
2224
from pandas.core.indexers import check_array_indexer
@@ -349,13 +351,19 @@ def all(self, axis=None, out=None, keepdims=False, skipna=True):
349351
nv.validate_all((), dict(out=out, keepdims=keepdims))
350352
return nanops.nanall(self._ndarray, axis=axis, skipna=skipna)
351353

352-
def min(self, axis=None, out=None, keepdims=False, skipna=True):
353-
nv.validate_min((), dict(out=out, keepdims=keepdims))
354-
return nanops.nanmin(self._ndarray, axis=axis, skipna=skipna)
354+
def min(self, skipna: bool = True, **kwargs) -> Scalar:
    """
    Return the minimum of the array's values.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to ``masked_reductions.min`` together with the
        missing-value mask obtained from ``self.isna()``.
    **kwargs
        Extra keywords, checked by ``nv.validate_min`` (the numpy
        compatibility validator for ``min``).

    Returns
    -------
    Scalar
        Whatever ``masked_reductions.min`` returns for these values.
    """
    # Validate numpy-style keywords before doing any work.
    nv.validate_min((), kwargs)

    data = self.to_numpy()
    missing = self.isna()
    return masked_reductions.min(values=data, mask=missing, skipna=skipna)
355360

356-
def max(self, axis=None, out=None, keepdims=False, skipna=True):
357-
nv.validate_max((), dict(out=out, keepdims=keepdims))
358-
return nanops.nanmax(self._ndarray, axis=axis, skipna=skipna)
361+
def max(self, skipna: bool = True, **kwargs) -> Scalar:
    """
    Return the maximum of the array's values.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to ``masked_reductions.max`` together with the
        missing-value mask obtained from ``self.isna()``.
    **kwargs
        Extra keywords, checked by ``nv.validate_max`` (the numpy
        compatibility validator for ``max``).

    Returns
    -------
    Scalar
        Whatever ``masked_reductions.max`` returns for these values.
    """
    # Validate numpy-style keywords before doing any work.
    nv.validate_max((), kwargs)

    data = self.to_numpy()
    missing = self.isna()
    return masked_reductions.max(values=data, mask=missing, skipna=skipna)
359367

360368
def sum(
361369
self,

pandas/core/arrays/string_.py

+3
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,9 @@ def astype(self, dtype, copy=True):
282282
return super().astype(dtype, copy)
283283

284284
def _reduce(self, name, skipna=True, **kwargs):
    """
    Dispatch a named reduction; only ``min`` and ``max`` are supported.

    Parameters
    ----------
    name : str
        Name of the reduction being requested.
    skipna : bool, default True
        Passed through to the ``min`` / ``max`` method.
    **kwargs
        Accepted but not forwarded to the reduction method.

    Raises
    ------
    TypeError
        If ``name`` is anything other than ``"min"`` or ``"max"``.
    """
    if name not in ["min", "max"]:
        raise TypeError(f"Cannot perform reduction '{name}' with string dtype")

    # Look the reduction up on the array itself and pass only ``skipna``.
    reducer = getattr(self, name)
    return reducer(skipna=skipna)
286289

287290
def value_counts(self, dropna=False):

pandas/tests/arrays/string_/test_string.py

+26
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,32 @@ def test_reduce(skipna):
230230
assert result == "abc"
231231

232232

233+
@pytest.mark.parametrize("method", ["min", "max"])
@pytest.mark.parametrize("skipna", [True, False])
def test_min_max(method, skipna):
    """Check min/max on a "string" dtype Series that holds one missing value."""
    arr = pd.Series(["a", "b", "c", None], dtype="string")
    reducer = getattr(arr, method)
    result = reducer(skipna=skipna)

    if not skipna:
        # The missing value is not skipped, so the result must be pd.NA.
        assert result is pd.NA
        return

    if method == "min":
        expected = "a"
    else:
        expected = "c"
    assert result == expected
243+
244+
245+
@pytest.mark.parametrize("method", ["min", "max"])
@pytest.mark.parametrize(
    "arr",
    [
        pd.Series(["a", "b", "c", None], dtype="string"),
        pd.array(["a", "b", "c", None], dtype="string"),
    ],
)
def test_min_max_numpy(method, arr):
    """Check that ``np.min`` / ``np.max`` work on string Series and arrays."""
    # Resolve the numpy-level function (np.min or np.max) by name.
    np_func = getattr(np, method)
    result = np_func(arr)

    if method == "min":
        expected = "a"
    else:
        expected = "c"
    assert result == expected
257+
258+
233259
@pytest.mark.parametrize("skipna", [True, False])
234260
@pytest.mark.xfail(reason="Not implemented StringArray.sum")
235261
def test_reduce_missing(skipna):

pandas/tests/extension/test_string.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,16 @@ class TestMissing(base.BaseMissingTests):
7777

7878

7979
class TestNoReduce(base.BaseNoReduceTests):
    """Reduction tests for the string extension type.

    ``min`` and ``max`` are exempt; every other numeric reduction is
    expected to raise ``TypeError``.
    """

    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
        """Assert that numeric reductions other than min/max raise TypeError."""
        op_name = all_numeric_reductions

        if op_name not in ["min", "max"]:
            ser = pd.Series(data)
            with pytest.raises(TypeError):
                getattr(ser, op_name)(skipna=skipna)

        # min / max fall through without any assertion.
        return None
8190

8291

8392
class TestMethods(base.BaseMethodsTests):

0 commit comments

Comments
 (0)