diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index b84f3c83284fa..e1c55d5621adb 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -23,6 +23,7 @@ Fixed regressions - Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`) - Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`) - Fixed regression in :meth:`.Styler.highlight_min` and :meth:`.Styler.highlight_max` where ``pandas.NA`` was not successfully ignored (:issue:`42650`) +- Fixed regression in :meth:`Series.nlargest` and :meth:`Series.nsmallest` with nullable integer or float dtype (:issue:`41816`) - Fixed regression in :meth:`pandas.Series.quantile` with :class:`pandas.Int64Dtype` (:issue:`42626`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 52c5790e1fc24..4f9dd61b8e0da 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -11,6 +11,7 @@ Literal, Union, cast, + final, ) from warnings import warn @@ -1209,12 +1210,15 @@ def __init__(self, obj, n: int, keep: str): def compute(self, method: str) -> DataFrame | Series: raise NotImplementedError + @final def nlargest(self): return self.compute("nlargest") + @final def nsmallest(self): return self.compute("nsmallest") + @final @staticmethod def is_valid_dtype_n_method(dtype: DtypeObj) -> bool: """ @@ -1253,6 +1257,18 @@ def compute(self, method: str) -> Series: dropped = self.obj.dropna() + if is_extension_array_dtype(dropped.dtype): + # GH#41816 bc we have dropped NAs above, MaskedArrays can use the + # numpy logic. 
+ from pandas.core.arrays import BaseMaskedArray + + arr = dropped._values + if isinstance(arr, BaseMaskedArray): + ser = type(dropped)(arr._data, index=dropped.index, name=dropped.name) + + result = type(self)(ser, n=self.n, keep=self.keep).compute(method) + return result.astype(arr.dtype) + # slow method if n >= len(self.obj): ascending = method == "nsmallest" diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py index 3af06145b9fcd..0efb0663a0327 100644 --- a/pandas/tests/series/methods/test_nlargest.py +++ b/pandas/tests/series/methods/test_nlargest.py @@ -211,3 +211,19 @@ def test_nlargest_boolean(self, data, expected): result = ser.nlargest(1) expected = Series(expected) tm.assert_series_equal(result, expected) + + def test_nlargest_nullable(self, any_nullable_numeric_dtype): + # GH#42816 + dtype = any_nullable_numeric_dtype + arr = np.random.randn(10).astype(dtype.lower(), copy=False) + + ser = Series(arr.copy(), dtype=dtype) + ser[1] = pd.NA + result = ser.nlargest(5) + + expected = ( + Series(np.delete(arr, 1), index=ser.index.delete(1)) + .nlargest(5) + .astype(dtype) + ) + tm.assert_series_equal(result, expected)