REGR: Series.nlargest with masked arrays (#42838)

jbrockmendel · web-flow · commit 61cbb736f1f1 · 2021-08-10T17:45:15.000-04:00
diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst
@@ -23,6 +23,7 @@ Fixed regressions
 - Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`)
 - Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`)
 - Fixed regression in :meth:`.Styler.highlight_min` and :meth:`.Styler.highlight_max` where ``pandas.NA`` was not successfully ignored (:issue:`42650`)
+- Fixed regression in :meth:`Series.nlargest` and :meth:`Series.nsmallest` with nullable integer or float dtype (:issue:`41816`)
 - Fixed regression in :meth:`pandas.Series.quantile` with :class:`pandas.Int64Dtype` (:issue:`42626`)
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -11,6 +11,7 @@
     Literal,
     Union,
     cast,
+    final,
 )
 from warnings import warn
 
@@ -1209,12 +1210,15 @@ def __init__(self, obj, n: int, keep: str):
     def compute(self, method: str) -> DataFrame | Series:
         raise NotImplementedError
 
+    @final
     def nlargest(self):
         return self.compute("nlargest")
 
+    @final
     def nsmallest(self):
         return self.compute("nsmallest")
 
+    @final
     @staticmethod
     def is_valid_dtype_n_method(dtype: DtypeObj) -> bool:
         """
@@ -1253,6 +1257,18 @@ def compute(self, method: str) -> Series:
 
         dropped = self.obj.dropna()
 
+        if is_extension_array_dtype(dropped.dtype):
+            # GH#41816: because NAs were dropped above, MaskedArrays can use the
+            #  numpy logic.
+            from pandas.core.arrays import BaseMaskedArray
+
+            arr = dropped._values
+            if isinstance(arr, BaseMaskedArray):
+                ser = type(dropped)(arr._data, index=dropped.index, name=dropped.name)
+
+                result = type(self)(ser, n=self.n, keep=self.keep).compute(method)
+                return result.astype(arr.dtype)
+
         # slow method
         if n >= len(self.obj):
             ascending = method == "nsmallest"
diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py
@@ -211,3 +211,19 @@ def test_nlargest_boolean(self, data, expected):
         result = ser.nlargest(1)
         expected = Series(expected)
         tm.assert_series_equal(result, expected)
+
+    def test_nlargest_nullable(self, any_nullable_numeric_dtype):
+        # GH#41816
+        dtype = any_nullable_numeric_dtype
+        arr = np.random.randn(10).astype(dtype.lower(), copy=False)
+
+        ser = Series(arr.copy(), dtype=dtype)
+        ser[1] = pd.NA
+        result = ser.nlargest(5)
+
+        expected = (
+            Series(np.delete(arr, 1), index=ser.index.delete(1))
+            .nlargest(5)
+            .astype(dtype)
+        )
+        tm.assert_series_equal(result, expected)