From 0c1ab8fa5f9b402e1a68604ff6ad3c6c89138153 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 31 Jul 2021 16:30:30 -0700 Subject: [PATCH 1/3] REGR: Series.nlargest with masked arrays --- doc/source/whatsnew/v1.3.2.rst | 2 +- pandas/core/algorithms.py | 16 ++++++++++++++++ pandas/tests/series/methods/test_nlargest.py | 11 +++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index 116bdd6e1d98f..c2f9046c96108 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -21,7 +21,7 @@ Fixed regressions - Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`) - Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`) - Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`) -- +- Regression in :meth:`Series.nlargest` and :meth:`Series.nsmallest` with nullabe integer or float dtype (:issue:`41816`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index df9f3d07ce7fd..572fd36e4ce25 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -11,6 +11,7 @@ Literal, Union, cast, + final, ) from warnings import warn @@ -1211,12 +1212,15 @@ def __init__(self, obj, n: int, keep: str): def compute(self, method: str) -> DataFrame | Series: raise NotImplementedError + @final def nlargest(self): return self.compute("nlargest") + @final def nsmallest(self): return self.compute("nsmallest") + @final @staticmethod def is_valid_dtype_n_method(dtype: DtypeObj) -> bool: """ @@ -1255,6 +1259,18 @@ def compute(self, method: str) -> Series: dropped = self.obj.dropna() + if is_extension_array_dtype(dropped.dtype): + # GH#41816 bc we have dropped NAs above, MaskedArrays can use the + # numpy logic. + from pandas.core.arrays import BaseMaskedArray + + arr = dropped._values + if isinstance(arr, BaseMaskedArray): + ser = type(dropped)(arr._data, index=dropped.index, name=dropped.name) + + result = type(self)(ser, n=self.n, keep=self.keep).compute(method) + return result.astype(arr.dtype) + # slow method if n >= len(self.obj): ascending = method == "nsmallest" diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py index 3af06145b9fcd..c20831e6832a4 100644 --- a/pandas/tests/series/methods/test_nlargest.py +++ b/pandas/tests/series/methods/test_nlargest.py @@ -211,3 +211,14 @@ def test_nlargest_boolean(self, data, expected): result = ser.nlargest(1) expected = Series(expected) tm.assert_series_equal(result, expected) + + def test_nlargest_nullable(self, any_nullable_numeric_dtype): + # GH#42816 NB so far we only handles cases without NAs + dtype = any_nullable_numeric_dtype + arr = np.random.randn(10).astype(dtype.lower(), copy=False) + + ser = Series(arr, dtype=dtype) + result = ser.nlargest(5) + + expected = Series(arr).nlargest(5).astype(dtype) + tm.assert_series_equal(result, expected) From 183dacc361a6321c4000b6a34445899676bb9018 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Aug 2021 06:54:12 -0700 Subject: [PATCH 2/3] Update doc/source/whatsnew/v1.3.2.rst Co-authored-by: Simon Hawkins --- doc/source/whatsnew/v1.3.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index c2f9046c96108..4cc3e0bdeb0db 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -21,7 +21,7 @@ Fixed regressions - Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`) - Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`) - Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`) -- Regression in :meth:`Series.nlargest` and :meth:`Series.nsmallest` with nullabe integer or float dtype (:issue:`41816`) +- Regression in :meth:`Series.nlargest` and :meth:`Series.nsmallest` with nullable integer or float dtype (:issue:`41816`) .. --------------------------------------------------------------------------- From 7340098906aac9840bcd9cc8537648f6b16aa32d Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 3 Aug 2021 09:25:16 -0700 Subject: [PATCH 3/3] update test with case including NA --- pandas/tests/series/methods/test_nlargest.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py index c20831e6832a4..0efb0663a0327 100644 --- a/pandas/tests/series/methods/test_nlargest.py +++ b/pandas/tests/series/methods/test_nlargest.py @@ -213,12 +213,17 @@ def test_nlargest_boolean(self, data, expected): tm.assert_series_equal(result, expected) def test_nlargest_nullable(self, any_nullable_numeric_dtype): - # GH#42816 NB so far we only handles cases without NAs + # GH#42816 dtype = any_nullable_numeric_dtype arr = np.random.randn(10).astype(dtype.lower(), copy=False) - ser = Series(arr, dtype=dtype) + ser = Series(arr.copy(), dtype=dtype) + ser[1] = pd.NA result = ser.nlargest(5) - expected = Series(arr).nlargest(5).astype(dtype) + expected = ( + Series(np.delete(arr, 1), index=ser.index.delete(1)) + .nlargest(5) + .astype(dtype) + ) tm.assert_series_equal(result, expected)