Skip to content

Commit 61cbb73

Browse files
authored
REGR: Series.nlargest with masked arrays (#42838)
1 parent 83fabfb commit 61cbb73

File tree

3 files changed

+33
-0
lines changed

3 files changed

+33
-0
lines changed

doc/source/whatsnew/v1.3.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Fixed regressions
2323
- Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`)
2424
- Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`)
2525
- Fixed regression in :meth:`.Styler.highlight_min` and :meth:`.Styler.highlight_max` where ``pandas.NA`` was not successfully ignored (:issue:`42650`)
26+
- Fixed regression in :meth:`Series.nlargest` and :meth:`Series.nsmallest` with nullable integer or float dtype (:issue:`42816`)
2627
- Fixed regression in :meth:`pandas.Series.quantile` with :class:`pandas.Int64Dtype` (:issue:`42626`)
2728

2829
.. ---------------------------------------------------------------------------

pandas/core/algorithms.py

+16
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
Literal,
1212
Union,
1313
cast,
14+
final,
1415
)
1516
from warnings import warn
1617

@@ -1209,12 +1210,15 @@ def __init__(self, obj, n: int, keep: str):
12091210
def compute(self, method: str) -> DataFrame | Series:
12101211
raise NotImplementedError
12111212

1213+
@final
12121214
def nlargest(self):
12131215
return self.compute("nlargest")
12141216

1217+
@final
12151218
def nsmallest(self):
12161219
return self.compute("nsmallest")
12171220

1221+
@final
12181222
@staticmethod
12191223
def is_valid_dtype_n_method(dtype: DtypeObj) -> bool:
12201224
"""
@@ -1253,6 +1257,18 @@ def compute(self, method: str) -> Series:
12531257

12541258
dropped = self.obj.dropna()
12551259

1260+
if is_extension_array_dtype(dropped.dtype):
1261+
# GH#42816: because we have dropped NAs above, MaskedArrays can use the
1262+
# numpy logic.
1263+
from pandas.core.arrays import BaseMaskedArray
1264+
1265+
arr = dropped._values
1266+
if isinstance(arr, BaseMaskedArray):
1267+
ser = type(dropped)(arr._data, index=dropped.index, name=dropped.name)
1268+
1269+
result = type(self)(ser, n=self.n, keep=self.keep).compute(method)
1270+
return result.astype(arr.dtype)
1271+
12561272
# slow method
12571273
if n >= len(self.obj):
12581274
ascending = method == "nsmallest"

pandas/tests/series/methods/test_nlargest.py

+16
Original file line numberDiff line numberDiff line change
@@ -211,3 +211,19 @@ def test_nlargest_boolean(self, data, expected):
211211
result = ser.nlargest(1)
212212
expected = Series(expected)
213213
tm.assert_series_equal(result, expected)
214+
215+
def test_nlargest_nullable(self, any_nullable_numeric_dtype):
216+
# GH#42816
217+
dtype = any_nullable_numeric_dtype
218+
arr = np.random.randn(10).astype(dtype.lower(), copy=False)
219+
220+
ser = Series(arr.copy(), dtype=dtype)
221+
ser[1] = pd.NA
222+
result = ser.nlargest(5)
223+
224+
expected = (
225+
Series(np.delete(arr, 1), index=ser.index.delete(1))
226+
.nlargest(5)
227+
.astype(dtype)
228+
)
229+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)