Skip to content

Commit f1b60ad

Browse files
jbrockmendel meeseeksmachine
authored and committed
Backport PR pandas-dev#42838: REGR: Series.nlargest with masked arrays
1 parent 8a19457 commit f1b60ad

File tree

3 files changed

+33
-0
lines changed

3 files changed

+33
-0
lines changed

doc/source/whatsnew/v1.3.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Fixed regressions
2323
- Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`)
2424
- Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`)
2525
- Fixed regression in :meth:`.Styler.highlight_min` and :meth:`.Styler.highlight_max` where ``pandas.NA`` was not successfully ignored (:issue:`42650`)
26+
- Fixed regression in :meth:`Series.nlargest` and :meth:`Series.nsmallest` with nullable integer or float dtype (:issue:`41816`)
2627
- Fixed regression in :meth:`pandas.Series.quantile` with :class:`pandas.Int64Dtype` (:issue:`42626`)
2728

2829
.. ---------------------------------------------------------------------------

pandas/core/algorithms.py

+16
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
TYPE_CHECKING,
1111
Union,
1212
cast,
13+
final,
1314
)
1415
from warnings import warn
1516

@@ -1215,12 +1216,15 @@ def __init__(self, obj, n: int, keep: str):
12151216
def compute(self, method: str) -> FrameOrSeriesUnion:
12161217
raise NotImplementedError
12171218

1219+
@final
12181220
def nlargest(self):
12191221
return self.compute("nlargest")
12201222

1223+
@final
12211224
def nsmallest(self):
12221225
return self.compute("nsmallest")
12231226

1227+
@final
12241228
@staticmethod
12251229
def is_valid_dtype_n_method(dtype: DtypeObj) -> bool:
12261230
"""
@@ -1259,6 +1263,18 @@ def compute(self, method: str) -> Series:
12591263

12601264
dropped = self.obj.dropna()
12611265

1266+
if is_extension_array_dtype(dropped.dtype):
1267+
# GH#41816 bc we have dropped NAs above, MaskedArrays can use the
1268+
# numpy logic.
1269+
from pandas.core.arrays import BaseMaskedArray
1270+
1271+
arr = dropped._values
1272+
if isinstance(arr, BaseMaskedArray):
1273+
ser = type(dropped)(arr._data, index=dropped.index, name=dropped.name)
1274+
1275+
result = type(self)(ser, n=self.n, keep=self.keep).compute(method)
1276+
return result.astype(arr.dtype)
1277+
12621278
# slow method
12631279
if n >= len(self.obj):
12641280
ascending = method == "nsmallest"

pandas/tests/series/methods/test_nlargest.py

+16
Original file line numberDiff line numberDiff line change
@@ -211,3 +211,19 @@ def test_nlargest_boolean(self, data, expected):
211211
result = ser.nlargest(1)
212212
expected = Series(expected)
213213
tm.assert_series_equal(result, expected)
214+
215+
def test_nlargest_nullable(self, any_nullable_numeric_dtype):
216+
# GH#41816
217+
dtype = any_nullable_numeric_dtype
218+
arr = np.random.randn(10).astype(dtype.lower(), copy=False)
219+
220+
ser = Series(arr.copy(), dtype=dtype)
221+
ser[1] = pd.NA
222+
result = ser.nlargest(5)
223+
224+
expected = (
225+
Series(np.delete(arr, 1), index=ser.index.delete(1))
226+
.nlargest(5)
227+
.astype(dtype)
228+
)
229+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)