Skip to content

Commit 2886388

Browse files
Backport PR #46656: BUG: df.nsmallest get wrong results when NaN in the sorting column (#46748)
Co-authored-by: Yuanhao Geng <[email protected]>
1 parent dfbc1dc commit 2886388

File tree

4 files changed

+39
-1
lines changed

4 files changed

+39
-1
lines changed

doc/source/whatsnew/v1.4.3.rst

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ including other versions of pandas.
1414

1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
17+
- Fixed regression in :meth:`DataFrame.nsmallest` that led to wrong results when ``np.nan`` was in the sorting column (:issue:`46589`)
1718
- Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`)
1819
-
1920

pandas/core/algorithms.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1216,7 +1216,6 @@ def compute(self, method: str) -> Series:
12161216
arr = arr[::-1]
12171217

12181218
nbase = n
1219-
findex = len(self.obj)
12201219
narr = len(arr)
12211220
n = min(n, narr)
12221221

@@ -1229,6 +1228,11 @@ def compute(self, method: str) -> Series:
12291228
if self.keep != "all":
12301229
inds = inds[:n]
12311230
findex = nbase
1231+
else:
1232+
if len(inds) < nbase and len(nan_index) + len(inds) >= nbase:
1233+
findex = len(nan_index) + len(inds)
1234+
else:
1235+
findex = len(inds)
12321236

12331237
if self.keep == "last":
12341238
# reverse indices

pandas/tests/frame/methods/test_nlargest.py

+21
Original file line numberDiff line numberDiff line change
@@ -216,3 +216,24 @@ def test_nlargest_nan(self):
216216
result = df.nlargest(5, 0)
217217
expected = df.sort_values(0, ascending=False).head(5)
218218
tm.assert_frame_equal(result, expected)
219+
220+
def test_nsmallest_nan_after_n_element(self):
221+
# GH#46589
222+
df = pd.DataFrame(
223+
{
224+
"a": [1, 2, 3, 4, 5, None, 7],
225+
"b": [7, 6, 5, 4, 3, 2, 1],
226+
"c": [1, 1, 2, 2, 3, 3, 3],
227+
},
228+
index=range(7),
229+
)
230+
result = df.nsmallest(5, columns=["a", "b"])
231+
expected = pd.DataFrame(
232+
{
233+
"a": [1, 2, 3, 4, 5],
234+
"b": [7, 6, 5, 4, 3],
235+
"c": [1, 1, 2, 2, 3],
236+
},
237+
index=range(5),
238+
).astype({"a": "float"})
239+
tm.assert_frame_equal(result, expected)

pandas/tests/series/methods/test_nlargest.py

+12
Original file line numberDiff line numberDiff line change
@@ -231,3 +231,15 @@ def test_nlargest_nullable(self, any_numeric_ea_dtype):
231231
.astype(dtype)
232232
)
233233
tm.assert_series_equal(result, expected)
234+
235+
def test_nsmallest_nan_when_keep_is_all(self):
236+
# GH#46589
237+
s = Series([1, 2, 3, 3, 3, None])
238+
result = s.nsmallest(3, keep="all")
239+
expected = Series([1.0, 2.0, 3.0, 3.0, 3.0])
240+
tm.assert_series_equal(result, expected)
241+
242+
s = Series([1, 2, None, None, None])
243+
result = s.nsmallest(3, keep="all")
244+
expected = Series([1, 2, None, None, None])
245+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)