Skip to content

Commit eab2cfa

Browse files
authored
BUG: df.nsmallest gets wrong results when NaN is in the sorting column (#46656)
1 parent 96ecaf9 commit eab2cfa

File tree

4 files changed

+39
-1
lines changed

4 files changed

+39
-1
lines changed

doc/source/whatsnew/v1.4.3.rst

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ including other versions of pandas.
1414

1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
17+
- Fixed regression in :meth:`DataFrame.nsmallest` that led to wrong results when the sorting column contains ``np.nan`` (:issue:`46589`)
1718
- Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`)
1819
-
1920

pandas/core/algorithms.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1181,7 +1181,6 @@ def compute(self, method: str) -> Series:
11811181
arr = arr[::-1]
11821182

11831183
nbase = n
1184-
findex = len(self.obj)
11851184
narr = len(arr)
11861185
n = min(n, narr)
11871186

@@ -1194,6 +1193,11 @@ def compute(self, method: str) -> Series:
11941193
if self.keep != "all":
11951194
inds = inds[:n]
11961195
findex = nbase
1196+
else:
1197+
if len(inds) < nbase and len(nan_index) + len(inds) >= nbase:
1198+
findex = len(nan_index) + len(inds)
1199+
else:
1200+
findex = len(inds)
11971201

11981202
if self.keep == "last":
11991203
# reverse indices

pandas/tests/frame/methods/test_nlargest.py

+21
Original file line numberDiff line numberDiff line change
@@ -216,3 +216,24 @@ def test_nlargest_nan(self):
216216
result = df.nlargest(5, 0)
217217
expected = df.sort_values(0, ascending=False).head(5)
218218
tm.assert_frame_equal(result, expected)
219+
220+
def test_nsmallest_nan_after_n_element(self):
221+
# GH#46589
222+
df = pd.DataFrame(
223+
{
224+
"a": [1, 2, 3, 4, 5, None, 7],
225+
"b": [7, 6, 5, 4, 3, 2, 1],
226+
"c": [1, 1, 2, 2, 3, 3, 3],
227+
},
228+
index=range(7),
229+
)
230+
result = df.nsmallest(5, columns=["a", "b"])
231+
expected = pd.DataFrame(
232+
{
233+
"a": [1, 2, 3, 4, 5],
234+
"b": [7, 6, 5, 4, 3],
235+
"c": [1, 1, 2, 2, 3],
236+
},
237+
index=range(5),
238+
).astype({"a": "float"})
239+
tm.assert_frame_equal(result, expected)

pandas/tests/series/methods/test_nlargest.py

+12
Original file line numberDiff line numberDiff line change
@@ -231,3 +231,15 @@ def test_nlargest_nullable(self, any_numeric_ea_dtype):
231231
.astype(dtype)
232232
)
233233
tm.assert_series_equal(result, expected)
234+
235+
def test_nsmallest_nan_when_keep_is_all(self):
236+
# GH#46589
237+
s = Series([1, 2, 3, 3, 3, None])
238+
result = s.nsmallest(3, keep="all")
239+
expected = Series([1.0, 2.0, 3.0, 3.0, 3.0])
240+
tm.assert_series_equal(result, expected)
241+
242+
s = Series([1, 2, None, None, None])
243+
result = s.nsmallest(3, keep="all")
244+
expected = Series([1, 2, None, None, None])
245+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)