From aa6d86776167ec4651caffc49f3b2820e1d6de70 Mon Sep 17 00:00:00 2001
From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com>
Date: Sun, 10 Apr 2022 11:59:37 -0500
Subject: [PATCH] Backport PR #46656: BUG: df.nsmallest get wrong results when
 NaN in the sorting column

---
 doc/source/whatsnew/v1.4.3.rst               |  1 +
 pandas/core/algorithms.py                    |  6 +++++-
 pandas/tests/frame/methods/test_nlargest.py  | 21 ++++++++++++++++++++
 pandas/tests/series/methods/test_nlargest.py | 12 +++++++++++
 4 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.3.rst b/doc/source/whatsnew/v1.4.3.rst
index 8572c136c28a9..0c326e15d90ed 100644
--- a/doc/source/whatsnew/v1.4.3.rst
+++ b/doc/source/whatsnew/v1.4.3.rst
@@ -14,6 +14,7 @@ including other versions of pandas.
 
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
+- Fixed regression in :meth:`DataFrame.nsmallest` that led to wrong results when ``np.nan`` is in the sorting column (:issue:`46589`)
 - Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`)
 -
 
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 36eabe93dbd7e..32e3e19688a63 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -1216,7 +1216,6 @@ def compute(self, method: str) -> Series:
             arr = arr[::-1]
 
         nbase = n
-        findex = len(self.obj)
         narr = len(arr)
         n = min(n, narr)
 
@@ -1229,6 +1228,11 @@ def compute(self, method: str) -> Series:
         if self.keep != "all":
             inds = inds[:n]
             findex = nbase
+        else:
+            if len(inds) < nbase and len(nan_index) + len(inds) >= nbase:
+                findex = len(nan_index) + len(inds)
+            else:
+                findex = len(inds)
 
         if self.keep == "last":
             # reverse indices
diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py
index 1b2db80d782ce..a317dae562ae0 100644
--- a/pandas/tests/frame/methods/test_nlargest.py
+++ b/pandas/tests/frame/methods/test_nlargest.py
@@ -216,3 +216,24 @@ def test_nlargest_nan(self):
         result = df.nlargest(5, 0)
         expected = df.sort_values(0, ascending=False).head(5)
         tm.assert_frame_equal(result, expected)
+
+    def test_nsmallest_nan_after_n_element(self):
+        # GH#46589: a NaN in column "a" past the first n rows must not change the result
+        df = pd.DataFrame(
+            {
+                "a": [1, 2, 3, 4, 5, None, 7],
+                "b": [7, 6, 5, 4, 3, 2, 1],
+                "c": [1, 1, 2, 2, 3, 3, 3],
+            },
+            index=range(7),
+        )
+        result = df.nsmallest(5, columns=["a", "b"])
+        expected = pd.DataFrame(
+            {
+                "a": [1, 2, 3, 4, 5],
+                "b": [7, 6, 5, 4, 3],
+                "c": [1, 1, 2, 2, 3],
+            },
+            index=range(5),
+        ).astype({"a": "float"})
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py
index ee96ab08ad66c..4f07257038bc9 100644
--- a/pandas/tests/series/methods/test_nlargest.py
+++ b/pandas/tests/series/methods/test_nlargest.py
@@ -231,3 +231,15 @@ def test_nlargest_nullable(self, any_numeric_ea_dtype):
             .astype(dtype)
         )
         tm.assert_series_equal(result, expected)
+
+    def test_nsmallest_nan_when_keep_is_all(self):
+        # GH#46589: keep="all" includes NaNs only when non-NaN values run short
+        s = Series([1, 2, 3, 3, 3, None])
+        result = s.nsmallest(3, keep="all")
+        expected = Series([1.0, 2.0, 3.0, 3.0, 3.0])
+        tm.assert_series_equal(result, expected)
+
+        s = Series([1, 2, None, None, None])
+        result = s.nsmallest(3, keep="all")
+        expected = Series([1, 2, None, None, None])
+        tm.assert_series_equal(result, expected)