BUG: Fix Series.nlargest for integer boundary values

jschendel · jschendel · commit c52bcf7ef460 · 2018-06-11T17:33:03.000-06:00
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -1133,9 +1133,12 @@ def compute(self, method):
             return dropped[slc].sort_values(ascending=ascending).head(n)
 
         # fast method
-        arr, _, _ = _ensure_data(dropped.values)
+        arr, pandas_dtype, _ = _ensure_data(dropped.values)
         if method == 'nlargest':
             arr = -arr
+            if is_integer_dtype(pandas_dtype):
+                # GH 21426: ensure reverse ordering at boundaries
+                arr -= 1
 
         if self.keep == 'last':
             arr = arr[::-1]
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
@@ -13,6 +13,8 @@
 from pandas import (Series, Categorical, DataFrame, isna, notna,
                     bdate_range, date_range, _np_version_under1p10,
                     CategoricalIndex)
+from pandas.core.dtypes.common import (
+    is_float_dtype, is_integer_dtype, is_datetimelike)
 from pandas.core.index import MultiIndex
 from pandas.core.indexes.datetimes import Timestamp
 from pandas.core.indexes.timedeltas import Timedelta
@@ -2028,6 +2030,33 @@ def test_n(self, n):
         expected = s.sort_values().head(n)
         assert_series_equal(result, expected)
 
+    @pytest.mark.parametrize('dtype', [
+        'int8', 'int16', 'int32', 'int64',
+        'uint8', 'uint16', 'uint32', 'uint64',
+        'float16', 'float32', 'float64',
+        'datetime64[ns]', 'timedelta64[ns]'])
+    @pytest.mark.parametrize('method', ['nsmallest', 'nlargest'])
+    def test_boundary(self, method, dtype):
+        # GH 21426
+        if is_float_dtype(dtype):
+            min_val, max_val = np.finfo(dtype).min, np.finfo(dtype).max
+            min_2nd, max_2nd = np.nextafter([min_val, max_val], 0, dtype=dtype)
+            vals = [min_val, min_2nd, max_2nd, max_val]
+        elif is_integer_dtype(dtype):
+            min_val, max_val = np.iinfo(dtype).min, np.iinfo(dtype).max
+            vals = [min_val, min_val + 1, max_val - 1, max_val]
+        elif is_datetimelike(dtype):
+            # use int64 bounds and +1 to min_val since true minimum is NaT
+            # (include min_val/NaT at end to maintain same expected_idxr)
+            min_val, max_val = np.iinfo('int64').min, np.iinfo('int64').max
+            vals = [min_val + 1, min_val + 2, max_val - 1, max_val, min_val]
+
+        s = Series(vals, dtype=dtype)
+        result = getattr(s, method)(3)
+        expected_idxr = [0, 1, 2] if method == 'nsmallest' else [3, 2, 1]
+        expected = s.loc[expected_idxr]
+        tm.assert_series_equal(result, expected)
+
 
 class TestCategoricalSeriesAnalytics(object):