Skip to content

Commit c52bcf7

Browse files
committed
BUG: Fix Series.nlargest for integer boundary values
1 parent 4807905 commit c52bcf7

File tree

2 files changed

+33
-1
lines changed

2 files changed

+33
-1
lines changed

pandas/core/algorithms.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1133,9 +1133,12 @@ def compute(self, method):
11331133
return dropped[slc].sort_values(ascending=ascending).head(n)
11341134

11351135
# fast method
1136-
arr, _, _ = _ensure_data(dropped.values)
1136+
arr, pandas_dtype, _ = _ensure_data(dropped.values)
11371137
if method == 'nlargest':
11381138
arr = -arr
1139+
if is_integer_dtype(pandas_dtype):
1140+
# GH 21426: ensure reverse ordering at boundaries
1141+
arr -= 1
11391142

11401143
if self.keep == 'last':
11411144
arr = arr[::-1]

pandas/tests/series/test_analytics.py

+29
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
from pandas import (Series, Categorical, DataFrame, isna, notna,
1414
bdate_range, date_range, _np_version_under1p10,
1515
CategoricalIndex)
16+
from pandas.core.dtypes.common import (
17+
is_float_dtype, is_integer_dtype, is_datetimelike)
1618
from pandas.core.index import MultiIndex
1719
from pandas.core.indexes.datetimes import Timestamp
1820
from pandas.core.indexes.timedeltas import Timedelta
@@ -2028,6 +2030,33 @@ def test_n(self, n):
20282030
expected = s.sort_values().head(n)
20292031
assert_series_equal(result, expected)
20302032

2033+
@pytest.mark.parametrize('dtype', [
    'int8', 'int16', 'int32', 'int64',
    'uint8', 'uint16', 'uint32', 'uint64',
    'float16', 'float32', 'float64',
    'datetime64[ns]', 'timedelta64[ns]'])
@pytest.mark.parametrize('method', ['nsmallest', 'nlargest'])
def test_boundary(self, method, dtype):
    """Check ``nsmallest``/``nlargest`` on values at the limits of a dtype.

    A Series is built from the two lowest and the two highest values
    constructed for ``dtype``; ``method`` is then called with ``n=3`` and
    the result is compared against ``s.loc[...]`` on the expected labels:
    ``[0, 1, 2]`` for ``nsmallest`` and ``[3, 2, 1]`` for ``nlargest``.

    Parameters
    ----------
    method : str
        Name of the Series method under test, ``'nsmallest'`` or
        ``'nlargest'``.
    dtype : str
        dtype the boundary values are generated for and the Series is
        constructed with.
    """
    # GH 21426
    if is_float_dtype(dtype):
        limits = np.finfo(dtype)
        lowest, highest = limits.min, limits.max
        # step each extreme one representable value towards zero
        near_lowest, near_highest = np.nextafter(
            [lowest, highest], 0, dtype=dtype)
        vals = [lowest, near_lowest, near_highest, highest]
    elif is_integer_dtype(dtype):
        limits = np.iinfo(dtype)
        lowest, highest = limits.min, limits.max
        vals = [lowest, lowest + 1, highest - 1, highest]
    elif is_datetimelike(dtype):
        # use int64 bounds and +1 to the lower bound since true minimum is
        # NaT (the lower bound/NaT is appended at the end so the expected
        # labels stay the same as in the other branches)
        limits = np.iinfo('int64')
        lowest, highest = limits.min, limits.max
        vals = [lowest + 1, lowest + 2, highest - 1, highest, lowest]

    s = Series(vals, dtype=dtype)
    result = getattr(s, method)(3)

    if method == 'nsmallest':
        expected_labels = [0, 1, 2]
    else:
        expected_labels = [3, 2, 1]
    expected = s.loc[expected_labels]
    tm.assert_series_equal(result, expected)
2059+
20312060

20322061
class TestCategoricalSeriesAnalytics(object):
20332062

0 commit comments

Comments
 (0)