Skip to content

Commit 28b831f

Browse files
jschendel authored and jorisvandenbossche committed
BUG: Fix Series.nlargest for integer boundary values (#21432)
(cherry picked from commit ec5956e)
1 parent 787ef30 commit 28b831f

File tree

5 files changed

+321
-43
lines changed

5 files changed

+321
-43
lines changed

doc/source/whatsnew/v0.23.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -80,4 +80,5 @@ Bug Fixes
8080

8181
**Other**
8282

83+
- Bug in :meth:`Series.nlargest` for signed and unsigned integer dtypes when the minimum value is present (:issue:`21426`)
8384
-

pandas/conftest.py

+71
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,14 @@ def join_type(request):
129129
return request.param
130130

131131

132+
@pytest.fixture(params=['nlargest', 'nsmallest'])
133+
def nselect_method(request):
134+
"""
135+
Fixture for trying all nselect methods
136+
"""
137+
return request.param
138+
139+
132140
@pytest.fixture(params=[None, np.nan, pd.NaT, float('nan'), np.float('NaN')])
133141
def nulls_fixture(request):
134142
"""
@@ -170,3 +178,66 @@ def string_dtype(request):
170178
* 'U'
171179
"""
172180
return request.param
181+
182+
183+
@pytest.fixture(params=["float32", "float64"])
184+
def float_dtype(request):
185+
"""
186+
Parameterized fixture for float dtypes.
187+
188+
* float32
189+
* float64
190+
"""
191+
192+
return request.param
193+
194+
195+
UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
196+
SIGNED_INT_DTYPES = ["int8", "int16", "int32", "int64"]
197+
ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES
198+
199+
200+
@pytest.fixture(params=SIGNED_INT_DTYPES)
201+
def sint_dtype(request):
202+
"""
203+
Parameterized fixture for signed integer dtypes.
204+
205+
* int8
206+
* int16
207+
* int32
208+
* int64
209+
"""
210+
211+
return request.param
212+
213+
214+
@pytest.fixture(params=UNSIGNED_INT_DTYPES)
215+
def uint_dtype(request):
216+
"""
217+
Parameterized fixture for unsigned integer dtypes.
218+
219+
* uint8
220+
* uint16
221+
* uint32
222+
* uint64
223+
"""
224+
225+
return request.param
226+
227+
228+
@pytest.fixture(params=ALL_INT_DTYPES)
229+
def any_int_dtype(request):
230+
"""
231+
Parameterized fixture for any integer dtypes.
232+
233+
* int8
234+
* uint8
235+
* int16
236+
* uint16
237+
* int32
238+
* uint32
239+
* int64
240+
* uint64
241+
"""
242+
243+
return request.param

pandas/core/algorithms.py

+4 -1
Original file line numberDiff line numberDiff line change
@@ -1131,9 +1131,12 @@ def compute(self, method):
11311131
return dropped[slc].sort_values(ascending=ascending).head(n)
11321132

11331133
# fast method
1134-
arr, _, _ = _ensure_data(dropped.values)
1134+
arr, pandas_dtype, _ = _ensure_data(dropped.values)
11351135
if method == 'nlargest':
11361136
arr = -arr
1137+
if is_integer_dtype(pandas_dtype):
1138+
# GH 21426: ensure reverse ordering at boundaries
1139+
arr -= 1
11371140

11381141
if self.keep == 'last':
11391142
arr = arr[::-1]

pandas/tests/frame/test_analytics.py

+36 -42
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from numpy.random import randn
1313
import numpy as np
1414

15-
from pandas.compat import lrange, product, PY35
15+
from pandas.compat import lrange, PY35
1616
from pandas import (compat, isna, notna, DataFrame, Series,
1717
MultiIndex, date_range, Timestamp, Categorical,
1818
_np_version_under1p12, _np_version_under1p15)
@@ -2240,54 +2240,49 @@ class TestNLargestNSmallest(object):
22402240

22412241
# ----------------------------------------------------------------------
22422242
# Top / bottom
2243-
@pytest.mark.parametrize(
2244-
'method, n, order',
2245-
product(['nsmallest', 'nlargest'], range(1, 11),
2246-
[['a'],
2247-
['c'],
2248-
['a', 'b'],
2249-
['a', 'c'],
2250-
['b', 'a'],
2251-
['b', 'c'],
2252-
['a', 'b', 'c'],
2253-
['c', 'a', 'b'],
2254-
['c', 'b', 'a'],
2255-
['b', 'c', 'a'],
2256-
['b', 'a', 'c'],
2257-
2258-
# dups!
2259-
['b', 'c', 'c'],
2260-
2261-
]))
2262-
def test_n(self, df_strings, method, n, order):
2243+
@pytest.mark.parametrize('order', [
2244+
['a'],
2245+
['c'],
2246+
['a', 'b'],
2247+
['a', 'c'],
2248+
['b', 'a'],
2249+
['b', 'c'],
2250+
['a', 'b', 'c'],
2251+
['c', 'a', 'b'],
2252+
['c', 'b', 'a'],
2253+
['b', 'c', 'a'],
2254+
['b', 'a', 'c'],
2255+
2256+
# dups!
2257+
['b', 'c', 'c']])
2258+
@pytest.mark.parametrize('n', range(1, 11))
2259+
def test_n(self, df_strings, nselect_method, n, order):
22632260
# GH10393
22642261
df = df_strings
22652262
if 'b' in order:
22662263

22672264
error_msg = self.dtype_error_msg_template.format(
2268-
column='b', method=method, dtype='object')
2265+
column='b', method=nselect_method, dtype='object')
22692266
with tm.assert_raises_regex(TypeError, error_msg):
2270-
getattr(df, method)(n, order)
2267+
getattr(df, nselect_method)(n, order)
22712268
else:
2272-
ascending = method == 'nsmallest'
2273-
result = getattr(df, method)(n, order)
2269+
ascending = nselect_method == 'nsmallest'
2270+
result = getattr(df, nselect_method)(n, order)
22742271
expected = df.sort_values(order, ascending=ascending).head(n)
22752272
tm.assert_frame_equal(result, expected)
22762273

2277-
@pytest.mark.parametrize(
2278-
'method, columns',
2279-
product(['nsmallest', 'nlargest'],
2280-
product(['group'], ['category_string', 'string'])
2281-
))
2282-
def test_n_error(self, df_main_dtypes, method, columns):
2274+
@pytest.mark.parametrize('columns', [
2275+
('group', 'category_string'), ('group', 'string')])
2276+
def test_n_error(self, df_main_dtypes, nselect_method, columns):
22832277
df = df_main_dtypes
2278+
col = columns[1]
22842279
error_msg = self.dtype_error_msg_template.format(
2285-
column=columns[1], method=method, dtype=df[columns[1]].dtype)
2280+
column=col, method=nselect_method, dtype=df[col].dtype)
22862281
# escape some characters that may be in the repr
22872282
error_msg = (error_msg.replace('(', '\\(').replace(")", "\\)")
22882283
.replace("[", "\\[").replace("]", "\\]"))
22892284
with tm.assert_raises_regex(TypeError, error_msg):
2290-
getattr(df, method)(2, columns)
2285+
getattr(df, nselect_method)(2, columns)
22912286

22922287
def test_n_all_dtypes(self, df_main_dtypes):
22932288
df = df_main_dtypes
@@ -2308,15 +2303,14 @@ def test_n_identical_values(self):
23082303
expected = pd.DataFrame({'a': [1] * 3, 'b': [1, 2, 3]})
23092304
tm.assert_frame_equal(result, expected)
23102305

2311-
@pytest.mark.parametrize(
2312-
'n, order',
2313-
product([1, 2, 3, 4, 5],
2314-
[['a', 'b', 'c'],
2315-
['c', 'b', 'a'],
2316-
['a'],
2317-
['b'],
2318-
['a', 'b'],
2319-
['c', 'b']]))
2306+
@pytest.mark.parametrize('order', [
2307+
['a', 'b', 'c'],
2308+
['c', 'b', 'a'],
2309+
['a'],
2310+
['b'],
2311+
['a', 'b'],
2312+
['c', 'b']])
2313+
@pytest.mark.parametrize('n', range(1, 6))
23202314
def test_n_duplicate_index(self, df_duplicates, n, order):
23212315
# GH 13412
23222316

0 commit comments

Comments
 (0)