Skip to content

Commit fdfaa97

Browse files
committed
BUG: _nsorted incorrect with duplicated values in index (#13412)
Add note to whatsnew; add nlargest benchmark
1 parent f26b049 commit fdfaa97

File tree

4 files changed

+42
-4
lines changed

4 files changed

+42
-4
lines changed

asv_bench/benchmarks/frame_methods.py

+10
Original file line number | Diff line number | Diff line change
@@ -1012,3 +1012,13 @@ def setup(self):
10121012

10131013
def time_frame_quantile_axis1(self):
10141014
self.df.quantile([0.1, 0.5], axis=1)
1015+
1016+
class frame_nlargest(object):
1017+
goal_time = 0.2
1018+
1019+
def setup(self):
1020+
self.df = DataFrame(np.random.randn(1000, 3),
1021+
columns=list('ABC'))
1022+
1023+
def time_frame_nlargest(self):
1024+
self.df.nlargest(100, 'A')

doc/source/whatsnew/v0.19.2.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ Bug Fixes
5151
- Compat with python 3.6 for Timestamp pickles (:issue:`14689`)
5252

5353

54-
54+
- Bug in ``DataFrame.nlargest`` and ``DataFrame.nsmallest`` when the index had duplicate values (:issue:`13412`)
5555

5656

5757

pandas/core/frame.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -3395,9 +3395,9 @@ def _nsorted(self, columns, n, method, keep):
33953395
columns = [columns]
33963396
columns = list(columns)
33973397
ser = getattr(self[columns[0]], method)(n, keep=keep)
3398-
ascending = dict(nlargest=False, nsmallest=True)[method]
3399-
return self.loc[ser.index].sort_values(columns, ascending=ascending,
3400-
kind='mergesort')
3398+
if isinstance(ser, Series):
3399+
ser = ser.to_frame()
3400+
return ser.merge(self, on=columns[0], left_index=True)[self.columns]
34013401

34023402
def nlargest(self, n, columns, keep='first'):
34033403
"""Get the rows of a DataFrame sorted by the `n` largest

pandas/tests/frame/test_analytics.py

+28
Original file line number | Diff line number | Diff line change
@@ -1323,6 +1323,34 @@ def test_nsmallest_multiple_columns(self):
13231323
expected = df.sort_values(['a', 'c']).head(5)
13241324
tm.assert_frame_equal(result, expected)
13251325

1326+
def test_nsmallest_nlargest_duplicate_index(self):
1327+
df = pd.DataFrame({'a': [1, 2, 3, 4],
1328+
'b': [4, 3, 2, 1],
1329+
'c': [0, 1, 2, 3]},
1330+
index=[0, 0, 1, 1])
1331+
result = df.nsmallest(4, 'a')
1332+
expected = df.sort_values('a').head(4)
1333+
tm.assert_frame_equal(result, expected)
1334+
1335+
result = df.nlargest(4, 'a')
1336+
expected = df.sort_values('a', ascending=False).head(4)
1337+
tm.assert_frame_equal(result, expected)
1338+
1339+
result = df.nsmallest(4, ['a', 'c'])
1340+
expected = df.sort_values(['a', 'c']).head(4)
1341+
tm.assert_frame_equal(result, expected)
1342+
1343+
result = df.nsmallest(4, ['c', 'a'])
1344+
expected = df.sort_values(['c', 'a']).head(4)
1345+
tm.assert_frame_equal(result, expected)
1346+
1347+
result = df.nlargest(4, ['a', 'c'])
1348+
expected = df.sort_values(['a', 'c'], ascending=False).head(4)
1349+
tm.assert_frame_equal(result, expected)
1350+
1351+
result = df.nlargest(4, ['c', 'a'])
1352+
expected = df.sort_values(['c', 'a'], ascending=False).head(4)
1353+
tm.assert_frame_equal(result, expected)
13261354
# ----------------------------------------------------------------------
13271355
# Isin
13281356

0 commit comments

Comments
 (0)