@@ -1140,6 +1140,18 @@ def test_nlargest_multiple_columns(self):
1140
1140
expected = df .sort_values (['a' , 'b' ], ascending = False ).head (5 )
1141
1141
tm .assert_frame_equal (result , expected )
1142
1142
1143
+ def test_nlargest_nsmallest_identical_values (self ):
1144
+ # GH15297
1145
+ df = pd .DataFrame ({'a' : [1 ] * 5 , 'b' : [1 , 2 , 3 , 4 , 5 ]})
1146
+
1147
+ result = df .nlargest (3 , 'a' )
1148
+ expected = pd .DataFrame ({'a' : [1 ] * 3 , 'b' : [5 , 4 , 3 ]}, index = [4 , 3 , 2 ])
1149
+ tm .assert_frame_equal (result , expected )
1150
+
1151
+ result = df .nsmallest (3 , 'a' )
1152
+ expected = pd .DataFrame ({'a' : [1 ] * 3 , 'b' : [1 , 2 , 3 ]})
1153
+ tm .assert_frame_equal (result , expected )
1154
+
1143
1155
def test_nsmallest (self ):
1144
1156
from string import ascii_lowercase
1145
1157
df = pd .DataFrame ({'a' : np .random .permutation (10 ),
@@ -1159,33 +1171,32 @@ def test_nsmallest_multiple_columns(self):
1159
1171
1160
1172
def test_nsmallest_nlargest_duplicate_index (self ):
1161
1173
# GH 13412
1162
- df = pd .DataFrame ({'a' : [1 , 2 , 3 , 4 ],
1163
- 'b' : [4 , 3 , 2 , 1 ],
1164
- 'c' : [0 , 1 , 2 , 3 ]},
1165
- index = [0 , 0 , 1 , 1 ])
1166
- result = df .nsmallest (4 , 'a' )
1167
- expected = df .sort_values ('a' ).head (4 )
1168
- tm .assert_frame_equal (result , expected )
1174
+ df = pd .DataFrame ({'a' : [1 , 2 , 3 , 3 , 3 ],
1175
+ 'b' : [1 , 1 , 1 , 1 , 1 ],
1176
+ 'c' : [0 , 1 , 2 , 5 , 4 ]},
1177
+ index = [0 , 0 , 1 , 1 , 1 ])
1169
1178
1170
- result = df .nlargest (4 , 'a' )
1171
- expected = df .sort_values ('a' , ascending = False ).head (4 )
1179
+ result = df .nsmallest (4 , [ 'a' , 'b' , 'c' ] )
1180
+ expected = df .sort_values ([ 'a' , 'b' , 'c' ] ).head (4 )
1172
1181
tm .assert_frame_equal (result , expected )
1173
1182
1174
- result = df .nsmallest (4 , ['a' , 'c' ])
1175
- expected = df .sort_values (['a' , 'c' ]).head (4 )
1183
+ result = df .nlargest (4 , ['a' , 'b' , 'c' ])
1184
+ expected = df .sort_values (['a' , 'b' , 'c' ], ascending = False ).head (4 )
1176
1185
tm .assert_frame_equal (result , expected )
1177
1186
1178
- result = df .nsmallest (4 , ['c' , 'a' ])
1179
- expected = df .sort_values (['c' , 'a' ]).head (4 )
1187
+ result = df .nlargest (4 , ['c' , 'b' , 'a' ])
1188
+ expected = df .sort_values (['c' , 'b' , 'a' ], ascending = False ).head (4 )
1180
1189
tm .assert_frame_equal (result , expected )
1181
1190
1182
- result = df .nlargest (4 , ['a' , 'c' ])
1183
- expected = df .sort_values (['a' , 'c' ], ascending = False ).head (4 )
1191
+ result = df .nsmallest (4 , ['c' , 'b' , 'a' ])
1192
+ expected = df .sort_values (['c' , 'b' , 'a' ] ).head (4 )
1184
1193
tm .assert_frame_equal (result , expected )
1185
1194
1186
- result = df .nlargest (4 , ['c' , 'a' ])
1187
- expected = df .sort_values (['c' , 'a' ], ascending = False ).head (4 )
1195
+ # Test all duplicates still returns df of size n
1196
+ result = df .nsmallest (2 , 'b' )
1197
+ expected = df .sort_values ('b' ).head (2 )
1188
1198
tm .assert_frame_equal (result , expected )
1199
+
1189
1200
# ----------------------------------------------------------------------
1190
1201
# Isin
1191
1202
0 commit comments