@@ -1140,6 +1140,20 @@ def test_nlargest_multiple_columns(self):
1140
1140
expected = df .sort_values (['a' , 'b' ], ascending = False ).head (5 )
1141
1141
tm .assert_frame_equal (result , expected )
1142
1142
1143
+ def test_nlargest_nsmallest_identical_values (self ):
1144
+ # GH15297
1145
+ df = pd .DataFrame ({'a' : [1 ] * 5 , 'b' : [1 , 2 , 3 , 4 , 5 ]})
1146
+
1147
+ result = df .nlargest (3 , 'a' )
1148
+ expected = pd .DataFrame (
1149
+ {'a' : [1 ] * 3 , 'b' : [1 , 2 , 3 ]}, index = [0 , 1 , 2 ]
1150
+ )
1151
+ tm .assert_frame_equal (result , expected )
1152
+
1153
+ result = df .nsmallest (3 , 'a' )
1154
+ expected = pd .DataFrame ({'a' : [1 ] * 3 , 'b' : [1 , 2 , 3 ]})
1155
+ tm .assert_frame_equal (result , expected )
1156
+
1143
1157
def test_nsmallest (self ):
1144
1158
from string import ascii_lowercase
1145
1159
df = pd .DataFrame ({'a' : np .random .permutation (10 ),
@@ -1159,33 +1173,41 @@ def test_nsmallest_multiple_columns(self):
1159
1173
1160
1174
def test_nsmallest_nlargest_duplicate_index (self ):
1161
1175
# GH 13412
1162
- df = pd .DataFrame ({'a' : [1 , 2 , 3 , 4 ],
1163
- 'b' : [4 , 3 , 2 , 1 ],
1164
- 'c' : [0 , 1 , 2 , 3 ]},
1165
- index = [0 , 0 , 1 , 1 ])
1166
- result = df .nsmallest (4 , 'a' )
1167
- expected = df .sort_values ('a' ).head (4 )
1176
+ df = pd .DataFrame ({'a' : [1 , 2 , 3 , 4 , 4 ],
1177
+ 'b' : [1 , 1 , 1 , 1 , 1 ],
1178
+ 'c' : [0 , 1 , 2 , 5 , 4 ]},
1179
+ index = [0 , 0 , 1 , 1 , 1 ])
1180
+
1181
+ result = df .nsmallest (4 , ['a' , 'b' , 'c' ])
1182
+ expected = df .sort_values (['a' , 'b' , 'c' ]).head (4 )
1168
1183
tm .assert_frame_equal (result , expected )
1169
1184
1170
- result = df .nlargest (4 , 'a' )
1171
- expected = df .sort_values ('a' , ascending = False ).head (4 )
1185
+ result = df .nlargest (4 , [ 'a' , 'b' , 'c' ] )
1186
+ expected = df .sort_values ([ 'a' , 'b' , 'c' ] , ascending = False ).head (4 )
1172
1187
tm .assert_frame_equal (result , expected )
1173
1188
1174
- result = df .nsmallest (4 , ['a' , 'c' ])
1175
- expected = df .sort_values (['a' , 'c' ] ).head (4 )
1189
+ result = df .nlargest (4 , ['c' , 'b' , 'a' ])
1190
+ expected = df .sort_values (['c' , 'b' , 'a' ], ascending = False ).head (4 )
1176
1191
tm .assert_frame_equal (result , expected )
1177
1192
1178
- result = df .nsmallest (4 , ['c' , 'a' ])
1179
- expected = df .sort_values (['c' , 'a' ]).head (4 )
1193
+ result = df .nsmallest (4 , ['c' , 'b' , 'a' ])
1194
+ expected = df .sort_values (['c' , 'b' , 'a' ]).head (4 )
1180
1195
tm .assert_frame_equal (result , expected )
1181
1196
1182
- result = df .nlargest (4 , ['a' , 'c' ])
1183
- expected = df .sort_values (['a' , 'c' ], ascending = False ).head (4 )
1197
+ # Test all duplicates still returns df of size n
1198
+ result = df .nsmallest (2 , 'b' )
1199
+ expected = df .sort_values ('b' ).head (2 )
1184
1200
tm .assert_frame_equal (result , expected )
1185
1201
1186
- result = df .nlargest (4 , ['c' , 'a' ])
1187
- expected = df .sort_values (['c' , 'a' ], ascending = False ).head (4 )
1202
+ def test_nsmallest_nlargest_duplicate_multi_index (self ):
1203
+ df = pd .DataFrame ({'a' : [1 , 2 , 3 , 3 , 3 ],
1204
+ 'b' : [1 , 1 , 1 , 1 , 1 ],
1205
+ 'c' : [0 , 1 , 2 , 5 , 4 ]},
1206
+ index = [[0 , 0 , 0 , 0 , 0 ], [1 , 1 , 1 , 1 , 1 ]])
1207
+ result = df .nsmallest (4 , ['a' , 'b' , 'c' ])
1208
+ expected = df .sort_values (['a' , 'b' , 'c' ]).head (4 )
1188
1209
tm .assert_frame_equal (result , expected )
1210
+
1189
1211
# ----------------------------------------------------------------------
1190
1212
# Isin
1191
1213
0 commit comments