@@ -1140,6 +1140,20 @@ def test_nlargest_multiple_columns(self):
1140
1140
expected = df .sort_values (['a' , 'b' ], ascending = False ).head (5 )
1141
1141
tm .assert_frame_equal (result , expected )
1142
1142
1143
+ def test_nlargest_nsmallest_identical_values (self ):
1144
+ # GH15297
1145
+ df = pd .DataFrame ({'a' : [1 ] * 5 , 'b' : [1 , 2 , 3 , 4 , 5 ]})
1146
+
1147
+ result = df .nlargest (3 , 'a' )
1148
+ expected = pd .DataFrame (
1149
+ {'a' : [1 ] * 3 , 'b' : [5 , 4 , 3 ]}, index = [4 , 3 , 2 ]
1150
+ )
1151
+ tm .assert_frame_equal (result , expected )
1152
+
1153
+ result = df .nsmallest (3 , 'a' )
1154
+ expected = pd .DataFrame ({'a' : [1 ] * 3 , 'b' : [1 , 2 , 3 ]})
1155
+ tm .assert_frame_equal (result , expected )
1156
+
1143
1157
def test_nsmallest (self ):
1144
1158
from string import ascii_lowercase
1145
1159
df = pd .DataFrame ({'a' : np .random .permutation (10 ),
@@ -1159,33 +1173,32 @@ def test_nsmallest_multiple_columns(self):
1159
1173
1160
1174
def test_nsmallest_nlargest_duplicate_index (self ):
1161
1175
# GH 13412
1162
- df = pd .DataFrame ({'a' : [1 , 2 , 3 , 4 ],
1163
- 'b' : [4 , 3 , 2 , 1 ],
1164
- 'c' : [0 , 1 , 2 , 3 ]},
1165
- index = [0 , 0 , 1 , 1 ])
1166
- result = df .nsmallest (4 , 'a' )
1167
- expected = df .sort_values ('a' ).head (4 )
1168
- tm .assert_frame_equal (result , expected )
1176
+ df = pd .DataFrame ({'a' : [1 , 2 , 3 , 3 , 3 ],
1177
+ 'b' : [1 , 1 , 1 , 1 , 1 ],
1178
+ 'c' : [0 , 1 , 2 , 5 , 4 ]},
1179
+ index = [0 , 0 , 1 , 1 , 1 ])
1169
1180
1170
- result = df .nlargest (4 , 'a' )
1171
- expected = df .sort_values ('a' , ascending = False ).head (4 )
1181
+ result = df .nsmallest (4 , [ 'a' , 'b' , 'c' ] )
1182
+ expected = df .sort_values ([ 'a' , 'b' , 'c' ] ).head (4 )
1172
1183
tm .assert_frame_equal (result , expected )
1173
1184
1174
- result = df .nsmallest (4 , ['a' , 'c' ])
1175
- expected = df .sort_values (['a' , 'c' ]).head (4 )
1185
+ result = df .nlargest (4 , ['a' , 'b ' , 'c' ])
1186
+ expected = df .sort_values (['a' , 'b' , ' c' ], ascending = False ).head (4 )
1176
1187
tm .assert_frame_equal (result , expected )
1177
1188
1178
- result = df .nsmallest (4 , ['c' , 'a' ])
1179
- expected = df .sort_values (['c' , 'a' ]).head (4 )
1189
+ result = df .nlargest (4 , ['c' , 'b ' , 'a' ])
1190
+ expected = df .sort_values (['c' , 'b' , ' a' ], ascending = False ).head (4 )
1180
1191
tm .assert_frame_equal (result , expected )
1181
1192
1182
- result = df .nlargest (4 , ['a ' , 'c ' ])
1183
- expected = df .sort_values (['a ' , 'c' ], ascending = False ).head (4 )
1193
+ result = df .nsmallest (4 , ['c ' , 'b' , 'a ' ])
1194
+ expected = df .sort_values (['c ' , 'b' , 'a' ] ).head (4 )
1184
1195
tm .assert_frame_equal (result , expected )
1185
1196
1186
- result = df .nlargest (4 , ['c' , 'a' ])
1187
- expected = df .sort_values (['c' , 'a' ], ascending = False ).head (4 )
1197
+ # Test all duplicates still returns df of size n
1198
+ result = df .nsmallest (2 , 'b' )
1199
+ expected = df .sort_values ('b' ).head (2 )
1188
1200
tm .assert_frame_equal (result , expected )
1201
+
1189
1202
# ----------------------------------------------------------------------
1190
1203
# Isin
1191
1204
0 commit comments