@@ -217,26 +217,48 @@ def test_sort_values_stable_descending_sort(self):
217
217
sorted_df = df .sort_values (by = "sort_col" , kind = "mergesort" , ascending = False )
218
218
tm .assert_frame_equal (df , sorted_df )
219
219
220
- def test_sort_values_stable_descending_multicolumn_sort (self ):
220
+ @pytest .mark .parametrize (
221
+ "expected_idx_non_na, ascending" ,
222
+ [
223
+ [
224
+ [3 , 4 , 5 , 0 , 1 , 8 , 6 , 9 , 7 , 10 , 13 , 14 ],
225
+ [True , True ],
226
+ ],
227
+ [
228
+ [0 , 3 , 4 , 5 , 1 , 8 , 6 , 7 , 10 , 13 , 14 , 9 ],
229
+ [True , False ],
230
+ ],
231
+ [
232
+ [9 , 7 , 10 , 13 , 14 , 6 , 8 , 1 , 3 , 4 , 5 , 0 ],
233
+ [False , True ],
234
+ ],
235
+ [
236
+ [7 , 10 , 13 , 14 , 9 , 6 , 8 , 1 , 0 , 3 , 4 , 5 ],
237
+ [False , False ],
238
+ ],
239
+ ],
240
+ )
241
+ @pytest .mark .parametrize ("na_position" , ["first" , "last" ])
242
+ def test_sort_values_stable_multicolumn_sort (
243
+ self , expected_idx_non_na , ascending , na_position
244
+ ):
245
+ # GH#38426: sort_values with multiple columns / labels must be a stable sort
221
246
df = DataFrame (
222
- {"A" : [1 , 2 , np .nan , 1 , 6 , 8 , 4 ], "B" : [9 , np .nan , 5 , 2 , 5 , 4 , 5 ]}
223
- )
224
- # test stable mergesort
225
- expected = DataFrame (
226
- {"A" : [np .nan , 8 , 6 , 4 , 2 , 1 , 1 ], "B" : [5 , 4 , 5 , 5 , np .nan , 2 , 9 ]},
227
- index = [2 , 5 , 4 , 6 , 1 , 3 , 0 ],
228
- )
229
- sorted_df = df .sort_values (
230
- ["A" , "B" ], ascending = [0 , 1 ], na_position = "first" , kind = "mergesort"
247
+ {
248
+ "A" : [1 , 2 , np .nan , 1 , 1 , 1 , 6 , 8 , 4 , 8 , 8 , np .nan , np .nan , 8 , 8 ],
249
+ "B" : [9 , np .nan , 5 , 2 , 2 , 2 , 5 , 4 , 5 , 3 , 4 , np .nan , np .nan , 4 , 4 ],
250
+ }
231
251
)
232
- tm .assert_frame_equal (sorted_df , expected )
233
-
234
- expected = DataFrame (
235
- {"A" : [np .nan , 8 , 6 , 4 , 2 , 1 , 1 ], "B" : [5 , 4 , 5 , 5 , np .nan , 9 , 2 ]},
236
- index = [2 , 5 , 4 , 6 , 1 , 0 , 3 ],
252
+ # Rows with NaN only in col "B" have a unique value in "A", so na_position
253
+ # does not affect them; only the rows with NaN in "A" are placed separately:
254
+ expected_idx = (
255
+ [11 , 12 , 2 ] + expected_idx_non_na
256
+ if na_position == "first"
257
+ else expected_idx_non_na + [2 , 11 , 12 ]
237
258
)
259
+ expected = df .take (expected_idx )
238
260
sorted_df = df .sort_values (
239
- ["A" , "B" ], ascending = [ 0 , 0 ], na_position = "first" , kind = "mergesort"
261
+ ["A" , "B" ], ascending = ascending , na_position = na_position
240
262
)
241
263
tm .assert_frame_equal (sorted_df , expected )
242
264
0 commit comments