@@ -102,8 +102,11 @@ class _Unstacker:
102
102
unstacked : DataFrame
103
103
"""
104
104
105
- def __init__ (self , index : MultiIndex , level : Level , constructor ) -> None :
105
+ def __init__ (
106
+ self , index : MultiIndex , level : Level , constructor , sort : bool = True
107
+ ) -> None :
106
108
self .constructor = constructor
109
+ self .sort = sort
107
110
108
111
self .index = index .remove_unused_levels ()
109
112
@@ -119,11 +122,15 @@ def __init__(self, index: MultiIndex, level: Level, constructor) -> None:
119
122
self .removed_name = self .new_index_names .pop (self .level )
120
123
self .removed_level = self .new_index_levels .pop (self .level )
121
124
self .removed_level_full = index .levels [self .level ]
125
+ if not self .sort :
126
+ unique_codes = unique (self .index .codes [self .level ])
127
+ self .removed_level = self .removed_level .take (unique_codes )
128
+ self .removed_level_full = self .removed_level_full .take (unique_codes )
122
129
123
130
# Bug fix GH 20601
124
131
# If the data frame is too big, the number of unique index combination
125
132
# will cause int32 overflow on windows environments.
126
- # We want to check and raise an error before this happens
133
+ # We want to check and raise a warning before this happens
127
134
num_rows = np .max ([index_level .size for index_level in self .new_index_levels ])
128
135
num_columns = self .removed_level .size
129
136
@@ -164,13 +171,17 @@ def _indexer_and_to_sort(
164
171
@cache_readonly
165
172
def sorted_labels (self ) -> list [np .ndarray ]:
166
173
indexer , to_sort = self ._indexer_and_to_sort
167
- return [line .take (indexer ) for line in to_sort ]
174
+ if self .sort :
175
+ return [line .take (indexer ) for line in to_sort ]
176
+ return to_sort
168
177
169
178
def _make_sorted_values (self , values : np .ndarray ) -> np .ndarray :
170
- indexer , _ = self ._indexer_and_to_sort
179
+ if self .sort :
180
+ indexer , _ = self ._indexer_and_to_sort
171
181
172
- sorted_values = algos .take_nd (values , indexer , axis = 0 )
173
- return sorted_values
182
+ sorted_values = algos .take_nd (values , indexer , axis = 0 )
183
+ return sorted_values
184
+ return values
174
185
175
186
def _make_selectors (self ):
176
187
new_levels = self .new_index_levels
@@ -195,7 +206,10 @@ def _make_selectors(self):
195
206
196
207
self .group_index = comp_index
197
208
self .mask = mask
198
- self .compressor = comp_index .searchsorted (np .arange (ngroups ))
209
+ if self .sort :
210
+ self .compressor = comp_index .searchsorted (np .arange (ngroups ))
211
+ else :
212
+ self .compressor = np .sort (np .unique (comp_index , return_index = True )[1 ])
199
213
200
214
@cache_readonly
201
215
def mask_all (self ) -> bool :
@@ -376,7 +390,9 @@ def new_index(self) -> MultiIndex:
376
390
)
377
391
378
392
379
- def _unstack_multiple (data : Series | DataFrame , clocs , fill_value = None ):
393
+ def _unstack_multiple (
394
+ data : Series | DataFrame , clocs , fill_value = None , sort : bool = True
395
+ ):
380
396
if len (clocs ) == 0 :
381
397
return data
382
398
@@ -421,7 +437,7 @@ def _unstack_multiple(data: Series | DataFrame, clocs, fill_value=None):
421
437
dummy = data .copy ()
422
438
dummy .index = dummy_index
423
439
424
- unstacked = dummy .unstack ("__placeholder__" , fill_value = fill_value )
440
+ unstacked = dummy .unstack ("__placeholder__" , fill_value = fill_value , sort = sort )
425
441
new_levels = clevels
426
442
new_names = cnames
427
443
new_codes = recons_codes
@@ -430,7 +446,7 @@ def _unstack_multiple(data: Series | DataFrame, clocs, fill_value=None):
430
446
result = data
431
447
while clocs :
432
448
val = clocs .pop (0 )
433
- result = result .unstack (val , fill_value = fill_value )
449
+ result = result .unstack (val , fill_value = fill_value , sort = sort )
434
450
clocs = [v if v < val else v - 1 for v in clocs ]
435
451
436
452
return result
@@ -439,7 +455,9 @@ def _unstack_multiple(data: Series | DataFrame, clocs, fill_value=None):
439
455
dummy_df = data .copy (deep = False )
440
456
dummy_df .index = dummy_index
441
457
442
- unstacked = dummy_df .unstack ("__placeholder__" , fill_value = fill_value )
458
+ unstacked = dummy_df .unstack (
459
+ "__placeholder__" , fill_value = fill_value , sort = sort
460
+ )
443
461
if isinstance (unstacked , Series ):
444
462
unstcols = unstacked .index
445
463
else :
@@ -464,12 +482,12 @@ def _unstack_multiple(data: Series | DataFrame, clocs, fill_value=None):
464
482
return unstacked
465
483
466
484
467
- def unstack (obj : Series | DataFrame , level , fill_value = None ):
485
+ def unstack (obj : Series | DataFrame , level , fill_value = None , sort : bool = True ):
468
486
if isinstance (level , (tuple , list )):
469
487
if len (level ) != 1 :
470
488
# _unstack_multiple only handles MultiIndexes,
471
489
# and isn't needed for a single level
472
- return _unstack_multiple (obj , level , fill_value = fill_value )
490
+ return _unstack_multiple (obj , level , fill_value = fill_value , sort = sort )
473
491
else :
474
492
level = level [0 ]
475
493
@@ -479,9 +497,9 @@ def unstack(obj: Series | DataFrame, level, fill_value=None):
479
497
480
498
if isinstance (obj , DataFrame ):
481
499
if isinstance (obj .index , MultiIndex ):
482
- return _unstack_frame (obj , level , fill_value = fill_value )
500
+ return _unstack_frame (obj , level , fill_value = fill_value , sort = sort )
483
501
else :
484
- return obj .T .stack (dropna = False )
502
+ return obj .T .stack (dropna = False , sort = sort )
485
503
elif not isinstance (obj .index , MultiIndex ):
486
504
# GH 36113
487
505
# Give nicer error messages when unstack a Series whose
@@ -491,18 +509,22 @@ def unstack(obj: Series | DataFrame, level, fill_value=None):
491
509
)
492
510
else :
493
511
if is_1d_only_ea_dtype (obj .dtype ):
494
- return _unstack_extension_series (obj , level , fill_value )
512
+ return _unstack_extension_series (obj , level , fill_value , sort = sort )
495
513
unstacker = _Unstacker (
496
- obj .index , level = level , constructor = obj ._constructor_expanddim
514
+ obj .index , level = level , constructor = obj ._constructor_expanddim , sort = sort
497
515
)
498
516
return unstacker .get_result (
499
517
obj ._values , value_columns = None , fill_value = fill_value
500
518
)
501
519
502
520
503
- def _unstack_frame (obj : DataFrame , level , fill_value = None ) -> DataFrame :
521
+ def _unstack_frame (
522
+ obj : DataFrame , level , fill_value = None , sort : bool = True
523
+ ) -> DataFrame :
504
524
assert isinstance (obj .index , MultiIndex ) # checked by caller
505
- unstacker = _Unstacker (obj .index , level = level , constructor = obj ._constructor )
525
+ unstacker = _Unstacker (
526
+ obj .index , level = level , constructor = obj ._constructor , sort = sort
527
+ )
506
528
507
529
if not obj ._can_fast_transpose :
508
530
mgr = obj ._mgr .unstack (unstacker , fill_value = fill_value )
@@ -513,7 +535,9 @@ def _unstack_frame(obj: DataFrame, level, fill_value=None) -> DataFrame:
513
535
)
514
536
515
537
516
- def _unstack_extension_series (series : Series , level , fill_value ) -> DataFrame :
538
+ def _unstack_extension_series (
539
+ series : Series , level , fill_value , sort : bool
540
+ ) -> DataFrame :
517
541
"""
518
542
Unstack an ExtensionArray-backed Series.
519
543
@@ -529,6 +553,8 @@ def _unstack_extension_series(series: Series, level, fill_value) -> DataFrame:
529
553
The user-level (not physical storage) fill value to use for
530
554
missing values introduced by the reshape. Passed to
531
555
``series.values.take``.
556
+ sort : bool
557
+ Whether to sort the resulting MultiIndex levels.
532
558
533
559
Returns
534
560
-------
@@ -538,7 +564,7 @@ def _unstack_extension_series(series: Series, level, fill_value) -> DataFrame:
538
564
"""
539
565
# Defer to the logic in ExtensionBlock._unstack
540
566
df = series .to_frame ()
541
- result = df .unstack (level = level , fill_value = fill_value )
567
+ result = df .unstack (level = level , fill_value = fill_value , sort = sort )
542
568
543
569
# equiv: result.droplevel(level=0, axis=1)
544
570
# but this avoids an extra copy
0 commit comments