28
28
from pandas .core .dtypes .missing import notna
29
29
30
30
import pandas .core .algorithms as algos
31
+ from pandas .core .algorithms import unique
31
32
from pandas .core .arrays .categorical import factorize_from_iterable
32
33
from pandas .core .construction import ensure_wrapped_if_datetimelike
33
34
from pandas .core .frame import DataFrame
@@ -545,7 +546,7 @@ def _unstack_extension_series(series: Series, level, fill_value) -> DataFrame:
545
546
return result
546
547
547
548
548
- def stack (frame : DataFrame , level = - 1 , dropna : bool = True ):
549
+ def stack (frame : DataFrame , level = - 1 , dropna : bool = True , sort : bool = True ):
549
550
"""
550
551
Convert DataFrame to Series with multi-level Index. Columns become the
551
552
second level of the resulting hierarchical index
@@ -567,7 +568,9 @@ def factorize(index):
567
568
level_num = frame .columns ._get_level_number (level )
568
569
569
570
if isinstance (frame .columns , MultiIndex ):
570
- return _stack_multi_columns (frame , level_num = level_num , dropna = dropna )
571
+ return _stack_multi_columns (
572
+ frame , level_num = level_num , dropna = dropna , sort = sort
573
+ )
571
574
elif isinstance (frame .index , MultiIndex ):
572
575
new_levels = list (frame .index .levels )
573
576
new_codes = [lab .repeat (K ) for lab in frame .index .codes ]
@@ -620,13 +623,13 @@ def factorize(index):
620
623
return frame ._constructor_sliced (new_values , index = new_index )
621
624
622
625
623
- def stack_multiple (frame : DataFrame , level , dropna : bool = True ):
626
+ def stack_multiple (frame : DataFrame , level , dropna : bool = True , sort : bool = True ):
624
627
# If all passed levels match up to column names, no
625
628
# ambiguity about what to do
626
629
if all (lev in frame .columns .names for lev in level ):
627
630
result = frame
628
631
for lev in level :
629
- result = stack (result , lev , dropna = dropna )
632
+ result = stack (result , lev , dropna = dropna , sort = sort )
630
633
631
634
# Otherwise, level numbers may change as each successive level is stacked
632
635
elif all (isinstance (lev , int ) for lev in level ):
@@ -639,7 +642,7 @@ def stack_multiple(frame: DataFrame, level, dropna: bool = True):
639
642
640
643
while level :
641
644
lev = level .pop (0 )
642
- result = stack (result , lev , dropna = dropna )
645
+ result = stack (result , lev , dropna = dropna , sort = sort )
643
646
# Decrement all level numbers greater than current, as these
644
647
# have now shifted down by one
645
648
level = [v if v <= lev else v - 1 for v in level ]
@@ -681,7 +684,7 @@ def _stack_multi_column_index(columns: MultiIndex) -> MultiIndex:
681
684
682
685
683
686
def _stack_multi_columns (
684
- frame : DataFrame , level_num : int = - 1 , dropna : bool = True
687
+ frame : DataFrame , level_num : int = - 1 , dropna : bool = True , sort : bool = True
685
688
) -> DataFrame :
686
689
def _convert_level_number (level_num : int , columns : Index ):
687
690
"""
@@ -711,7 +714,7 @@ def _convert_level_number(level_num: int, columns: Index):
711
714
roll_columns = roll_columns .swaplevel (lev1 , lev2 )
712
715
this .columns = mi_cols = roll_columns
713
716
714
- if not mi_cols ._is_lexsorted ():
717
+ if not mi_cols ._is_lexsorted () and sort :
715
718
# Work around the edge case where 0 is one of the column names,
716
719
# which interferes with trying to sort based on the first
717
720
# level
@@ -725,7 +728,9 @@ def _convert_level_number(level_num: int, columns: Index):
725
728
# time to ravel the values
726
729
new_data = {}
727
730
level_vals = mi_cols .levels [- 1 ]
728
- level_codes = sorted (set (mi_cols .codes [- 1 ]))
731
+ level_codes = unique (mi_cols .codes [- 1 ])
732
+ if sort :
733
+ level_codes = np .sort (level_codes )
729
734
level_vals_nan = level_vals .insert (len (level_vals ), None )
730
735
731
736
level_vals_used = np .take (level_vals_nan , level_codes )
0 commit comments