@@ -508,6 +508,14 @@ def get_compressed_ids(labels, sizes):
508
508
return comp_index , obs_ids
509
509
510
510
511
+ def _iterate_through_set (x ):
512
+ if isinstance (x , set ):
513
+ for y in x :
514
+ yield y
515
+ else :
516
+ yield x
517
+
518
+
511
519
def stack (frame , level = - 1 , dropna = True ):
512
520
"""
513
521
Convert DataFrame to Series with multi-level Index. Columns become the
@@ -517,19 +525,18 @@ def stack(frame, level=-1, dropna=True):
517
525
-------
518
526
stacked : Series
519
527
"""
520
- N , K = frame .shape
521
528
if isinstance (frame .columns , MultiIndex ):
522
- if frame .columns ._reference_duplicate_name (level ):
529
+ if any (frame .columns ._reference_duplicate_name (lev )
530
+ for lev in _iterate_through_set (level )):
523
531
msg = ("Ambiguous reference to {0}. The column "
524
532
"names are not unique." .format (level ))
525
533
raise ValueError (msg )
526
-
527
- # Will also convert negative level numbers and check if out of bounds.
528
- level_num = frame .columns ._get_level_number (level )
529
-
530
- if isinstance (frame .columns , MultiIndex ):
534
+ # Will also convert negative level numbers and check if out of bounds.
535
+ level_num = frame .columns ._get_level_number (level )
531
536
return _stack_multi_columns (frame , level_num = level_num , dropna = dropna )
532
- elif isinstance (frame .index , MultiIndex ):
537
+
538
+ N , K = frame .shape
539
+ if isinstance (frame .index , MultiIndex ):
533
540
new_levels = list (frame .index .levels )
534
541
new_levels .append (frame .columns )
535
542
@@ -559,13 +566,13 @@ def stack(frame, level=-1, dropna=True):
559
566
def stack_multiple (frame , level , dropna = True ):
560
567
# If all passed levels match up to column names, no
561
568
# ambiguity about what to do
562
- if all (lev in frame .columns .names for lev in level ):
569
+ if all (lev in frame .columns .names for levl in level for lev in _iterate_through_set ( levl ) ):
563
570
result = frame
564
571
for lev in level :
565
572
result = stack (result , lev , dropna = dropna )
566
573
567
574
# Otherwise, level numbers may change as each successive level is stacked
568
- elif all (isinstance (lev , int ) for lev in level ):
575
+ elif all (isinstance (lev , int ) for levl in level for lev in _iterate_through_set ( levl ) ):
569
576
# As each stack is done, the level numbers decrease, so we need
570
577
# to account for that when level is a sequence of ints
571
578
result = frame
@@ -576,16 +583,19 @@ def stack_multiple(frame, level, dropna=True):
576
583
# Can't iterate directly through level as we might need to change
577
584
# values as we go
578
585
for index in range (len (level )):
579
- lev = level [index ]
580
- result = stack (result , lev , dropna = dropna )
586
+ levl = level [index ]
587
+ result = stack (result , levl , dropna = dropna )
581
588
# Decrement all level numbers greater than current, as these
582
- # have now shifted down by one
589
+ # have now shifted down
583
590
updated_level = []
584
591
for other in level :
585
- if other > lev :
586
- updated_level .append (other - 1 )
592
+ if isinstance (other , set ):
593
+ updated_level .append ({(othr - sum ((othr > lev )
594
+ for lev in _iterate_through_set (levl )))
595
+ for othr in other })
587
596
else :
588
- updated_level .append (other )
597
+ updated_level .append (other - sum ((other > lev )
598
+ for lev in _iterate_through_set (levl )))
589
599
level = updated_level
590
600
591
601
else :
@@ -616,85 +626,101 @@ def _convert_level_number(level_num, columns):
616
626
this = frame .copy ()
617
627
618
628
# this makes life much simpler
619
- if level_num != frame .columns .nlevels - 1 :
620
- # roll levels to put selected level at end
621
- roll_columns = this .columns
622
- for i in range (level_num , frame .columns .nlevels - 1 ):
629
+ # roll levels to put selected level(s) at end
630
+ level_nums = level_num if isinstance (level_num , set ) else {level_num }
631
+ roll_columns = this .columns
632
+ for j , level_num in enumerate (sorted (level_nums , reverse = True )):
633
+ for i in range (level_num , frame .columns .nlevels - (j + 1 )):
623
634
# Need to check if the ints conflict with level names
624
635
lev1 = _convert_level_number (i , roll_columns )
625
636
lev2 = _convert_level_number (i + 1 , roll_columns )
626
637
roll_columns = roll_columns .swaplevel (lev1 , lev2 )
627
- this .columns = roll_columns
638
+ this .columns = roll_columns
628
639
629
640
if not this .columns .is_lexsorted ():
630
641
# Workaround the edge case where 0 is one of the column names,
631
- # which interferes with trying to sort based on the first
632
- # level
642
+ # which interferes with trying to sort based on the first level
633
643
level_to_sort = _convert_level_number (0 , this .columns )
634
644
this = this .sortlevel (level_to_sort , axis = 1 )
635
645
636
- # tuple list excluding level for grouping columns
637
- if len (frame .columns .levels ) > 2 :
646
+ num_levels_to_stack = len (level_nums )
647
+ level_vals = this .columns .levels [- num_levels_to_stack :]
648
+ level_labels = sorted (set (zip (* this .columns .labels [- num_levels_to_stack :])))
649
+ level_vals_used = MultiIndex .from_tuples ([tuple (level_vals [i ][lab ] for i , lab in enumerate (label ))
650
+ for label in level_labels ],
651
+ names = this .columns .names [- num_levels_to_stack :])
652
+ levsize = len (level_labels )
653
+
654
+ # construct new_index
655
+ N = len (this )
656
+ if isinstance (this .index , MultiIndex ):
657
+ new_levels = list (this .index .levels )
658
+ new_names = list (this .index .names )
659
+ new_labels = [lab .repeat (levsize ) for lab in this .index .labels ]
660
+ else :
661
+ new_levels = [this .index ]
662
+ new_labels = [np .arange (N ).repeat (levsize )]
663
+ new_names = [this .index .name ] # something better?
664
+ new_levels += level_vals
665
+ new_labels += [np .tile (labels , N ) for labels in zip (* level_labels )]
666
+ new_names += level_vals_used .names
667
+ new_index = MultiIndex (levels = new_levels , labels = new_labels ,
668
+ names = new_names , verify_integrity = False )
669
+
670
+ # if stacking all levels in columns, result will be a Series
671
+ if len (frame .columns .levels ) == num_levels_to_stack :
672
+ new_data = frame .values .ravel ()
673
+ if dropna :
674
+ mask = notnull (new_data )
675
+ new_data = new_data [mask ]
676
+ new_index = new_index [mask ]
677
+ return Series (new_data , index = new_index )
678
+
679
+ # result will be a DataFrame
680
+
681
+ # construct new_columns
682
+ if len (frame .columns .levels ) > (num_levels_to_stack + 1 ):
683
+ # result columns will be a MultiIndex
684
+ # tuple list excluding level for grouping columns
638
685
tuples = list (zip (* [
639
686
lev .take (lab ) for lev , lab in
640
- zip (this .columns .levels [:- 1 ], this .columns .labels [:- 1 ])
687
+ zip (this .columns .levels [:- num_levels_to_stack ],
688
+ this .columns .labels [:- num_levels_to_stack ])
641
689
]))
642
690
unique_groups = [key for key , _ in itertools .groupby (tuples )]
643
- new_names = this .columns .names [:- 1 ]
691
+ new_names = this .columns .names [:- num_levels_to_stack ]
644
692
new_columns = MultiIndex .from_tuples (unique_groups , names = new_names )
645
693
else :
694
+ # result columns will be an Index
646
695
new_columns = unique_groups = this .columns .levels [0 ]
647
696
648
- # time to ravel the values
697
+ # construct new_data
649
698
new_data = {}
650
- level_vals = this .columns .levels [- 1 ]
651
- level_labels = sorted (set (this .columns .labels [- 1 ]))
652
- level_vals_used = level_vals [level_labels ]
653
- levsize = len (level_labels )
654
699
drop_cols = []
655
700
for key in unique_groups :
656
701
loc = this .columns .get_loc (key )
657
702
slice_len = loc .stop - loc .start
658
703
# can make more efficient?
659
-
660
704
if slice_len == 0 :
661
705
drop_cols .append (key )
662
706
continue
663
707
elif slice_len != levsize :
664
708
chunk = this .ix [:, this .columns [loc ]]
665
- chunk .columns = level_vals .take (chunk .columns .labels [- 1 ])
709
+ chunk .columns = MultiIndex .from_arrays ([vals .take (labels ) for (vals , labels )
710
+ in zip (level_vals , chunk .columns .labels [- num_levels_to_stack :])],
711
+ names = chunk .columns .names [- num_levels_to_stack :])
666
712
value_slice = chunk .reindex (columns = level_vals_used ).values
667
713
else :
668
714
if frame ._is_mixed_type :
669
715
value_slice = this .ix [:, this .columns [loc ]].values
670
716
else :
671
717
value_slice = this .values [:, loc ]
672
-
673
718
new_data [key ] = value_slice .ravel ()
674
719
675
720
if len (drop_cols ) > 0 :
676
721
new_columns = new_columns - drop_cols
677
722
678
- N = len (this )
679
-
680
- if isinstance (this .index , MultiIndex ):
681
- new_levels = list (this .index .levels )
682
- new_names = list (this .index .names )
683
- new_labels = [lab .repeat (levsize ) for lab in this .index .labels ]
684
- else :
685
- new_levels = [this .index ]
686
- new_labels = [np .arange (N ).repeat (levsize )]
687
- new_names = [this .index .name ] # something better?
688
-
689
- new_levels .append (frame .columns .levels [level_num ])
690
- new_labels .append (np .tile (level_labels , N ))
691
- new_names .append (frame .columns .names [level_num ])
692
-
693
- new_index = MultiIndex (levels = new_levels , labels = new_labels ,
694
- names = new_names , verify_integrity = False )
695
-
696
723
result = DataFrame (new_data , index = new_index , columns = new_columns )
697
-
698
724
# more efficient way to go about this? can do the whole masking biz but
699
725
# will only save a small amount of time...
700
726
if dropna :
0 commit comments