@@ -560,6 +560,73 @@ def test_unstack_dtypes(self):
560
560
assert left .shape == (3 , 2 )
561
561
tm .assert_frame_equal (left , right )
562
562
563
+ def test_unstack_unused_levels (self ):
564
+ # GH 17845: sliced columns of int DataFrame
565
+ idx = pd .MultiIndex .from_product ([['a' ], ['A' , 'B' , 'C' , 'D' ]])[:- 1 ]
566
+ df = pd .DataFrame ([[1 , 0 ]] * 3 , index = idx )
567
+
568
+ result = df .unstack ()
569
+ exp_col = pd .MultiIndex .from_product ([[0 , 1 ], ['A' , 'B' , 'C' ]])
570
+ expected = pd .DataFrame ([[1 , 1 , 1 , 0 , 0 , 0 ]], index = ['a' ],
571
+ columns = exp_col )
572
+ tm .assert_frame_equal (result , expected )
573
+ assert ((result .columns .levels [1 ] == idx .levels [1 ]).all ())
574
+
575
+ # Unused items on both levels
576
+ levels = [[0 , 1 , 7 ], [0 , 1 , 2 , 3 ]]
577
+ labels = [[0 , 0 , 1 , 1 ], [0 , 2 , 0 , 2 ]]
578
+ idx = pd .MultiIndex (levels , labels )
579
+ block = np .arange (4 ).reshape (2 , 2 )
580
+ df = pd .DataFrame (np .concatenate ([block , block + 4 ]), index = idx )
581
+ result = df .unstack ()
582
+ expected = pd .DataFrame (np .concatenate ([block * 2 , block * 2 - 1 ],
583
+ axis = 1 ),
584
+ columns = idx )
585
+ assert ((result .columns .levels [1 ] == idx .levels [1 ]).all ())
586
+
587
+ # With mixed dtype and NaN
588
+ levels = [['a' , 2 , 'c' ], [1 , 3 , 5 , 7 ]]
589
+ labels = [[0 , - 1 , 1 , 1 ], [0 , 2 , - 1 , 2 ]]
590
+ idx = pd .MultiIndex (levels , labels )
591
+ data = np .arange (8 )
592
+ df = pd .DataFrame (data .reshape (4 , 2 ), index = idx )
593
+
594
+ cases = ((0 , [13 , 16 , 6 , 9 , 2 , 5 , 8 , 11 ],
595
+ [np .nan , 'a' , 2 ], [np .nan , 5 , 1 ]),
596
+ (1 , [8 , 11 , 1 , 4 , 12 , 15 , 13 , 16 ],
597
+ [np .nan , 5 , 1 ], [np .nan , 'a' , 2 ]))
598
+ for level , idces , col_level , idx_level in cases :
599
+ result = df .unstack (level = level )
600
+ exp_data = np .zeros (18 ) * np .nan
601
+ exp_data [idces ] = data
602
+ cols = pd .MultiIndex .from_product ([[0 , 1 ], col_level ])
603
+ expected = pd .DataFrame (exp_data .reshape (3 , 6 ),
604
+ index = idx_level , columns = cols )
605
+ # Broken (GH 18455):
606
+ # tm.assert_frame_equal(result, expected)
607
+ diff = result - expected
608
+ assert (diff .sum ().sum () == 0 )
609
+ assert ((diff + 1 ).sum ().sum () == 8 )
610
+
611
+ assert ((result .columns .levels [1 ] == idx .levels [level ]).all ())
612
+
613
@pytest.mark.parametrize("cols", [['A', 'C'], slice(None)])
def test_unstack_unused_level(self, cols):
    """Unstack a ``.loc`` selection and compare with a hand-built frame.

    The frame is indexed by all three of its columns (kept as data via
    ``drop=False``); only the rows whose ``C`` label is ``'I'`` are
    selected, restricted to ``cols``, and then unstacked.
    """
    # GH 18562 : unused labels on the unstacked level
    records = [[2010, 'a', 'I'],
               [2011, 'b', 'II']]
    frame = pd.DataFrame(records, columns=['A', 'B', 'C'])
    indexed = frame.set_index(['A', 'B', 'C'], drop=False)

    row_key = (slice(None), slice(None), 'I')
    result = indexed.loc[row_key, cols].unstack()

    # Build the expectation from the first row: its columns gain a second
    # level named 'C' holding 'I', and 'C' is dropped from the row index.
    expected = indexed.iloc[[0]][cols]
    expected.columns = MultiIndex.from_product(
        [expected.columns, ['I']], names=[None, 'C'])
    expected.index = expected.index.droplevel('C')
    assert_frame_equal(result, expected)
629
+
563
630
def test_unstack_nan_index (self ): # GH7466
564
631
cast = lambda val : '{0:1}' .format ('' if val != val else val )
565
632
nan = np .nan
0 commit comments