@@ -560,6 +560,74 @@ def test_unstack_dtypes(self):
560
560
assert left .shape == (3 , 2 )
561
561
tm .assert_frame_equal (left , right )
562
562
563
def test_unstack_unused_levels(self):
    """Check unstack() on MultiIndexes whose levels hold unused entries."""
    # GH 17845: unused labels in index make unstack() cast int to float
    full_index = pd.MultiIndex.from_product([['a'], ['A', 'B', 'C', 'D']])
    idx = full_index[:-1]  # drops the 'D' row, leaving 'D' as an unused level
    df = pd.DataFrame([[1, 0]] * 3, index=idx)

    result = df.unstack()
    expected_columns = pd.MultiIndex.from_product([[0, 1],
                                                   ['A', 'B', 'C']])
    expected = pd.DataFrame([[1, 1, 1, 0, 0, 0]],
                            index=['a'],
                            columns=expected_columns)
    tm.assert_frame_equal(result, expected)
    # The unstacked column level is compared against the full source level.
    assert (result.columns.levels[1] == idx.levels[1]).all()

    # Unused items on both levels
    level_values = [[0, 1, 7], [0, 1, 2, 3]]
    codes = [[0, 0, 1, 1], [0, 2, 0, 2]]
    idx = pd.MultiIndex(level_values, codes)
    block = np.arange(4).reshape(2, 2)
    stacked_values = np.concatenate([block, block + 4])
    df = pd.DataFrame(stacked_values, index=idx)

    result = df.unstack()
    unstacked_values = np.concatenate([block * 2, block * 2 + 1], axis=1)
    expected = pd.DataFrame(unstacked_values, columns=idx)
    tm.assert_frame_equal(result, expected)
    assert (result.columns.levels[1] == idx.levels[1]).all()

    # With mixed dtype and NaN
    level_values = [['a', 2, 'c'], [1, 3, 5, 7]]
    codes = [[0, -1, 1, 1], [0, 2, -1, 2]]
    idx = pd.MultiIndex(level_values, codes)
    data = np.arange(8)
    df = pd.DataFrame(data.reshape(4, 2), index=idx)

    # Each case: (level to unstack, flat positions of `data` in the 3x6
    # expected array, labels of the new column level, labels of the
    # remaining row index).
    cases = (
        (0, [13, 16, 6, 9, 2, 5, 8, 11],
         [np.nan, 'a', 2], [np.nan, 5, 1]),
        (1, [8, 11, 1, 4, 12, 15, 13, 16],
         [np.nan, 5, 1], [np.nan, 'a', 2]),
    )
    for lvl, positions, col_labels, row_labels in cases:
        result = df.unstack(level=lvl)

        flat_expected = np.full(18, np.nan)
        flat_expected[positions] = data
        expected_columns = pd.MultiIndex.from_product([[0, 1], col_labels])
        expected = pd.DataFrame(flat_expected.reshape(3, 6),
                                index=row_labels,
                                columns=expected_columns)

        # A direct tm.assert_frame_equal(result, expected) is broken
        # (GH 18455), so compare through the element-wise difference:
        # with NaN cells skipped by sum(), the differences must total 0,
        # and adding 1 to each must total 8 (one per populated cell).
        diff = result - expected
        assert diff.sum().sum() == 0
        assert (diff + 1).sum().sum() == 8

        assert (result.columns.levels[1] == idx.levels[lvl]).all()
613
+
614
@pytest.mark.parametrize("cols", [['A', 'C'], slice(None)])
def test_unstack_unused_level(self, cols):
    """Check unstack() when the unstacked level keeps an unused label."""
    # GH 18562 : unused labels on the unstacked level
    rows = [[2010, 'a', 'I'],
            [2011, 'b', 'II']]
    frame = pd.DataFrame(rows, columns=['A', 'B', 'C'])

    # drop=False keeps A/B/C available as regular columns as well.
    indexed = frame.set_index(['A', 'B', 'C'], drop=False)

    # Selecting only C == 'I' leaves 'II' behind as an unused label.
    row_key = (slice(None), slice(None), 'I')
    picked = indexed.loc[row_key, cols]
    result = picked.unstack()

    # Build the expectation from the first row: columns gain a 'C' level
    # holding only 'I', and 'C' is removed from the row index.
    expected = indexed.iloc[[0]][cols]
    new_columns = MultiIndex.from_product([expected.columns, ['I']],
                                          names=[None, 'C'])
    expected.columns = new_columns
    expected.index = expected.index.droplevel('C')

    tm.assert_frame_equal(result, expected)
630
+
563
631
def test_unstack_nan_index (self ): # GH7466
564
632
cast = lambda val : '{0:1}' .format ('' if val != val else val )
565
633
nan = np .nan
0 commit comments