@@ -536,6 +536,56 @@ def test_unstack_dtypes(self):
536
536
assert left .shape == (3 , 2 )
537
537
tm .assert_frame_equal (left , right )
538
538
539
+ def test_unstack_unused_levels (self ):
540
+ # GH 17845: sliced columns of int DataFrame
541
+ idx = pd .MultiIndex .from_product ([['a' ], ['A' , 'B' , 'C' , 'D' ]])[:- 1 ]
542
+ df = pd .DataFrame ([[1 , 0 ]] * 3 , index = idx )
543
+
544
+ result = df .unstack ()
545
+ exp_col = pd .MultiIndex .from_product ([[0 , 1 ], ['A' , 'B' , 'C' ]])
546
+ expected = pd .DataFrame ([[1 , 1 , 1 , 0 , 0 , 0 ]], index = ['a' ],
547
+ columns = exp_col )
548
+ tm .assert_frame_equal (result , expected )
549
+ assert ((result .columns .levels [1 ] == idx .levels [1 ]).all ())
550
+
551
+ # Unused items on both levels
552
+ levels = [[0 , 1 , 7 ], [0 , 1 , 2 , 3 ]]
553
+ labels = [[0 , 0 , 1 , 1 ], [0 , 2 , 0 , 2 ]]
554
+ idx = pd .MultiIndex (levels , labels )
555
+ block = np .arange (4 ).reshape (2 , 2 )
556
+ df = pd .DataFrame (np .concatenate ([block , block + 4 ]), index = idx )
557
+ result = df .unstack ()
558
+ expected = pd .DataFrame (np .concatenate ([block * 2 , block * 2 - 1 ],
559
+ axis = 1 ),
560
+ columns = idx )
561
+ assert ((result .columns .levels [1 ] == idx .levels [1 ]).all ())
562
+
563
+ # With mixed dtype and NaN
564
+ levels = [['a' , 2 , 'c' ], [1 , 3 , 5 , 7 ]]
565
+ labels = [[0 , - 1 , 1 , 1 ], [0 , 2 , - 1 , 2 ]]
566
+ idx = pd .MultiIndex (levels , labels )
567
+ data = np .arange (8 )
568
+ df = pd .DataFrame (data .reshape (4 , 2 ), index = idx )
569
+
570
+ cases = ((0 , [13 , 16 , 6 , 9 , 2 , 5 , 8 , 11 ],
571
+ [np .nan , 'a' , 2 ], [np .nan , 5 , 1 ]),
572
+ (1 , [8 , 11 , 1 , 4 , 12 , 15 , 13 , 16 ],
573
+ [np .nan , 5 , 1 ], [np .nan , 'a' , 2 ]))
574
+ for level , idces , col_level , idx_level in cases :
575
+ result = df .unstack (level = level )
576
+ exp_data = np .zeros (18 ) * np .nan
577
+ exp_data [idces ] = data
578
+ cols = pd .MultiIndex .from_product ([[0 , 1 ], col_level ])
579
+ expected = pd .DataFrame (exp_data .reshape (3 , 6 ),
580
+ index = idx_level , columns = cols )
581
+ # Broken (GH 18455):
582
+ # tm.assert_frame_equal(result, expected)
583
+ diff = result - expected
584
+ assert (diff .sum ().sum () == 0 )
585
+ assert ((diff + 1 ).sum ().sum () == 8 )
586
+
587
+ assert ((result .columns .levels [1 ] == idx .levels [level ]).all ())
588
+
539
589
def test_unstack_non_unique_index_names (self ):
540
590
idx = MultiIndex .from_tuples ([('a' , 'b' ), ('c' , 'd' )],
541
591
names = ['c1' , 'c1' ])
0 commit comments