From bd0eda25b40cd07a70286ceb105e7a0e9428abfe Mon Sep 17 00:00:00 2001 From: "D.S. McNeil" Date: Wed, 10 May 2017 23:13:28 -0400 Subject: [PATCH] BUG: Preserve data order when stacking unsorted levels (#16323) --- doc/source/whatsnew/v0.20.2.txt | 2 +- pandas/core/reshape/reshape.py | 2 +- pandas/tests/test_multilevel.py | 31 +++++++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index bca92137891a0..983f3edfa2f46 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -73,7 +73,7 @@ Sparse Reshaping ^^^^^^^^^ - +- Bug in ``DataFrame.stack`` with unsorted levels in ``MultiIndex`` columns, where the original data order was not preserved (:issue:`16323`) Numeric diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 779002b300cc7..b0ed6d4c4b84d 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -689,7 +689,7 @@ def _convert_level_number(level_num, columns): new_labels = [np.arange(N).repeat(levsize)] new_names = [this.index.name] # something better? 
- new_levels.append(frame.columns.levels[level_num]) + new_levels.append(level_vals) new_labels.append(np.tile(level_labels, N)) new_names.append(frame.columns.names[level_num]) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 9d80190ae2813..c8c210c42eac2 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1193,6 +1193,37 @@ def test_unstack_unobserved_keys(self): recons = result.stack() tm.assert_frame_equal(recons, df) + def test_stack_order_with_unsorted_levels(self): + # GH 16323 + + def manual_compare_stacked(df, df_stacked, lev0, lev1): + assert all(df.loc[row, col] == + df_stacked.loc[(row, col[lev0]), col[lev1]] + for row in df.index for col in df.columns) + + # deep check for 1-row case + for width in [2, 3]: + levels_poss = itertools.product( + itertools.permutations([0, 1, 2], width), + repeat=2) + + for levels in levels_poss: + columns = MultiIndex(levels=levels, + labels=[[0, 0, 1, 1], + [0, 1, 0, 1]]) + df = DataFrame(columns=columns, data=[range(4)]) + for stack_lev in range(2): + df_stacked = df.stack(stack_lev) + manual_compare_stacked(df, df_stacked, + stack_lev, 1 - stack_lev) + + # check multi-row case + mi = MultiIndex(levels=[["A", "C", "B"], ["B", "A", "C"]], + labels=[np.repeat(range(3), 3), np.tile(range(3), 3)]) + df = DataFrame(columns=mi, index=range(5), + data=np.arange(5 * len(mi)).reshape(5, -1)) + manual_compare_stacked(df, df.stack(0), 0, 1) + def test_groupby_corner(self): midx = MultiIndex(levels=[['foo'], ['bar'], ['baz']], labels=[[0], [0], [0]],