diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 0829aa8f5a509..622e3bd553f8c 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -653,7 +653,15 @@ def _convert_level_number(level_num, columns):
     # time to ravel the values
     new_data = {}
     level_vals = this.columns.levels[-1]
-    level_labels = sorted(set(this.columns.labels[-1]))
+    # GH 20945: collect the innermost labels in order of first appearance;
+    # sorting them mangled alignment when non-monotonic labels were moved
+    # to the index.  The companion set keeps membership tests O(1).
+    seen_labels = set()
+    level_labels = []
+    for label in this.columns.labels[-1]:
+        if label not in seen_labels:
+            seen_labels.add(label)
+            level_labels.append(label)
     level_vals_used = level_vals[level_labels]
     levsize = len(level_labels)
     drop_cols = []
diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py
index d89731dc09044..26bd72ebbf263 100644
--- a/pandas/tests/frame/test_reshape.py
+++ b/pandas/tests/frame/test_reshape.py
@@ -414,6 +414,27 @@ def test_stack_mixed_levels(self):
         assert_frame_equal(df3.stack(level=['animal', 0]),
                            animal_hair_stacked, check_names=False)
 
+    def test_stack_retains_index_order_non_monotonic(self):
+        # GH 20945: stacking columns whose labels are not monotonic
+        # ('c', 'b', 'a', 'd') must keep the values aligned with their
+        # labels and preserve the original label order in the new index
+        df = pd.DataFrame([
+            ['DIM', 'A', 1, 2, 3, 4],
+            ['DIM', 'B', 11, 22, 33, 44],
+        ])
+        df.columns = ['dim1', 'dim2', 'c', 'b', 'a', 'd']
+        df.columns.name = 'foo'
+        df = df.set_index(['dim1', 'dim2'])
+
+        expected_mi = pd.MultiIndex.from_product(
+            [['DIM'], ['c', 'b', 'a', 'd']], names=['dim1', 'foo'])
+        expected = pd.DataFrame([[1, 11], [2, 22], [3, 33], [4, 44]],
+                                index=expected_mi, columns=['A', 'B'])
+        expected.columns.name = 'dim2'
+
+        result = df.unstack('dim2').stack(level=0)
+        tm.assert_frame_equal(result, expected)
+
     def test_stack_int_level_names(self):
         columns = MultiIndex.from_tuples(
             [('A', 'cat', 'long'), ('B', 'cat', 'long'),