Skip to content

Commit b7c5e3b

Browse files
dsm054 authored and TomAugspurger committed
BUG: Preserve data order when stacking unsorted levels (#16323) (#16325)
(cherry picked from commit b1ff291)
1 parent 3c3eb30 commit b7c5e3b

File tree

3 files changed

+33 -2 lines changed

3 files changed

+33 -2 lines changed

doc/source/whatsnew/v0.20.2.txt

+1 -1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ Sparse
7676
Reshaping
7777
^^^^^^^^^
7878

79-
79+
- Bug in ``DataFrame.stack`` with unsorted levels in MultiIndex columns (:issue:`16323`)
8080

8181

8282
Numeric

pandas/core/reshape/reshape.py

+1 -1
Original file line numberDiff line numberDiff line change
@@ -689,7 +689,7 @@ def _convert_level_number(level_num, columns):
689689
new_labels = [np.arange(N).repeat(levsize)]
690690
new_names = [this.index.name] # something better?
691691

692-
new_levels.append(frame.columns.levels[level_num])
692+
new_levels.append(level_vals)
693693
new_labels.append(np.tile(level_labels, N))
694694
new_names.append(frame.columns.names[level_num])
695695

pandas/tests/test_multilevel.py

+31
Original file line numberDiff line numberDiff line change
@@ -1195,6 +1195,37 @@ def test_unstack_unobserved_keys(self):
11951195
recons = result.stack()
11961196
tm.assert_frame_equal(recons, df)
11971197

1198+
def test_stack_order_with_unsorted_levels(self):
1199+
# GH 16323
1200+
1201+
def manual_compare_stacked(df, df_stacked, lev0, lev1):
1202+
assert all(df.loc[row, col] ==
1203+
df_stacked.loc[(row, col[lev0]), col[lev1]]
1204+
for row in df.index for col in df.columns)
1205+
1206+
# deep check for 1-row case
1207+
for width in [2, 3]:
1208+
levels_poss = itertools.product(
1209+
itertools.permutations([0, 1, 2], width),
1210+
repeat=2)
1211+
1212+
for levels in levels_poss:
1213+
columns = MultiIndex(levels=levels,
1214+
labels=[[0, 0, 1, 1],
1215+
[0, 1, 0, 1]])
1216+
df = DataFrame(columns=columns, data=[range(4)])
1217+
for stack_lev in range(2):
1218+
df_stacked = df.stack(stack_lev)
1219+
manual_compare_stacked(df, df_stacked,
1220+
stack_lev, 1 - stack_lev)
1221+
1222+
# check multi-row case
1223+
mi = MultiIndex(levels=[["A", "C", "B"], ["B", "A", "C"]],
1224+
labels=[np.repeat(range(3), 3), np.tile(range(3), 3)])
1225+
df = DataFrame(columns=mi, index=range(5),
1226+
data=np.arange(5 * len(mi)).reshape(5, -1))
1227+
manual_compare_stacked(df, df.stack(0), 0, 1)
1228+
11981229
def test_groupby_corner(self):
11991230
midx = MultiIndex(levels=[['foo'], ['bar'], ['baz']],
12001231
labels=[[0], [0], [0]],

0 commit comments

Comments
 (0)