Skip to content

Commit 0f899f4

Browse files
Merge pull request #8809 from onesandzeroes/stackfix
BUG: Passing multiple levels to stack when having mixed integer/string level names (#8584)
2 parents 08105ab + 4ae90ae commit 0f899f4

File tree

3 files changed

+100
-9
lines changed

3 files changed

+100
-9
lines changed

doc/source/whatsnew/v0.15.2.txt

+3
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,6 @@ Bug Fixes
9999
- Bug in `pd.infer_freq`/`DataFrame.inferred_freq` that prevented proper sub-daily frequency inference
100100
when the index contained DST days (:issue:`8772`).
101101
- Bug where index name was still used when plotting a series with ``use_index=False`` (:issue:`8558`).
102+
103+
- Bugs when trying to stack multiple column levels, when some (or all)
104+
of the level names are numbers (:issue:`8584`).

pandas/core/reshape.py

+33-9
Original file line numberDiff line numberDiff line change
@@ -525,10 +525,10 @@ def stack(frame, level=-1, dropna=True):
525525
raise ValueError(msg)
526526

527527
# Will also convert negative level numbers and check if out of bounds.
528-
level = frame.columns._get_level_number(level)
528+
level_num = frame.columns._get_level_number(level)
529529

530530
if isinstance(frame.columns, MultiIndex):
531-
return _stack_multi_columns(frame, level=level, dropna=dropna)
531+
return _stack_multi_columns(frame, level_num=level_num, dropna=dropna)
532532
elif isinstance(frame.index, MultiIndex):
533533
new_levels = list(frame.index.levels)
534534
new_levels.append(frame.columns)
@@ -595,19 +595,43 @@ def stack_multiple(frame, level, dropna=True):
595595
return result
596596

597597

598-
def _stack_multi_columns(frame, level=-1, dropna=True):
598+
def _stack_multi_columns(frame, level_num=-1, dropna=True):
599+
def _convert_level_number(level_num, columns):
    """
    Translate a positional level number into a value that is safe to
    hand to ``swaplevel``.

    The name stored at position ``level_num`` is preferred. The bare
    position is returned only when that name is None and ``level_num``
    is not itself one of the level names.

    Parameters
    ----------
    level_num : int
        Position of the level within ``columns.names``.
    columns : object exposing a ``names`` sequence

    Returns
    -------
    The name found at ``level_num``, or ``level_num`` itself for an
    unnamed level.
    """
    names = columns.names

    # The integer is also used as a level name: return whatever name
    # sits at that position, unchanged.
    if level_num in names:
        return names[level_num]

    name_at_position = names[level_num]
    return level_num if name_at_position is None else name_at_position
615+
599616
this = frame.copy()
600617

601618
# this makes life much simpler
602-
if level != frame.columns.nlevels - 1:
619+
if level_num != frame.columns.nlevels - 1:
603620
# roll levels to put selected level at end
604621
roll_columns = this.columns
605-
for i in range(level, frame.columns.nlevels - 1):
606-
roll_columns = roll_columns.swaplevel(i, i + 1)
622+
for i in range(level_num, frame.columns.nlevels - 1):
623+
# Need to check if the ints conflict with level names
624+
lev1 = _convert_level_number(i, roll_columns)
625+
lev2 = _convert_level_number(i + 1, roll_columns)
626+
roll_columns = roll_columns.swaplevel(lev1, lev2)
607627
this.columns = roll_columns
608628

609629
if not this.columns.is_lexsorted():
610-
this = this.sortlevel(0, axis=1)
630+
# Work around the edge case where 0 is one of the column names,
631+
# which interferes with trying to sort based on the first
632+
# level
633+
level_to_sort = _convert_level_number(0, this.columns)
634+
this = this.sortlevel(level_to_sort, axis=1)
611635

612636
# tuple list excluding level for grouping columns
613637
if len(frame.columns.levels) > 2:
@@ -660,9 +684,9 @@ def _stack_multi_columns(frame, level=-1, dropna=True):
660684
new_labels = [np.arange(N).repeat(levsize)]
661685
new_names = [this.index.name] # something better?
662686

663-
new_levels.append(frame.columns.levels[level])
687+
new_levels.append(frame.columns.levels[level_num])
664688
new_labels.append(np.tile(np.arange(levsize), N))
665-
new_names.append(frame.columns.names[level])
689+
new_names.append(frame.columns.names[level_num])
666690

667691
new_index = MultiIndex(levels=new_levels, labels=new_labels,
668692
names=new_names, verify_integrity=False)

pandas/tests/test_frame.py

+64
Original file line numberDiff line numberDiff line change
@@ -12110,6 +12110,70 @@ def test_stack_ints(self):
1211012110
df_named.stack(level=1).stack(level=1)
1211112111
)
1211212112

12113+
def test_stack_mixed_levels(self):
    # Stacking several column levels at once when the level names are a
    # mixture of strings and integers.
    col_tuples = [('A', 'cat', 'long'), ('B', 'cat', 'long'),
                  ('A', 'dog', 'short'), ('B', 'dog', 'short')]
    str_named = MultiIndex.from_tuples(
        col_tuples, names=['exp', 'animal', 'hair_length'])
    base = DataFrame(randn(4, 4), columns=str_named)

    # Reference results, produced with all-string level names.
    expected_animal_hair = base.stack(level=['animal', 'hair_length'])
    expected_exp_hair = base.stack(level=['exp', 'hair_length'])

    # GH #8584: the last level is named 1, which is also a valid level
    # position; it has to be resolved as a name.
    named_one = base.copy()
    named_one.columns.names = ['exp', 'animal', 1]
    cases = [(['animal', 1], expected_animal_hair),
             (['exp', 1], expected_exp_hair)]
    for levels, expected in cases:
        assert_frame_equal(named_one.stack(level=levels), expected,
                           check_names=False)

    # A mix of names and ints where the int is not a level name is
    # rejected.
    self.assertRaises(ValueError, named_one.stack, level=['animal', 0])

    # GH #8584: a level named 0 used to trigger an odd error about
    # lexsort depth.
    named_zero = base.copy()
    named_zero.columns.names = ['exp', 'animal', 0]
    assert_frame_equal(named_zero.stack(level=['animal', 0]),
                       expected_animal_hair, check_names=False)
12144+
12145+
def test_stack_int_level_names(self):
    # Stacking several column levels when every level name is an integer.
    col_tuples = [('A', 'cat', 'long'), ('B', 'cat', 'long'),
                  ('A', 'dog', 'short'), ('B', 'dog', 'short')]
    str_named = MultiIndex.from_tuples(
        col_tuples, names=['exp', 'animal', 'hair_length'])
    base = DataFrame(randn(4, 4), columns=str_named)

    # Reference results, produced with all-string level names.
    expected_exp_animal = base.stack(level=['exp', 'animal'])
    expected_animal_hair = base.stack(level=['animal', 'hair_length'])
    expected_exp_hair = base.stack(level=['exp', 'hair_length'])

    # Integer names that coincide with the level positions.
    in_order = base.copy()
    in_order.columns.names = [0, 1, 2]
    in_order_cases = [([1, 2], expected_animal_hair),
                      ([0, 1], expected_exp_animal),
                      ([0, 2], expected_exp_hair)]
    for levels, expected in in_order_cases:
        assert_frame_equal(in_order.stack(level=levels), expected,
                           check_names=False)

    # Out-of-order int column names
    shuffled = base.copy()
    shuffled.columns.names = [2, 0, 1]
    shuffled_cases = [([0, 1], expected_animal_hair),
                      ([2, 0], expected_exp_animal),
                      ([2, 1], expected_exp_hair)]
    for levels, expected in shuffled_cases:
        assert_frame_equal(shuffled.stack(level=levels), expected,
                           check_names=False)
12175+
12176+
1211312177
def test_unstack_bool(self):
1211412178
df = DataFrame([False, False],
1211512179
index=MultiIndex.from_arrays([['a', 'b'], ['c', 'l']]),

0 commit comments

Comments
 (0)