Skip to content

Commit 4ae90ae

Browse files
committed
BUG: Stacking with multiple mixed int/str levels
Add test case for mixed type stacking. Used wrong var name in the assert. Method to swap levels assuming ints are level numbers. Fix _stack_multi_columns to deal with mixed strs/ints. Extra test cases. Add fix to the release notes. Convert to label before swaplevel if possible. Revert "Method to swap levels assuming ints are level numbers" (this reverts commit 61f96fd3cb23cda9f9c7a6837b145ebd247a55cc). More test cases. Use _convert_level_number() to sort columns.
1 parent 9dd675b commit 4ae90ae

File tree

3 files changed

+100
-9
lines changed

3 files changed

+100
-9
lines changed

doc/source/whatsnew/v0.15.2.txt

+3
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,6 @@ Bug Fixes
9898
- Bug in `pd.infer_freq`/`DataFrame.inferred_freq` that prevented proper sub-daily frequency inference
9999
when the index contained DST days (:issue:`8772`).
100100
- Bug where index name was still used when plotting a series with ``use_index=False`` (:issue:`8558`).
101+
102+
- Bug in ``DataFrame.stack`` when stacking multiple column levels where some (or all)
103+
of the level names are numbers (:issue:`8584`).

pandas/core/reshape.py

+33-9
Original file line numberDiff line numberDiff line change
@@ -525,10 +525,10 @@ def stack(frame, level=-1, dropna=True):
525525
raise ValueError(msg)
526526

527527
# Will also convert negative level numbers and check if out of bounds.
528-
level = frame.columns._get_level_number(level)
528+
level_num = frame.columns._get_level_number(level)
529529

530530
if isinstance(frame.columns, MultiIndex):
531-
return _stack_multi_columns(frame, level=level, dropna=dropna)
531+
return _stack_multi_columns(frame, level_num=level_num, dropna=dropna)
532532
elif isinstance(frame.index, MultiIndex):
533533
new_levels = list(frame.index.levels)
534534
new_levels.append(frame.columns)
@@ -595,19 +595,43 @@ def stack_multiple(frame, level, dropna=True):
595595
return result
596596

597597

598-
def _stack_multi_columns(frame, level=-1, dropna=True):
598+
def _stack_multi_columns(frame, level_num=-1, dropna=True):
599+
def _convert_level_number(level_num, columns):
600+
"""
601+
Logic for converting the level number to something
602+
we can safely pass to swaplevel:
603+
604+
We generally want to convert the level number into
605+
a level name, except when columns do not have names,
606+
in which case we must leave as a level number
607+
"""
608+
if level_num in columns.names:
609+
return columns.names[level_num]
610+
else:
611+
if columns.names[level_num] is None:
612+
return level_num
613+
else:
614+
return columns.names[level_num]
615+
599616
this = frame.copy()
600617

601618
# this makes life much simpler
602-
if level != frame.columns.nlevels - 1:
619+
if level_num != frame.columns.nlevels - 1:
603620
# roll levels to put selected level at end
604621
roll_columns = this.columns
605-
for i in range(level, frame.columns.nlevels - 1):
606-
roll_columns = roll_columns.swaplevel(i, i + 1)
622+
for i in range(level_num, frame.columns.nlevels - 1):
623+
# Need to check if the ints conflict with level names
624+
lev1 = _convert_level_number(i, roll_columns)
625+
lev2 = _convert_level_number(i + 1, roll_columns)
626+
roll_columns = roll_columns.swaplevel(lev1, lev2)
607627
this.columns = roll_columns
608628

609629
if not this.columns.is_lexsorted():
610-
this = this.sortlevel(0, axis=1)
630+
# Workaround the edge case where 0 is one of the column names,
631+
# which interferes with trying to sort based on the first
632+
# level
633+
level_to_sort = _convert_level_number(0, this.columns)
634+
this = this.sortlevel(level_to_sort, axis=1)
611635

612636
# tuple list excluding level for grouping columns
613637
if len(frame.columns.levels) > 2:
@@ -660,9 +684,9 @@ def _stack_multi_columns(frame, level=-1, dropna=True):
660684
new_labels = [np.arange(N).repeat(levsize)]
661685
new_names = [this.index.name] # something better?
662686

663-
new_levels.append(frame.columns.levels[level])
687+
new_levels.append(frame.columns.levels[level_num])
664688
new_labels.append(np.tile(np.arange(levsize), N))
665-
new_names.append(frame.columns.names[level])
689+
new_names.append(frame.columns.names[level_num])
666690

667691
new_index = MultiIndex(levels=new_levels, labels=new_labels,
668692
names=new_names, verify_integrity=False)

pandas/tests/test_frame.py

+64
Original file line numberDiff line numberDiff line change
@@ -12110,6 +12110,70 @@ def test_stack_ints(self):
1211012110
df_named.stack(level=1).stack(level=1)
1211112111
)
1211212112

12113+
def test_stack_mixed_levels(self):
12114+
columns = MultiIndex.from_tuples(
12115+
[('A', 'cat', 'long'), ('B', 'cat', 'long'),
12116+
('A', 'dog', 'short'), ('B', 'dog', 'short')],
12117+
names=['exp', 'animal', 'hair_length']
12118+
)
12119+
df = DataFrame(randn(4, 4), columns=columns)
12120+
12121+
animal_hair_stacked = df.stack(level=['animal', 'hair_length'])
12122+
exp_hair_stacked = df.stack(level=['exp', 'hair_length'])
12123+
12124+
# GH #8584: Need to check that stacking works when a number
12125+
# is passed that is both a level name and in the range of
12126+
# the level numbers
12127+
df2 = df.copy()
12128+
df2.columns.names = ['exp', 'animal', 1]
12129+
assert_frame_equal(df2.stack(level=['animal', 1]),
12130+
animal_hair_stacked, check_names=False)
12131+
assert_frame_equal(df2.stack(level=['exp', 1]),
12132+
exp_hair_stacked, check_names=False)
12133+
12134+
# When mixed types are passed and the ints are not level
12135+
# names, raise
12136+
self.assertRaises(ValueError, df2.stack, level=['animal', 0])
12137+
12138+
# GH #8584: Having 0 in the level names could raise a
12139+
# strange error about lexsort depth
12140+
df3 = df.copy()
12141+
df3.columns.names = ['exp', 'animal', 0]
12142+
assert_frame_equal(df3.stack(level=['animal', 0]),
12143+
animal_hair_stacked, check_names=False)
12144+
12145+
def test_stack_int_level_names(self):
12146+
columns = MultiIndex.from_tuples(
12147+
[('A', 'cat', 'long'), ('B', 'cat', 'long'),
12148+
('A', 'dog', 'short'), ('B', 'dog', 'short')],
12149+
names=['exp', 'animal', 'hair_length']
12150+
)
12151+
df = DataFrame(randn(4, 4), columns=columns)
12152+
12153+
exp_animal_stacked = df.stack(level=['exp', 'animal'])
12154+
animal_hair_stacked = df.stack(level=['animal', 'hair_length'])
12155+
exp_hair_stacked = df.stack(level=['exp', 'hair_length'])
12156+
12157+
df2 = df.copy()
12158+
df2.columns.names = [0, 1, 2]
12159+
assert_frame_equal(df2.stack(level=[1, 2]), animal_hair_stacked,
12160+
check_names=False )
12161+
assert_frame_equal(df2.stack(level=[0, 1]), exp_animal_stacked,
12162+
check_names=False)
12163+
assert_frame_equal(df2.stack(level=[0, 2]), exp_hair_stacked,
12164+
check_names=False)
12165+
12166+
# Out-of-order int column names
12167+
df3 = df.copy()
12168+
df3.columns.names = [2, 0, 1]
12169+
assert_frame_equal(df3.stack(level=[0, 1]), animal_hair_stacked,
12170+
check_names=False)
12171+
assert_frame_equal(df3.stack(level=[2, 0]), exp_animal_stacked,
12172+
check_names=False)
12173+
assert_frame_equal(df3.stack(level=[2, 1]), exp_hair_stacked,
12174+
check_names=False)
12175+
12176+
1211312177
def test_unstack_bool(self):
1211412178
df = DataFrame([False, False],
1211512179
index=MultiIndex.from_arrays([['a', 'b'], ['c', 'l']]),

0 commit comments

Comments
 (0)