Skip to content

BUG: Passing multiple levels to stack when having mixed integer/string level names (#8584) #8809

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 17, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.15.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,6 @@ Bug Fixes
- Bug in `pd.infer_freq`/`DataFrame.inferred_freq` that prevented proper sub-daily frequency inference
when the index contained DST days (:issue:`8772`).
- Bug where index name was still used when plotting a series with ``use_index=False`` (:issue:`8558`).

- Bug in ``DataFrame.stack`` when stacking multiple column levels where some (or all)
of the level names are integers (:issue:`8584`).
42 changes: 33 additions & 9 deletions pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,10 +525,10 @@ def stack(frame, level=-1, dropna=True):
raise ValueError(msg)

# Will also convert negative level numbers and check if out of bounds.
level = frame.columns._get_level_number(level)
level_num = frame.columns._get_level_number(level)

if isinstance(frame.columns, MultiIndex):
return _stack_multi_columns(frame, level=level, dropna=dropna)
return _stack_multi_columns(frame, level_num=level_num, dropna=dropna)
elif isinstance(frame.index, MultiIndex):
new_levels = list(frame.index.levels)
new_levels.append(frame.columns)
Expand Down Expand Up @@ -595,19 +595,43 @@ def stack_multiple(frame, level, dropna=True):
return result


def _stack_multi_columns(frame, level=-1, dropna=True):
def _stack_multi_columns(frame, level_num=-1, dropna=True):
def _convert_level_number(level_num, columns):
"""
Logic for converting the level number to something
we can safely pass to swaplevel:

We generally want to convert the level number into
a level name, except when columns do not have names,
in which case we must leave as a level number
"""
if level_num in columns.names:
return columns.names[level_num]
else:
if columns.names[level_num] is None:
return level_num
else:
return columns.names[level_num]

this = frame.copy()

# this makes life much simpler
if level != frame.columns.nlevels - 1:
if level_num != frame.columns.nlevels - 1:
# roll levels to put selected level at end
roll_columns = this.columns
for i in range(level, frame.columns.nlevels - 1):
roll_columns = roll_columns.swaplevel(i, i + 1)
for i in range(level_num, frame.columns.nlevels - 1):
# Need to check if the ints conflict with level names
lev1 = _convert_level_number(i, roll_columns)
lev2 = _convert_level_number(i + 1, roll_columns)
roll_columns = roll_columns.swaplevel(lev1, lev2)
this.columns = roll_columns

if not this.columns.is_lexsorted():
this = this.sortlevel(0, axis=1)
# Work around the edge case where 0 is one of the column names,
# which interferes with trying to sort based on the first
# level
level_to_sort = _convert_level_number(0, this.columns)
this = this.sortlevel(level_to_sort, axis=1)

# tuple list excluding level for grouping columns
if len(frame.columns.levels) > 2:
Expand Down Expand Up @@ -660,9 +684,9 @@ def _stack_multi_columns(frame, level=-1, dropna=True):
new_labels = [np.arange(N).repeat(levsize)]
new_names = [this.index.name] # something better?

new_levels.append(frame.columns.levels[level])
new_levels.append(frame.columns.levels[level_num])
new_labels.append(np.tile(np.arange(levsize), N))
new_names.append(frame.columns.names[level])
new_names.append(frame.columns.names[level_num])

new_index = MultiIndex(levels=new_levels, labels=new_labels,
names=new_names, verify_integrity=False)
Expand Down
64 changes: 64 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -12110,6 +12110,70 @@ def test_stack_ints(self):
df_named.stack(level=1).stack(level=1)
)

def test_stack_mixed_levels(self):
    """Stack several column levels whose names mix strings and ints."""
    tuples = [('A', 'cat', 'long'), ('B', 'cat', 'long'),
              ('A', 'dog', 'short'), ('B', 'dog', 'short')]
    columns = MultiIndex.from_tuples(
        tuples, names=['exp', 'animal', 'hair_length'])
    df = DataFrame(randn(4, 4), columns=columns)

    # Reference results, computed while every level name is a string.
    animal_hair_stacked = df.stack(level=['animal', 'hair_length'])
    exp_hair_stacked = df.stack(level=['exp', 'hair_length'])

    # GH #8584: the int 1 is both a level name and a valid level
    # position; stacking must treat it as the name.
    df2 = df.copy()
    df2.columns.names = ['exp', 'animal', 1]
    cases = [
        (['animal', 1], animal_hair_stacked),
        (['exp', 1], exp_hair_stacked),
    ]
    for levels, expected in cases:
        assert_frame_equal(df2.stack(level=levels), expected,
                           check_names=False)

    # Mixed types where the int is not a level name must raise.
    self.assertRaises(ValueError, df2.stack, level=['animal', 0])

    # GH #8584: a level named 0 could raise a strange error about
    # lexsort depth.
    df3 = df.copy()
    df3.columns.names = ['exp', 'animal', 0]
    assert_frame_equal(df3.stack(level=['animal', 0]),
                       animal_hair_stacked, check_names=False)

def test_stack_int_level_names(self):
    """Stack several column levels when every level name is an int."""
    tuples = [('A', 'cat', 'long'), ('B', 'cat', 'long'),
              ('A', 'dog', 'short'), ('B', 'dog', 'short')]
    columns = MultiIndex.from_tuples(
        tuples, names=['exp', 'animal', 'hair_length'])
    df = DataFrame(randn(4, 4), columns=columns)

    # Reference results, computed while every level name is a string.
    exp_animal_stacked = df.stack(level=['exp', 'animal'])
    animal_hair_stacked = df.stack(level=['animal', 'hair_length'])
    exp_hair_stacked = df.stack(level=['exp', 'hair_length'])

    # Int names that coincide with the level positions.
    df2 = df.copy()
    df2.columns.names = [0, 1, 2]
    in_order_cases = [
        ([1, 2], animal_hair_stacked),
        ([0, 1], exp_animal_stacked),
        ([0, 2], exp_hair_stacked),
    ]
    for levels, expected in in_order_cases:
        assert_frame_equal(df2.stack(level=levels), expected,
                           check_names=False)

    # Out-of-order int column names
    df3 = df.copy()
    df3.columns.names = [2, 0, 1]
    out_of_order_cases = [
        ([0, 1], animal_hair_stacked),
        ([2, 0], exp_animal_stacked),
        ([2, 1], exp_hair_stacked),
    ]
    for levels, expected in out_of_order_cases:
        assert_frame_equal(df3.stack(level=levels), expected,
                           check_names=False)


def test_unstack_bool(self):
df = DataFrame([False, False],
index=MultiIndex.from_arrays([['a', 'b'], ['c', 'l']]),
Expand Down