Skip to content

Commit 782afbe

Browse files
committed
BUG: Fix for multiple ints as levels in DataFrame.stack()
Fix for multiple level numbers. If passed levels match level names, use them directly. Add test cases for multiple stacking. Add check for out of range negative level num in _get_level_number(). Use _get_level_number() to do validation and conversion. Explain why level list isn't iterated over directly. Raise exception on fall-through case. Add test cases for raising exceptions. Use _get_level_number() to convert and validate negative levels. Add changes to release notes. Fix Python 2.6 build issue: use assertRaisesRegexp from testing module. Add tests for out of bounds level numbers. Move API change note to correct section. Add blank line after if/elifs. Add blank line between sections. Too many blank lines between functions. Add multiple stacking examples. Reference to new examples in What's new docs. More blank lines between sections. Remove unused itertools import.
1 parent 34cecd8 commit 782afbe

File tree

7 files changed

+122
-8
lines changed

7 files changed

+122
-8
lines changed

doc/source/reshaping.rst

+24
Original file line numberDiff line numberDiff line change
@@ -160,10 +160,34 @@ the level numbers:
160160
161161
stacked.unstack('second')
162162
163+
.. _reshaping.stack_multiple:
164+
163165
You may also stack or unstack more than one level at a time by passing a list
164166
of levels, in which case the end result is as if each level in the list were
165167
processed individually.
166168

169+
.. ipython:: python
170+
171+
columns = MultiIndex.from_tuples([
172+
('A', 'cat', 'long'), ('B', 'cat', 'long'),
173+
('A', 'dog', 'short'), ('B', 'dog', 'short')
174+
],
175+
names=['exp', 'animal', 'hair_length']
176+
)
177+
df = DataFrame(randn(4, 4), columns=columns)
178+
df
179+
180+
df.stack(level=['animal', 'hair_length'])
181+
182+
The list of levels can contain either level names or level numbers (but
183+
not a mixture of the two).
184+
185+
.. ipython:: python
186+
187+
# df.stack(level=['animal', 'hair_length'])
188+
# from above is equivalent to:
189+
df.stack(level=[1, 2])
190+
167191
These functions are intelligent about handling missing data and do not expect
168192
each subgroup within the hierarchical index to have the same set of labels.
169193
They also can handle the index being unsorted (but you can make it sorted by

doc/source/v0.15.0.txt

+5
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ users upgrade to this version.
3030
API changes
3131
~~~~~~~~~~~
3232

33+
- Passing multiple levels to ``DataFrame.stack()`` will now work when multiple level
34+
numbers are passed (:issue:`7660`), and will raise a ``ValueError`` when the
35+
levels aren't all level names or all level numbers. See
36+
:ref:`Reshaping by stacking and unstacking <reshaping.stack_multiple>`.
37+
3338
.. _whatsnew_0150.cat:
3439

3540
Categoricals in Series/DataFrame

pandas/core/frame.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -3311,13 +3311,10 @@ def stack(self, level=-1, dropna=True):
33113311
-------
33123312
stacked : DataFrame or Series
33133313
"""
3314-
from pandas.core.reshape import stack
3314+
from pandas.core.reshape import stack, stack_multiple
33153315

33163316
if isinstance(level, (tuple, list)):
3317-
result = self
3318-
for lev in level:
3319-
result = stack(result, lev, dropna=dropna)
3320-
return result
3317+
return stack_multiple(self, level, dropna=dropna)
33213318
else:
33223319
return stack(self, level, dropna=dropna)
33233320

pandas/core/index.py

+6
Original file line numberDiff line numberDiff line change
@@ -2490,6 +2490,12 @@ def _get_level_number(self, level):
24902490
raise KeyError('Level %s not found' % str(level))
24912491
elif level < 0:
24922492
level += self.nlevels
2493+
if level < 0:
2494+
orig_level = level - self.nlevels
2495+
raise IndexError(
2496+
'Too many levels: Index has only %d levels, '
2497+
'%d is not a valid level number' % (self.nlevels, orig_level)
2498+
)
24932499
# Note: levels are zero-based
24942500
elif level >= self.nlevels:
24952501
raise IndexError('Too many levels: Index has only %d levels, '

pandas/core/reshape.py

+40-3
Original file line numberDiff line numberDiff line change
@@ -513,9 +513,7 @@ def stack(frame, level=-1, dropna=True):
513513
"names are not unique.".format(level))
514514
raise ValueError(msg)
515515

516-
if isinstance(level, int) and level < 0:
517-
level += frame.columns.nlevels
518-
516+
# Will also convert negative level numbers and check if out of bounds.
519517
level = frame.columns._get_level_number(level)
520518

521519
if isinstance(frame.columns, MultiIndex):
@@ -547,6 +545,45 @@ def stack(frame, level=-1, dropna=True):
547545
return Series(new_values, index=new_index)
548546

549547

548+
def stack_multiple(frame, level, dropna=True):
    """
    Stack several column levels of ``frame`` by stacking them one at a time.

    Parameters
    ----------
    frame : DataFrame
    level : list or tuple
        Levels to stack. Must be either all column level names or all
        integer level numbers (negative numbers are allowed).
    dropna : boolean, default True
        Passed through unchanged to every underlying ``stack`` call.

    Returns
    -------
    result : DataFrame or Series
        Whatever the last ``stack`` call returns; ``frame`` itself when
        ``level`` is empty.

    Raises
    ------
    ValueError
        If ``level`` is neither entirely level names nor entirely ints.
    IndexError
        Propagated from ``_get_level_number`` for out-of-range level numbers.
    """
    result = frame

    # Every entry is a column level name: a name identifies its level
    # unambiguously, so the levels can simply be stacked in the given order.
    if all(lev in frame.columns.names for lev in level):
        for name in level:
            result = stack(result, name, dropna=dropna)
        return result

    if not all(isinstance(lev, int) for lev in level):
        raise ValueError("level should contain all level names or all level numbers, "
                         "not a mixture of the two.")

    # _get_level_number() validates the range and turns negative level
    # numbers into their non-negative equivalents up front.
    pending = [frame.columns._get_level_number(lev) for lev in level]

    # Each stack removes one column level, so every still-pending level number
    # above the one just stacked has moved down by one position.
    while pending:
        current = pending.pop(0)
        result = stack(result, current, dropna=dropna)
        pending = [num - 1 if num > current else num for num in pending]

    return result
585+
586+
550587
def _stack_multi_columns(frame, level=-1, dropna=True):
551588
this = frame.copy()
552589

pandas/tests/test_frame.py

+23
Original file line numberDiff line numberDiff line change
@@ -11725,6 +11725,29 @@ def test_stack_unstack(self):
1172511725
assert_frame_equal(unstacked_cols.T, self.frame)
1172611726
assert_frame_equal(unstacked_cols_df['bar'].T, self.frame)
1172711727

11728+
def test_stack_ints(self):
    # Stacking a list of integer levels must give the same frame as
    # stacking those levels one after another.
    column_tuples = list(itertools.product(range(3), repeat=3))
    df = DataFrame(np.random.randn(30, 27),
                   columns=MultiIndex.from_tuples(column_tuples))

    # Non-negative level numbers.
    stacked_together = df.stack(level=[1, 2])
    stacked_in_turn = df.stack(level=1).stack(level=1)
    assert_frame_equal(stacked_together, stacked_in_turn)

    # The same two levels addressed with negative numbers.
    stacked_together = df.stack(level=[-2, -1])
    stacked_in_turn = df.stack(level=1).stack(level=1)
    assert_frame_equal(stacked_together, stacked_in_turn)

    # Levels whose *names* are the integers 0, 1, 2.
    df_named = df.copy()
    df_named.columns.set_names(range(3), inplace=True)
    stacked_together = df_named.stack(level=[1, 2])
    stacked_in_turn = df_named.stack(level=1).stack(level=1)
    assert_frame_equal(stacked_together, stacked_in_turn)
11750+
1172811751
def test_unstack_bool(self):
1172911752
df = DataFrame([False, False],
1173011753
index=MultiIndex.from_arrays([['a', 'b'], ['c', 'l']]),

pandas/tests/test_multilevel.py

+22
Original file line numberDiff line numberDiff line change
@@ -834,6 +834,12 @@ def test_count_level_corner(self):
834834
columns=df.columns).fillna(0).astype(np.int64)
835835
assert_frame_equal(result, expected)
836836

837+
def test_get_level_number_out_of_bounds(self):
    # Level numbers outside the index's range must raise IndexError,
    # whether they are too large or too negative.
    index = self.frame.index

    with assertRaisesRegexp(IndexError, "Too many levels"):
        index._get_level_number(2)

    with assertRaisesRegexp(IndexError, "not a valid level number"):
        index._get_level_number(-3)
842+
837843
def test_unstack(self):
838844
# just check that it works for now
839845
unstacked = self.ymd.unstack()
@@ -1005,6 +1011,22 @@ def test_stack_unstack_multiple(self):
10051011
expected = self.ymd.unstack(2).unstack(1).dropna(axis=1, how='all')
10061012
assert_frame_equal(unstacked, expected.ix[:, unstacked.columns])
10071013

1014+
def test_stack_names_and_numbers(self):
    # A level list that mixes a level number with a level name is
    # rejected with a ValueError.
    unstacked = self.ymd.unstack(['year', 'month'])
    mixed_levels = [0, 'month']

    with assertRaisesRegexp(ValueError, "level should contain"):
        unstacked.stack(mixed_levels)
1020+
1021+
def test_stack_multiple_out_of_bounds(self):
    # nlevels == 3 for the unstacked columns, so each level list below
    # contains a level number that is out of range.
    unstacked = self.ymd.unstack(['year', 'month'])

    out_of_range_cases = [
        ([2, 3], "Too many levels"),
        ([-4, -3], "not a valid level number"),
    ]
    for levels, expected_message in out_of_range_cases:
        with assertRaisesRegexp(IndexError, expected_message):
            unstacked.stack(levels)
1029+
10081030
def test_unstack_period_series(self):
10091031
# GH 4342
10101032
idx1 = pd.PeriodIndex(['2013-01', '2013-01', '2013-02', '2013-02',

0 commit comments

Comments
 (0)