Skip to content

Commit 0a84569

Browse files
committed
ENH/API: DataFrame.stack() supports level=None and sequentially=True/False.
1 parent 484f668 commit 0a84569

File tree

5 files changed

+381
-120
lines changed

5 files changed

+381
-120
lines changed

pandas/core/frame.py

+19-10
Original file line numberDiff line numberDiff line change
@@ -3392,7 +3392,7 @@ def pivot(self, index=None, columns=None, values=None):
33923392
from pandas.core.reshape import pivot
33933393
return pivot(self, index=index, columns=columns, values=values)
33943394

3395-
def stack(self, level=-1, dropna=True):
3395+
def stack(self, level=-1, dropna=True, sequentially=True):
33963396
"""
33973397
Pivot a level of the (possibly hierarchical) column labels, returning a
33983398
DataFrame (or Series in the case of an object with a single level of
@@ -3402,11 +3402,15 @@ def stack(self, level=-1, dropna=True):
34023402
34033403
Parameters
34043404
----------
3405-
level : int, string, or list of these, default last level
3406-
Level(s) to stack, can pass level name
3405+
level : int, string, list of these, or None; default -1 (last level)
3406+
Level(s) to stack, can pass level name(s).
3407+
None specifies all column levels, i.e. list(range(columns.nlevels)).
34073408
dropna : boolean, default True
34083409
Whether to drop rows in the resulting Frame/Series with no valid
34093410
values
3411+
sequentially : boolean, default True
3412+
When level is a list (or None), whether the multiple column levels
3413+
should be stacked sequentially (if True) or simultaneously (if False).
34103414
34113415
Examples
34123416
----------
@@ -3425,14 +3429,15 @@ def stack(self, level=-1, dropna=True):
34253429
-------
34263430
stacked : DataFrame or Series
34273431
"""
3428-
from pandas.core.reshape import stack, stack_multiple
3432+
from pandas.core.reshape import stack_levels_sequentially, stack_multi_levels_simultaneously
34293433

3430-
if isinstance(level, (tuple, list)):
3431-
return stack_multiple(self, level, dropna=dropna)
3434+
level_num = self.columns._get_level_numbers(level, allow_mixed_names_and_numbers=False)
3435+
if isinstance(level_num, (tuple, list, set)) and isinstance(self.columns, MultiIndex) and (not sequentially):
3436+
return stack_multi_levels_simultaneously(self, level_num, dropna=dropna)
34323437
else:
3433-
return stack(self, level, dropna=dropna)
3438+
return stack_levels_sequentially(self, level_num, dropna=dropna)
34343439

3435-
def unstack(self, level=-1):
3440+
def unstack(self, level=-1, sequentially=True):
34363441
"""
34373442
Pivot a level of the (necessarily hierarchical) index labels, returning
34383443
a DataFrame having a new level of column labels whose inner-most level
@@ -3443,8 +3448,12 @@ def unstack(self, level=-1):
34433448
34443449
Parameters
34453450
----------
3446-
level : int, string, or list of these, default -1 (last level)
3447-
Level(s) of index to unstack, can pass level name
3451+
level : int, string, list of these, or None; default -1 (last level)
3452+
Level(s) of index to unstack, can pass level name(s).
3453+
None specifies all index levels, i.e. list(range(index.nlevels)).
3454+
sequentially : boolean, default True
3455+
When level is a list (or None), whether the multiple index levels
3456+
should be unstacked sequentially (if True) or simultaneously (if False).
34483457
34493458
See also
34503459
--------

pandas/core/index.py

+41-23
Original file line numberDiff line numberDiff line change
@@ -824,10 +824,26 @@ def _validate_index_level(self, level):
824824
raise KeyError('Level %s must be same as name (%s)'
825825
% (level, self.name))
826826

827-
def _get_level_number(self, level):
827+
def _get_level_number(self, level, ignore_names=False):
828+
if ignore_names and (not isinstance(level, int)):
829+
raise KeyError('Level %s not found' % str(level))
828830
self._validate_index_level(level)
829831
return 0
830832

833+
def _get_level_numbers(self, levels, allow_mixed_names_and_numbers=False):
834+
if levels is None:
835+
return list(range(self.nlevels))
836+
elif isinstance(levels, (list, tuple, set)):
837+
if (not allow_mixed_names_and_numbers) and (not all(lev in self.names for lev in levels)):
838+
if all(isinstance(lev, int) for lev in levels):
839+
return type(levels)(self._get_level_number(level, ignore_names=True) for level in levels)
840+
else:
841+
raise ValueError("level should contain all level names or all level numbers, "
842+
"not a mixture of the two.")
843+
return type(levels)(self._get_level_number(level) for level in levels)
844+
else:
845+
return self._get_level_number(levels)
846+
831847
@cache_readonly
832848
def inferred_type(self):
833849
""" return a string of the type inferred from the values """
@@ -3161,28 +3177,30 @@ def _from_elements(values, labels=None, levels=None, names=None,
31613177
sortorder=None):
31623178
return MultiIndex(levels, labels, names, sortorder=sortorder)
31633179

3164-
def _get_level_number(self, level):
3165-
try:
3180+
def _get_level_number(self, level, ignore_names=False):
3181+
if not ignore_names:
31663182
count = self.names.count(level)
31673183
if count > 1:
31683184
raise ValueError('The name %s occurs multiple times, use a '
31693185
'level number' % level)
3170-
level = self.names.index(level)
3171-
except ValueError:
3172-
if not isinstance(level, int):
3173-
raise KeyError('Level %s not found' % str(level))
3174-
elif level < 0:
3175-
level += self.nlevels
3176-
if level < 0:
3177-
orig_level = level - self.nlevels
3178-
raise IndexError(
3179-
'Too many levels: Index has only %d levels, '
3180-
'%d is not a valid level number' % (self.nlevels, orig_level)
3181-
)
3182-
# Note: levels are zero-based
3183-
elif level >= self.nlevels:
3184-
raise IndexError('Too many levels: Index has only %d levels, '
3185-
'not %d' % (self.nlevels, level + 1))
3186+
try:
3187+
return self.names.index(level)
3188+
except ValueError:
3189+
pass
3190+
if not isinstance(level, int):
3191+
raise KeyError('Level %s not found' % str(level))
3192+
elif level < 0:
3193+
level += self.nlevels
3194+
if level < 0:
3195+
orig_level = level - self.nlevels
3196+
raise IndexError(
3197+
'Too many levels: Index has only %d levels, '
3198+
'%d is not a valid level number' % (self.nlevels, orig_level)
3199+
)
3200+
# Note: levels are zero-based
3201+
elif level >= self.nlevels:
3202+
raise IndexError('Too many levels: Index has only %d levels, '
3203+
'not %d' % (self.nlevels, level + 1))
31863204
return level
31873205

31883206
_tuples = None
@@ -4852,7 +4870,7 @@ def _trim_front(strings):
48524870

48534871

48544872
def _sanitize_and_check(indexes):
4855-
kinds = list(set([type(index) for index in indexes]))
4873+
kinds = list(set(type(index) for index in indexes))
48564874

48574875
if list in kinds:
48584876
if len(kinds) > 1:
@@ -4873,11 +4891,11 @@ def _get_consensus_names(indexes):
48734891

48744892
# find the non-none names, need to tupleify to make
48754893
# the set hashable, then reverse on return
4876-
consensus_names = set([
4894+
consensus_names = set(
48774895
tuple(i.names) for i in indexes if all(n is not None for n in i.names)
4878-
])
4896+
)
48794897
if len(consensus_names) == 1:
4880-
return list(list(consensus_names)[0])
4898+
return list(consensus_names.pop())
48814899
return [None] * indexes[0].nlevels
48824900

48834901

0 commit comments

Comments
 (0)