Skip to content

Commit cd9c777

Browse files
committed
ENH/API: DataFrame.stack() support for level=None, sequentially=True/False, and NaN level values.
1 parent 7d90236 commit cd9c777

File tree

7 files changed

+805
-274
lines changed

7 files changed

+805
-274
lines changed

pandas/core/frame.py

+65-11
Original file line numberDiff line numberDiff line change
@@ -3718,7 +3718,7 @@ def pivot(self, index=None, columns=None, values=None):
37183718
from pandas.core.reshape import pivot
37193719
return pivot(self, index=index, columns=columns, values=values)
37203720

3721-
def stack(self, level=-1, dropna=True):
3721+
def stack(self, level=-1, dropna=True, sequentially=True):
37223722
"""
37233723
Pivot a level of the (possibly hierarchical) column labels, returning a
37243724
DataFrame (or Series in the case of an object with a single level of
@@ -3728,11 +3728,15 @@ def stack(self, level=-1, dropna=True):
37283728
37293729
Parameters
37303730
----------
3731-
level : int, string, or list of these, default last level
3732-
Level(s) to stack, can pass level name
3731+
level : int, string, list of these, or None; default -1 (last level)
3732+
Level(s) to stack, can pass level name(s).
3733+
None specifies all column levels, i.e. list(range(columns.nlevels)).
37333734
dropna : boolean, default True
37343735
Whether to drop rows in the resulting Frame/Series with no valid
37353736
values
3737+
sequentially : boolean, default True
3738+
When level is a list (or None), whether the multiple column levels
3739+
should be stacked sequentially (if True) or simultaneously (if False).
37363740
37373741
Examples
37383742
----------
@@ -3751,14 +3755,20 @@ def stack(self, level=-1, dropna=True):
37513755
-------
37523756
stacked : DataFrame or Series
37533757
"""
3754-
from pandas.core.reshape import stack, stack_multiple
3758+
from pandas.core.reshape import stack_levels_sequentially, stack_multi_levels_simultaneously
37553759

3756-
if isinstance(level, (tuple, list)):
3757-
return stack_multiple(self, level, dropna=dropna)
3760+
level_nums = self.columns._get_level_numbers(level, allow_mixed_names_and_numbers=False)
3761+
if level_nums == []:
3762+
if dropna:
3763+
return self.dropna(axis=0, how='all')
3764+
else:
3765+
return self
3766+
elif (not sequentially) and isinstance(self.columns, MultiIndex):
3767+
return stack_multi_levels_simultaneously(self, level_nums, dropna=dropna)
37583768
else:
3759-
return stack(self, level, dropna=dropna)
3769+
return stack_levels_sequentially(self, level_nums, dropna=dropna)
37603770

3761-
def unstack(self, level=-1):
3771+
def unstack(self, level=-1, dropna=False, sequentially=False):
37623772
"""
37633773
Pivot a level of the (necessarily hierarchical) index labels, returning
37643774
a DataFrame having a new level of column labels whose inner-most level
@@ -3769,8 +3779,15 @@ def unstack(self, level=-1):
37693779
37703780
Parameters
37713781
----------
3772-
level : int, string, or list of these, default -1 (last level)
3773-
Level(s) of index to unstack, can pass level name
3782+
level : int, string, list of these, or None; default -1 (last level)
3783+
Level(s) of index to unstack, can pass level name(s).
3784+
None specifies all index levels, i.e. list(range(index.nlevels)).
3785+
dropna : boolean, default False
3786+
Whether to drop columns in the resulting Frame/Series with no valid
3787+
values
3788+
sequentially : boolean, default False
3789+
When level is a list (or None), whether the multiple index levels
3790+
should be unstacked sequentially (if True) or simultaneously (if False).
37743791
37753792
See also
37763793
--------
@@ -3812,7 +3829,44 @@ def unstack(self, level=-1):
38123829
unstacked : DataFrame or Series
38133830
"""
38143831
from pandas.core.reshape import unstack
3815-
return unstack(self, level)
3832+
3833+
level_nums = self.index._get_level_numbers(level, allow_mixed_names_and_numbers=False)
3834+
if level_nums == []:
3835+
if dropna:
3836+
return self.dropna(axis=1, how='all')
3837+
else:
3838+
return self
3839+
if sequentially and isinstance(level_nums, list) and (len(level_nums) > 1):
3840+
result = self
3841+
# Adjust level_nums to account for the fact that levels move "up"
3842+
# as a result of stacking of earlier levels.
3843+
adjusted_level_nums = [x - sum((y < x) for y in level_nums[:i])
3844+
for i, x in enumerate(level_nums)]
3845+
for level_num in adjusted_level_nums:
3846+
result = unstack(result, level_num)
3847+
else:
3848+
result = unstack(self, level_nums)
3849+
3850+
if isinstance(result, DataFrame):
3851+
# fix dtypes, if necessary
3852+
desired_dtypes = self.dtypes.values.repeat(len(result.columns) // len(self.columns))
3853+
result_dtypes = result.dtypes.values
3854+
for i, c in enumerate(result.columns):
3855+
if result_dtypes[i] != desired_dtypes[i]:
3856+
if result_dtypes[i] == np.object:
3857+
# use default Series constructor to set type
3858+
result[c] = Series(result[c].values.tolist(), index=result.index)
3859+
else:
3860+
# try to convert type directly
3861+
result[c] = result[c].astype(desired_dtypes[i], raise_on_error=False)
3862+
# drop empty columns, if necessary
3863+
if dropna:
3864+
result = result.dropna(axis=1, how='all')
3865+
else:
3866+
if dropna:
3867+
result = result.dropna()
3868+
3869+
return result
38163870

38173871
#----------------------------------------------------------------------
38183872
# Time series-related

pandas/core/index.py

+76-27
Original file line numberDiff line numberDiff line change
@@ -1033,7 +1033,7 @@ def _validate_index_level(self, level):
10331033
verification must be done like in MultiIndex.
10341034
10351035
"""
1036-
if isinstance(level, int):
1036+
if com.is_integer(level):
10371037
if level < 0 and level != -1:
10381038
raise IndexError("Too many levels: Index has only 1 level,"
10391039
" %d is not a valid level number" % (level,))
@@ -1045,10 +1045,44 @@ def _validate_index_level(self, level):
10451045
raise KeyError('Level %s must be same as name (%s)'
10461046
% (level, self.name))
10471047

1048-
def _get_level_number(self, level):
1048+
def _get_level_number(self, level, ignore_names=False):
1049+
"""
1050+
Returns level number corresponding to level.
1051+
If level is a level name and ignore_names is False,
1052+
the level number corresponding to such level name is returned.
1053+
Otherwise level must be a number.
1054+
If level is a positive number, it is returned.
1055+
If level is a negative number, its sum with self.nlevels is returned.
1056+
"""
1057+
if ignore_names and (not com.is_integer(level)):
1058+
raise KeyError('Level %s not found' % str(level))
10491059
self._validate_index_level(level)
10501060
return 0
10511061

1062+
def _get_level_numbers(self, levels, allow_mixed_names_and_numbers=False):
1063+
"""
1064+
Returns level numbers corresponding to levels.
1065+
If levels is None, a list of all level numbers is returned.
1066+
If levels is a single number or level name,
1067+
then a single number is returned (using _get_level_number()).
1068+
If levels is a list of numbers or level names,
1069+
then a list of numbers is returned (each using _get_level_number()).
1070+
If allow_mixed_names_and_numbers is False, then levels must be
1071+
either all level numbers or all level names.
1072+
"""
1073+
if levels is None:
1074+
return list(range(self.nlevels))
1075+
elif isinstance(levels, (list, tuple, set)):
1076+
if (not allow_mixed_names_and_numbers) and (not all(lev in self.names for lev in levels)):
1077+
if all(isinstance(lev, int) for lev in levels):
1078+
return type(levels)(self._get_level_number(level, ignore_names=True) for level in levels)
1079+
else:
1080+
raise ValueError("level should contain all level names or all level numbers, "
1081+
"not a mixture of the two.")
1082+
return type(levels)(self._get_level_number(level) for level in levels)
1083+
else:
1084+
return self._get_level_number(levels)
1085+
10521086
@cache_readonly
10531087
def inferred_type(self):
10541088
""" return a string of the type inferred from the values """
@@ -4294,28 +4328,38 @@ def _from_elements(values, labels=None, levels=None, names=None,
42944328
sortorder=None):
42954329
return MultiIndex(levels, labels, names, sortorder=sortorder)
42964330

4297-
def _get_level_number(self, level):
4298-
try:
4331+
def _get_level_number(self, level, ignore_names=False):
4332+
"""
4333+
Returns level number corresponding to level.
4334+
If level is a level name and ignore_names is False,
4335+
the level number corresponding to such level name is returned.
4336+
Otherwise level must be a number.
4337+
If level is a non-negative number, it is returned.
4338+
If level is a negative number, its sum with self.nlevels is returned.
4339+
"""
4340+
if not ignore_names:
42994341
count = self.names.count(level)
43004342
if count > 1:
43014343
raise ValueError('The name %s occurs multiple times, use a '
43024344
'level number' % level)
4303-
level = self.names.index(level)
4304-
except ValueError:
4305-
if not isinstance(level, int):
4306-
raise KeyError('Level %s not found' % str(level))
4307-
elif level < 0:
4308-
level += self.nlevels
4309-
if level < 0:
4310-
orig_level = level - self.nlevels
4311-
raise IndexError(
4312-
'Too many levels: Index has only %d levels, '
4313-
'%d is not a valid level number' % (self.nlevels, orig_level)
4314-
)
4315-
# Note: levels are zero-based
4316-
elif level >= self.nlevels:
4317-
raise IndexError('Too many levels: Index has only %d levels, '
4318-
'not %d' % (self.nlevels, level + 1))
4345+
try:
4346+
return self.names.index(level)
4347+
except ValueError:
4348+
pass
4349+
if not com.is_integer(level):
4350+
raise KeyError('Level %s not found' % str(level))
4351+
elif level < 0:
4352+
level += self.nlevels
4353+
if level < 0:
4354+
orig_level = level - self.nlevels
4355+
raise IndexError(
4356+
'Too many levels: Index has only %d levels, '
4357+
'%d is not a valid level number' % (self.nlevels, orig_level)
4358+
)
4359+
# Note: levels are zero-based
4360+
elif level >= self.nlevels:
4361+
raise IndexError('Too many levels: Index has only %d levels, '
4362+
'not %d' % (self.nlevels, level + 1))
43194363
return level
43204364

43214365
_tuples = None
@@ -4891,14 +4935,16 @@ def _drop_from_level(self, labels, level):
48914935

48924936
return self[mask]
48934937

4894-
def droplevel(self, level=0):
4938+
def droplevel(self, level=0, ignore_names=False):
48954939
"""
48964940
Return Index with requested level removed. If MultiIndex has only 2
48974941
levels, the result will be of Index type not MultiIndex.
48984942
48994943
Parameters
49004944
----------
49014945
level : int/level name or list thereof
4946+
ignore_names : boolean, default False
4947+
If True, level must be an int or list thereof
49024948
49034949
Notes
49044950
-----
@@ -4916,7 +4962,7 @@ def droplevel(self, level=0):
49164962
new_labels = list(self.labels)
49174963
new_names = list(self.names)
49184964

4919-
levnums = sorted(self._get_level_number(lev) for lev in levels)[::-1]
4965+
levnums = sorted((self._get_level_number(lev, ignore_names) for lev in levels), reverse=True)
49204966

49214967
for i in levnums:
49224968
new_levels.pop(i)
@@ -4929,6 +4975,9 @@ def droplevel(self, level=0):
49294975
mask = new_labels[0] == -1
49304976
result = new_levels[0].take(new_labels[0])
49314977
if mask.any():
4978+
if result.is_integer():
4979+
# cannot store NaNs in an integer index, so promote to Float64Index
4980+
result = Float64Index(result.values, name=result.name)
49324981
result = result.putmask(mask, np.nan)
49334982

49344983
result.name = new_names[0]
@@ -5539,7 +5588,7 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels):
55395588

55405589
else:
55415590

5542-
loc = level_index.get_loc(key)
5591+
loc = -1 if com.is_float(key) and np.isnan(key) else level_index.get_loc(key)
55435592
if level > 0 or self.lexsort_depth == 0:
55445593
return np.array(labels == loc,dtype=bool)
55455594
else:
@@ -6050,7 +6099,7 @@ def _trim_front(strings):
60506099

60516100

60526101
def _sanitize_and_check(indexes):
6053-
kinds = list(set([type(index) for index in indexes]))
6102+
kinds = list(set(type(index) for index in indexes))
60546103

60556104
if list in kinds:
60566105
if len(kinds) > 1:
@@ -6071,11 +6120,11 @@ def _get_consensus_names(indexes):
60716120

60726121
# find the non-none names, need to tupleify to make
60736122
# the set hashable, then reverse on return
6074-
consensus_names = set([
6123+
consensus_names = set(
60756124
tuple(i.names) for i in indexes if all(n is not None for n in i.names)
6076-
])
6125+
)
60776126
if len(consensus_names) == 1:
6078-
return list(list(consensus_names)[0])
6127+
return list(consensus_names.pop())
60796128
return [None] * indexes[0].nlevels
60806129

60816130

0 commit comments

Comments
 (0)