Skip to content

Commit 2063c1f

Browse files
committed
Merge pull request #8855 from seth-p/multiindex_stacking
BUG: DataFrame.stack(..., dropna=False) with partial MultiIndex.
2 parents f4b1c6d + c350118 commit 2063c1f

File tree

3 files changed

+57
-4
lines changed

3 files changed

+57
-4
lines changed

doc/source/whatsnew/v0.15.2.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,8 @@ Bug Fixes
145145

146146
- Bug where ``get_data_google`` returned object dtypes (:issue:`3995`)
147147

148-
148+
- Bug in ``DataFrame.stack(..., dropna=False)`` when the DataFrame's ``columns`` is a ``MultiIndex``
149+
whose ``labels`` do not reference all its ``levels``. (:issue:`8844`)
149150

150151

151152
- BUG: Option context applies on __enter__ (:issue:`8514`)

pandas/core/reshape.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,9 @@ def _convert_level_number(level_num, columns):
648648
# time to ravel the values
649649
new_data = {}
650650
level_vals = this.columns.levels[-1]
651-
levsize = len(level_vals)
651+
level_labels = sorted(set(this.columns.labels[-1]))
652+
level_vals_used = level_vals[level_labels]
653+
levsize = len(level_labels)
652654
drop_cols = []
653655
for key in unique_groups:
654656
loc = this.columns.get_loc(key)
@@ -661,7 +663,7 @@ def _convert_level_number(level_num, columns):
661663
elif slice_len != levsize:
662664
chunk = this.ix[:, this.columns[loc]]
663665
chunk.columns = level_vals.take(chunk.columns.labels[-1])
664-
value_slice = chunk.reindex(columns=level_vals).values
666+
value_slice = chunk.reindex(columns=level_vals_used).values
665667
else:
666668
if frame._is_mixed_type:
667669
value_slice = this.ix[:, this.columns[loc]].values
@@ -685,7 +687,7 @@ def _convert_level_number(level_num, columns):
685687
new_names = [this.index.name] # something better?
686688

687689
new_levels.append(frame.columns.levels[level_num])
688-
new_labels.append(np.tile(np.arange(levsize), N))
690+
new_labels.append(np.tile(level_labels, N))
689691
new_names.append(frame.columns.names[level_num])
690692

691693
new_index = MultiIndex(levels=new_levels, labels=new_labels,

pandas/tests/test_frame.py

+50
Original file line numberDiff line numberDiff line change
@@ -12266,6 +12266,56 @@ def test_stack_datetime_column_multiIndex(self):
1226612266
expected = DataFrame([1, 2, 3, 4], index=eidx, columns=ecols)
1226712267
assert_frame_equal(result, expected)
1226812268

12269+
def test_stack_partial_multiIndex(self):
    """Check ``stack(..., dropna=False)`` on partial MultiIndex columns.

    Column indexes are built by selecting subsets of a five-entry
    MultiIndex, in both the given and the reversed order.  Each frame is
    stacked on every level / level combination and compared against the
    same data whose columns were rebuilt with ``MultiIndex.from_tuples``.
    A final case pins the exact expected frame for one subset.
    """
    # GH 8844
    def _assert_stacked_equal(result, expected):
        # stack() may hand back either a Series or a DataFrame; pick the
        # matching comparison helper based on the expected object.
        if isinstance(expected, Series):
            assert_series_equal(result, expected)
        else:
            assert_frame_equal(result, expected)

    def _test_stack_with_multiindex(multiindex):
        ncols = len(multiindex)
        df = DataFrame(np.arange(3 * ncols).reshape(3, ncols),
                       columns=multiindex)

        # Reference frame: identical data, columns rebuilt from tuples.
        # It is a separate copy so that ``df`` keeps the index under test
        # for every ``level`` below; reassigning ``df.columns`` inside the
        # loop would make all iterations after the first compare the
        # rebuilt frame against itself.
        # NOTE(review): presumably the rebuilt index only carries level
        # values that are actually used -- confirm against from_tuples.
        rebuilt = df.copy()
        rebuilt.columns = MultiIndex.from_tuples(df.columns.get_values(),
                                                 names=df.columns.names)

        for level in (-1, 0, 1, [0, 1], [1, 0]):
            result = df.stack(level=level, dropna=False)

            if isinstance(level, int):
                # Stacking a single level should not make any all-NaN
                # rows, so df.stack(level=level, dropna=False) should be
                # the same as df.stack(level=level, dropna=True).
                _assert_stacked_equal(
                    result, df.stack(level=level, dropna=True))

            _assert_stacked_equal(
                result, rebuilt.stack(level=level, dropna=False))

    full_multiindex = MultiIndex.from_tuples([('B', 'x'), ('B', 'z'),
                                              ('A', 'y'),
                                              ('C', 'x'), ('C', 'u')],
                                             names=['Upper', 'Lower'])
    for multiindex_columns in ([0, 1, 2, 3, 4],
                               [0, 1, 2, 3], [0, 1, 2, 4],
                               [0, 1, 2], [1, 2, 3], [2, 3, 4],
                               [0, 1], [0, 2], [0, 3],
                               [0], [2], [4]):
        _test_stack_with_multiindex(full_multiindex[multiindex_columns])
        if len(multiindex_columns) > 1:
            # Same subset, opposite column order.
            _test_stack_with_multiindex(
                full_multiindex[multiindex_columns[::-1]])

    # Explicit expected output for columns (B, x), (B, z), (C, x).
    df = DataFrame(np.arange(6).reshape(2, 3),
                   columns=full_multiindex[[0, 1, 3]])
    result = df.stack(dropna=False)
    expected = DataFrame([[0, 2], [1, nan], [3, 5], [4, nan]],
                         index=MultiIndex(levels=[[0, 1],
                                                  ['u', 'x', 'y', 'z']],
                                          labels=[[0, 0, 1, 1],
                                                  [1, 3, 1, 3]],
                                          names=[None, 'Lower']),
                         columns=Index(['B', 'C'], name='Upper'),
                         dtype=df.dtypes[0])
    assert_frame_equal(result, expected)
12318+
1226912319
def test_repr_with_mi_nat(self):
1227012320
df = DataFrame({'X': [1, 2]},
1227112321
index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']])

0 commit comments

Comments
 (0)