Skip to content

Commit e9b1a10

Browse files
committed
Merge pull request #10433 from behzadnouri/stack-dupl-indx
BUG: closes bug in stack when index is not unique
2 parents 5a4d60f + 32f5517 commit e9b1a10

File tree

3 files changed

+54
-7
lines changed

3 files changed

+54
-7
lines changed

doc/source/whatsnew/v0.17.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,7 @@ Bug Fixes
561561
- Bug in ``Table.select_column`` where name is not preserved (:issue:`10392`)
562562
- Bug in ``offsets.generate_range`` where ``start`` and ``end`` have finer precision than ``offset`` (:issue:`9907`)
563563
- Bug in ``pd.rolling_*`` where ``Series.name`` would be lost in the output (:issue:`10565`)
564+
- Bug in ``stack`` when index or columns are not unique (:issue:`10417`)
564565

565566

566567

pandas/core/reshape.py

+15-7
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,12 @@ def stack(frame, level=-1, dropna=True):
461461
-------
462462
stacked : Series
463463
"""
464+
def factorize(index):
465+
if index.is_unique:
466+
return index, np.arange(len(index))
467+
cat = Categorical(index, ordered=True)
468+
return cat.categories, cat.codes
469+
464470
N, K = frame.shape
465471
if isinstance(frame.columns, MultiIndex):
466472
if frame.columns._reference_duplicate_name(level):
@@ -475,20 +481,22 @@ def stack(frame, level=-1, dropna=True):
475481
return _stack_multi_columns(frame, level_num=level_num, dropna=dropna)
476482
elif isinstance(frame.index, MultiIndex):
477483
new_levels = list(frame.index.levels)
478-
new_levels.append(frame.columns)
479-
480484
new_labels = [lab.repeat(K) for lab in frame.index.labels]
481-
new_labels.append(np.tile(np.arange(K), N).ravel())
485+
486+
clev, clab = factorize(frame.columns)
487+
new_levels.append(clev)
488+
new_labels.append(np.tile(clab, N).ravel())
482489

483490
new_names = list(frame.index.names)
484491
new_names.append(frame.columns.name)
485492
new_index = MultiIndex(levels=new_levels, labels=new_labels,
486493
names=new_names, verify_integrity=False)
487494
else:
488-
ilabels = np.arange(N).repeat(K)
489-
clabels = np.tile(np.arange(K), N).ravel()
490-
new_index = MultiIndex(levels=[frame.index, frame.columns],
491-
labels=[ilabels, clabels],
495+
levels, (ilab, clab) = \
496+
zip(*map(factorize, (frame.index, frame.columns)))
497+
labels = ilab.repeat(K), np.tile(clab, N).ravel()
498+
new_index = MultiIndex(levels=levels,
499+
labels=labels,
492500
names=[frame.index.name, frame.columns.name],
493501
verify_integrity=False)
494502

pandas/tests/test_multilevel.py

+38
Original file line numberDiff line numberDiff line change
@@ -964,6 +964,44 @@ def test_stack(self):
964964
result = self.ymd.unstack(0).stack(-2)
965965
expected = self.ymd.unstack(0).stack(0)
966966

967+
# GH10417
968+
def check(left, right):
969+
assert_series_equal(left, right)
970+
self.assertFalse(left.index.is_unique)
971+
li, ri = left.index, right.index
972+
for i in range(ri.nlevels):
973+
tm.assert_numpy_array_equal(li.levels[i], ri.levels[i])
974+
tm.assert_numpy_array_equal(li.labels[i], ri.labels[i])
975+
976+
df = DataFrame(np.arange(12).reshape(4, 3),
977+
index=list('abab'),
978+
columns=['1st', '2nd', '3rd'])
979+
980+
mi = MultiIndex(levels=[['a', 'b'], ['1st', '2nd', '3rd']],
981+
labels=[np.tile(np.arange(2).repeat(3), 2),
982+
np.tile(np.arange(3), 4)])
983+
984+
left, right = df.stack(), Series(np.arange(12), index=mi)
985+
check(left, right)
986+
987+
df.columns = ['1st', '2nd', '1st']
988+
mi = MultiIndex(levels=[['a', 'b'], ['1st', '2nd']],
989+
labels=[np.tile(np.arange(2).repeat(3), 2),
990+
np.tile([0, 1, 0], 4)])
991+
992+
left, right = df.stack(), Series(np.arange(12), index=mi)
993+
check(left, right)
994+
995+
tpls = ('a', 2), ('b', 1), ('a', 1), ('b', 2)
996+
df.index = MultiIndex.from_tuples(tpls)
997+
mi = MultiIndex(levels=[['a', 'b'], [1, 2], ['1st', '2nd']],
998+
labels=[np.tile(np.arange(2).repeat(3), 2),
999+
np.repeat([1, 0, 1], [3, 6, 3]),
1000+
np.tile([0, 1, 0], 4)])
1001+
1002+
left, right = df.stack(), Series(np.arange(12), index=mi)
1003+
check(left, right)
1004+
9671005
def test_unstack_odd_failure(self):
9681006
data = """day,time,smoker,sum,len
9691007
Fri,Dinner,No,8.25,3.

0 commit comments

Comments
 (0)