Skip to content

Commit 0ee216f

Browse files
committed
add fix for bug pandas-dev#19029
As of version 0.23.0, MultiIndex throws an exception if it contains duplicated level names. This can happen as a result of various groupby operations (pandas-dev#21075). This commit changes the behavior of groupby slightly: if the index contains duplicated names, these names get suffixed by their corresponding position (e.g. [name,name] => [name0,name1]).
1 parent 81358e8 commit 0ee216f

File tree

2 files changed

+21
-3
lines changed

2 files changed

+21
-3
lines changed

pandas/core/groupby/groupby.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import copy
88
from textwrap import dedent
99
from contextlib import contextmanager
10+
import pdb
1011

1112
from pandas.compat import (
1213
zip, range, lzip,
@@ -2298,7 +2299,18 @@ def levels(self):
22982299

22992300
@property
23002301
def names(self):
2301-
return [ping.name for ping in self.groupings]
2302+
# GH 19029
2303+
# add suffix to level name in case they contain duplicates (GH 19029):
2304+
orig_names = [ping.name for ping in self.groupings]
2305+
# if no names were assigned return the original names
2306+
if all(x is None for x in orig_names):
2307+
return orig_names
2308+
# in case duplicates are contained rename all of them
2309+
if len(set(orig_names)) < len(orig_names):
2310+
orig_names = [''.join([str(x),str(i)])
2311+
for i,x in enumerate(orig_names)]
2312+
2313+
return orig_names
23022314

23032315
def size(self):
23042316
"""

pandas/tests/groupby/test_categorical.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -558,9 +558,15 @@ def test_as_index():
558558
result = df.groupby(['cat', s], as_index=False, observed=True).sum()
559559
tm.assert_frame_equal(result, expected)
560560

561-
# GH18872: conflicting names in desired index
562-
with pytest.raises(ValueError):
561+
# GH 19029: conflicting names should not raise a ValueError anymore
562+
raised=False
563+
try:
563564
df.groupby(['cat', s.rename('cat')], observed=True).sum()
565+
except ValueError as e:
566+
raised = True
567+
assert raised == False
568+
569+
564570

565571
# is original index dropped?
566572
group_columns = ['cat', 'A']

0 commit comments

Comments
 (0)