diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 812ea366b704a..4bb1e9efffff6 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -229,6 +229,9 @@ Other API Changes ^^^^^^^^^^^^^^^^^ - :class:`DatetimeIndex` now accepts :class:`Int64Index` arguments as epoch timestamps (:issue:`20997`) +- Accessing a level of a ``MultiIndex`` with a duplicate name (e.g. in + :meth:`~MultiIndex.get_level_values`) now raises a ``ValueError`` instead of + a ``KeyError`` (:issue:`21678`). - Invalid construction of ``IntervalDtype`` will now always raise a ``TypeError`` rather than a ``ValueError`` if the subdtype is invalid (:issue:`21185`) - Trying to reindex a ``DataFrame`` with a non unique ``MultiIndex`` now raises a ``ValueError`` instead of an ``Exception`` (:issue:`21770`) - diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index e143e5fdebfb1..b3bf7abe97279 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -19,6 +19,7 @@ is_categorical_dtype, is_object_dtype, is_hashable, + is_integer, is_iterator, is_list_like, pandas_dtype, @@ -757,14 +758,14 @@ def _from_elements(values, labels=None, levels=None, names=None, return MultiIndex(levels, labels, names, sortorder=sortorder) def _get_level_number(self, level): + count = self.names.count(level) + if (count > 1) and not is_integer(level): + raise ValueError('The name %s occurs multiple times, use a ' + 'level number' % level) try: - count = self.names.count(level) - if count > 1: - raise ValueError('The name %s occurs multiple times, use a ' - 'level number' % level) level = self.names.index(level) except ValueError: - if not isinstance(level, int): + if not is_integer(level): raise KeyError('Level %s not found' % str(level)) elif level < 0: level += self.nlevels @@ -2879,13 +2880,6 @@ def isin(self, values, level=None): else: return np.lib.arraysetops.in1d(labs, sought_labels) - def 
_reference_duplicate_name(self, name): - """ - Returns True if the name refered to in self.names is duplicated. - """ - # count the times name equals an element in self.names. - return sum(name == n for n in self.names) > 1 - MultiIndex._add_numeric_methods_disabled() MultiIndex._add_numeric_methods_add_sub_disabled() diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 089634fb8e315..d5d2e594b8d6b 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -114,12 +114,6 @@ def __init__(self, values, index, level=-1, value_columns=None, self.index = index.remove_unused_levels() - if isinstance(self.index, MultiIndex): - if index._reference_duplicate_name(level): - msg = ("Ambiguous reference to {level}. The index " - "names are not unique.".format(level=level)) - raise ValueError(msg) - self.level = self.index._get_level_number(level) # when index includes `nan`, need to lift levels/strides by 1 @@ -533,12 +527,6 @@ def factorize(index): N, K = frame.shape - if isinstance(frame.columns, MultiIndex): - if frame.columns._reference_duplicate_name(level): - msg = ("Ambiguous reference to {level}. The column " - "names are not unique.".format(level=level)) - raise ValueError(msg) - # Will also convert negative level numbers and check if out of bounds. level_num = frame.columns._get_level_number(level) diff --git a/pandas/tests/indexes/multi/test_unique_and_duplicates.py b/pandas/tests/indexes/multi/test_unique_and_duplicates.py index a97d84ace9602..c1000e5b6e0f6 100644 --- a/pandas/tests/indexes/multi/test_unique_and_duplicates.py +++ b/pandas/tests/indexes/multi/test_unique_and_duplicates.py @@ -255,5 +255,5 @@ def test_unique_na(): def test_duplicate_level_names_access_raises(idx): idx.names = ['foo', 'foo'] - tm.assert_raises_regex(KeyError, 'Level foo not found', + tm.assert_raises_regex(ValueError, 'name foo occurs multiple times', idx._get_level_number, 'foo')