From 8ea241933e8b1711bd15bea76406c8b7111d8f19 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 29 Jun 2018 11:09:41 +0200 Subject: [PATCH 1/5] API: referring to duplicate level name raises ValueError --- pandas/core/indexes/multi.py | 40 +++++++++++++----------------- pandas/core/reshape/reshape.py | 12 --------- pandas/tests/indexes/test_multi.py | 2 +- 3 files changed, 18 insertions(+), 36 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a2322348e1caa..d488931a4bfaa 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -755,26 +755,27 @@ def _from_elements(values, labels=None, levels=None, names=None, return MultiIndex(levels, labels, names, sortorder=sortorder) def _get_level_number(self, level): - try: + if not isinstance(level, int): count = self.names.count(level) if count > 1: raise ValueError('The name %s occurs multiple times, use a ' - 'level number' % level) - level = self.names.index(level) - except ValueError: - if not isinstance(level, int): + 'level number' % level) + try: + level = self.names.index(level) + except ValueError: raise KeyError('Level %s not found' % str(level)) - elif level < 0: - level += self.nlevels - if level < 0: - orig_level = level - self.nlevels - raise IndexError('Too many levels: Index has only %d ' - 'levels, %d is not a valid level number' % - (self.nlevels, orig_level)) - # Note: levels are zero-based - elif level >= self.nlevels: - raise IndexError('Too many levels: Index has only %d levels, ' - 'not %d' % (self.nlevels, level + 1)) + + if level < 0: + level += self.nlevels + if level < 0: + orig_level = level - self.nlevels + raise IndexError('Too many levels: Index has only %d ' + 'levels, %d is not a valid level number' % + (self.nlevels, orig_level)) + # Note: levels are zero-based + elif level >= self.nlevels: + raise IndexError('Too many levels: Index has only %d levels, ' + 'not %d' % (self.nlevels, level + 1)) return level _tuples = None @@ -2881,13 +2882,6 @@ def isin(self, values, level=None): else: return np.lib.arraysetops.in1d(labs, sought_labels) - def _reference_duplicate_name(self, name): - """ - Returns True if the name refered to in self.names is duplicated. - """ - # count the times name equals an element in self.names. - return sum(name == n for n in self.names) > 1 - MultiIndex._add_numeric_methods_disabled() MultiIndex._add_numeric_methods_add_sub_disabled() diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 3d9e84954a63b..2757e0797a410 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -115,12 +115,6 @@ def __init__(self, values, index, level=-1, value_columns=None, self.index = index.remove_unused_levels() - if isinstance(self.index, MultiIndex): - if index._reference_duplicate_name(level): - msg = ("Ambiguous reference to {level}. The index " - "names are not unique.".format(level=level)) - raise ValueError(msg) - self.level = self.index._get_level_number(level) # when index includes `nan`, need to lift levels/strides by 1 @@ -534,12 +528,6 @@ def factorize(index): N, K = frame.shape - if isinstance(frame.columns, MultiIndex): - if frame.columns._reference_duplicate_name(level): - msg = ("Ambiguous reference to {level}. The column " - "names are not unique.".format(level=level)) - raise ValueError(msg) - # Will also convert negative level numbers and check if out of bounds. level_num = frame.columns._get_level_number(level) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 1dc44677ab3ad..04a3bc334ee3c 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -675,7 +675,7 @@ def test_duplicate_level_names(self, names): def test_duplicate_level_names_access_raises(self): self.index.names = ['foo', 'foo'] - tm.assert_raises_regex(KeyError, 'Level foo not found', + tm.assert_raises_regex(ValueError, 'name foo occurs multiple times', self.index._get_level_number, 'foo') def assert_multiindex_copied(self, copy, original): From 7bf4ed9a5323aee08146b05daf71118b08c63ada Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 29 Jun 2018 14:44:01 +0200 Subject: [PATCH 2/5] keep original logic of falling back on positional for integer levels --- pandas/core/indexes/multi.py | 39 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d488931a4bfaa..295166b11c9c2 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -755,27 +755,26 @@ def _from_elements(values, labels=None, levels=None, names=None, return MultiIndex(levels, labels, names, sortorder=sortorder) def _get_level_number(self, level): - if not isinstance(level, int): - count = self.names.count(level) - if count > 1: - raise ValueError('The name %s occurs multiple times, use a ' - 'level number' % level) - try: - level = self.names.index(level) - except ValueError: + count = self.names.count(level) + if (count > 1) and not isinstance(level, int): + raise ValueError('The name %s occurs multiple times, use a ' + 'level number' % level) + try: + level = self.names.index(level) + except ValueError: + if not isinstance(level, int): raise KeyError('Level %s not found' % str(level)) - - if level < 0: - level += self.nlevels - if level < 0: - orig_level = level - self.nlevels - raise IndexError('Too many levels: Index has only %d ' - 'levels, %d is not a valid level number' % - (self.nlevels, orig_level)) - # Note: levels are zero-based - elif level >= self.nlevels: - raise IndexError('Too many levels: Index has only %d levels, ' - 'not %d' % (self.nlevels, level + 1)) + elif level < 0: + level += self.nlevels + if level < 0: + orig_level = level - self.nlevels + raise IndexError('Too many levels: Index has only %d ' + 'levels, %d is not a valid level number' % + (self.nlevels, orig_level)) + # Note: levels are zero-based + elif level >= self.nlevels: + raise IndexError('Too many levels: Index has only %d levels, ' + 'not %d' % (self.nlevels, level + 1)) return level _tuples = None From 33354e6107600243095b79d42701b8d890f20427 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 2 Jul 2018 14:54:29 +0200 Subject: [PATCH 3/5] use is_integer --- pandas/core/indexes/multi.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 295166b11c9c2..819df1a0fed46 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -17,6 +17,7 @@ is_categorical_dtype, is_object_dtype, is_hashable, + is_integer, is_iterator, is_list_like, pandas_dtype, @@ -756,13 +757,13 @@ def _from_elements(values, labels=None, levels=None, names=None, def _get_level_number(self, level): count = self.names.count(level) - if (count > 1) and not isinstance(level, int): + if (count > 1) and not is_integer(level): raise ValueError('The name %s occurs multiple times, use a ' 'level number' % level) try: level = self.names.index(level) except ValueError: - if not isinstance(level, int): + if not is_integer(level): raise KeyError('Level %s not found' % str(level)) elif level < 0: level += self.nlevels From 2d5334bf8df4d406faf03eab4931b4e7e8c83977 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 2 Jul 2018 14:58:37 +0200 Subject: [PATCH 4/5] add whatsnew --- doc/source/whatsnew/v0.24.0.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 0ca5b9cdf1d57..2bbd15299488d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -168,7 +168,9 @@ Current Behavior: OverflowError: Trying to coerce negative values to unsigned integers - :class:`DatetimeIndex` now accepts :class:`Int64Index` arguments as epoch timestamps (:issue:`20997`) -- +- Accessing a level of a ``MultiIndex`` with a duplicate name (e.g. in + :meth:~MultiIndex.get_level_values) now raises a ValueError instead of + a KeyError (:issue:`21678`). - .. _whatsnew_0240.deprecations: From 450c6bd991842a0510f1487f9f58c456f2189902 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 8 Jul 2018 08:56:53 -0500 Subject: [PATCH 5/5] quoting --- doc/source/whatsnew/v0.24.0.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index e292c247f3755..4bb1e9efffff6 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -230,8 +230,8 @@ Other API Changes - :class:`DatetimeIndex` now accepts :class:`Int64Index` arguments as epoch timestamps (:issue:`20997`) - Accessing a level of a ``MultiIndex`` with a duplicate name (e.g. in - :meth:~MultiIndex.get_level_values) now raises a ValueError instead of - a KeyError (:issue:`21678`). + :meth:~MultiIndex.get_level_values) now raises a ``ValueError`` instead of + a ``KeyError`` (:issue:`21678`). - Invalid construction of ``IntervalDtype`` will now always raise a ``TypeError`` rather than a ``ValueError`` if the subdtype is invalid (:issue:`21185`) - Trying to reindex a ``DataFrame`` with a non unique ``MultiIndex`` now raises a ``ValueError`` instead of an ``Exception`` (:issue:`21770`) -