From 4093ff935e4ccfa37ab385096ad2d8e397221081 Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Fri, 16 Sep 2016 14:47:31 -0400 Subject: [PATCH 1/9] BUG: `set_levels` set illegal levels. `MultiIndex.set_levels`, when passed in illegal level values, raises an error. When `inplace=True`, though, the illegal level values are still accepted. This commit fixes that behavior by checking that the proposed level values are legal before setting them. added tests and docs. lint added tests. force build force build force build force build for osx --- doc/source/whatsnew/v0.19.0.txt | 3 ++- pandas/indexes/multi.py | 28 +++++++++++++++++++--------- pandas/tests/indexes/test_multi.py | 21 ++++++++++++++++++++- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 60847469aa02c..f48967cc513af 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -1560,6 +1560,7 @@ Bug Fixes - Bug in ``.to_string()`` when called with an integer ``line_width`` and ``index=False`` raises an UnboundLocalError exception because ``idx`` referenced before assignment. - Bug in ``eval()`` where the ``resolvers`` argument would not accept a list (:issue:`14095`) - Bugs in ``stack``, ``get_dummies``, ``make_axis_dummies`` which don't preserve categorical dtypes in (multi)indexes (:issue:`13854`) -- ``PeridIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`) +- ``PeriodIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`) - Bug in ``df.groupby`` where ``.median()`` returns arbitrary values if grouped dataframe contains empty bins (:issue:`13629`) - Bug in ``Index.copy()`` where ``name`` parameter was ignored (:issue:`14302`) +- Bug in ``MultiIndex.set_levels`` where illegal level values were still set after raising an error (:issue:`13754`) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 1ab5dbb737739..16dbf6cbc8c13 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -116,12 +116,22 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None, return result - def _verify_integrity(self): + def _verify_integrity(self, new_labels=None, new_levels=None): """Raises ValueError if length of levels and labels don't match or any - label would exceed level bounds""" + label would exceed level bounds + + Parameters + ---------- + new_labels : optional list + Labels to check for validity. Defaults to current labels. + new_levels : optional list + Levels to check for validity. Defaults to current levels. + """ # NOTE: Currently does not check, among other things, that cached # nlevels matches nor that sortorder matches actually sortorder. - labels, levels = self.labels, self.levels + labels = new_labels or self.labels + levels = new_levels or self.levels + if len(levels) != len(labels): raise ValueError("Length of levels and labels must match. NOTE:" " this index is in an inconsistent state.") @@ -162,6 +172,9 @@ def _set_levels(self, levels, level=None, copy=False, validate=True, new_levels[l] = _ensure_index(v, copy=copy)._shallow_copy() new_levels = FrozenList(new_levels) + if verify_integrity: + self._verify_integrity(new_levels=new_levels) + names = self.names self._levels = new_levels if any(names): @@ -170,9 +183,6 @@ def _set_levels(self, levels, level=None, copy=False, validate=True, self._tuples = None self._reset_cache() - if verify_integrity: - self._verify_integrity() - def set_levels(self, levels, level=None, inplace=False, verify_integrity=True): """ @@ -268,13 +278,13 @@ def _set_labels(self, labels, level=None, copy=False, validate=True, lab, lev, copy=copy)._shallow_copy() new_labels = FrozenList(new_labels) + if verify_integrity: + self._verify_integrity(new_labels=new_labels) + self._labels = new_labels self._tuples = None self._reset_cache() - if verify_integrity: - self._verify_integrity() - def set_labels(self, labels, level=None, inplace=False, verify_integrity=True): """ diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index cd9ce0102ca1e..eb99f8c746b3b 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -154,7 +154,7 @@ def assert_matching(actual, expected): # as much as possible self.assertEqual(len(actual), len(expected)) for act, exp in zip(actual, expected): - act = np.asarray(act) + act = np.asarray(act, dtype=np.object_) exp = np.asarray(exp, dtype=np.object_) tm.assert_numpy_array_equal(act, exp) @@ -204,6 +204,25 @@ def assert_matching(actual, expected): assert_matching(ind2.levels, new_levels) assert_matching(self.index.levels, levels) + # illegal level changing should not change levels + # GH 13754 + original_index = self.index.copy() + with assertRaisesRegexp(ValueError, "^On"): + self.index.set_levels(['c'], level=0, inplace=True) + assert_matching(self.index.levels, original_index.levels) + + with assertRaisesRegexp(ValueError, "^On"): + self.index.set_labels([0, 1, 2, 3, 4, 5], level=0, inplace=True) + assert_matching(self.index.labels, original_index.labels) + + with assertRaisesRegexp(TypeError, "^Levels"): + self.index.set_levels('c', level=0, inplace=True) + assert_matching(self.index.levels, original_index.levels) + + with assertRaisesRegexp(TypeError, "^Labels"): + self.index.set_labels(1, level=0, inplace=True) + assert_matching(self.index.labels, original_index.labels) + def test_set_labels(self): # side note - you probably wouldn't want to use levels and labels # directly like this - but it is possible. From fbace3908d02b337cd7270951bea5a039315e121 Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Sat, 24 Sep 2016 23:02:50 -0400 Subject: [PATCH 2/9] force build --- pandas/indexes/multi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 16dbf6cbc8c13..c9b83d129c272 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -175,6 +175,7 @@ def _set_levels(self, levels, level=None, copy=False, validate=True, if verify_integrity: self._verify_integrity(new_levels=new_levels) + names = self.names self._levels = new_levels if any(names): From 6e56ff3db1fb3b3476d321760c27b17eb006aad4 Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Sat, 24 Sep 2016 23:03:04 -0400 Subject: [PATCH 3/9] force build --- pandas/indexes/multi.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index c9b83d129c272..16dbf6cbc8c13 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -175,7 +175,6 @@ def _set_levels(self, levels, level=None, copy=False, validate=True, if verify_integrity: self._verify_integrity(new_levels=new_levels) - names = self.names self._levels = new_levels if any(names): From 40bcdc7d8e15e0f22925bac49e5c26d220282e82 Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Tue, 27 Sep 2016 17:05:29 -0400 Subject: [PATCH 4/9] add test for inplace=False --- pandas/tests/indexes/test_multi.py | 31 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index eb99f8c746b3b..cc2ce2d2dc1fd 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -207,21 +207,22 @@ def assert_matching(actual, expected): # illegal level changing should not change levels # GH 13754 original_index = self.index.copy() - with assertRaisesRegexp(ValueError, "^On"): - self.index.set_levels(['c'], level=0, inplace=True) - assert_matching(self.index.levels, original_index.levels) - - with assertRaisesRegexp(ValueError, "^On"): - self.index.set_labels([0, 1, 2, 3, 4, 5], level=0, inplace=True) - assert_matching(self.index.labels, original_index.labels) - - with assertRaisesRegexp(TypeError, "^Levels"): - self.index.set_levels('c', level=0, inplace=True) - assert_matching(self.index.levels, original_index.levels) - - with assertRaisesRegexp(TypeError, "^Labels"): - self.index.set_labels(1, level=0, inplace=True) - assert_matching(self.index.labels, original_index.labels) + for inplace in [True, False]: + with assertRaisesRegexp(ValueError, "^On"): + self.index.set_levels(['c'], level=0, inplace=inplace) + assert_matching(self.index.levels, original_index.levels) + + with assertRaisesRegexp(ValueError, "^On"): + self.index.set_labels([0, 1, 2, 3, 4, 5], level=0, inplace=inplace) + assert_matching(self.index.labels, original_index.labels) + + with assertRaisesRegexp(TypeError, "^Levels"): + self.index.set_levels('c', level=0, inplace=inplace) + assert_matching(self.index.levels, original_index.levels) + + with assertRaisesRegexp(TypeError, "^Labels"): + self.index.set_labels(1, level=0, inplace=inplace) + assert_matching(self.index.labels, original_index.labels) def test_set_labels(self): # side note - you probably wouldn't want to use levels and labels From be8dfba4ffa9cf6994a5524017444df0d3c75ba2 Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Tue, 27 Sep 2016 17:48:33 -0400 Subject: [PATCH 5/9] lint --- pandas/tests/indexes/test_multi.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index cc2ce2d2dc1fd..c7f4b46a7a9cf 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -213,7 +213,8 @@ def assert_matching(actual, expected): assert_matching(self.index.levels, original_index.levels) with assertRaisesRegexp(ValueError, "^On"): - self.index.set_labels([0, 1, 2, 3, 4, 5], level=0, inplace=inplace) + self.index.set_labels([0, 1, 2, 3, 4, 5], level=0, + inplace=inplace) assert_matching(self.index.labels, original_index.labels) with assertRaisesRegexp(TypeError, "^Levels"): From 30bbeb458ff58549111c5861e1565fac36130379 Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Tue, 27 Sep 2016 20:36:48 -0400 Subject: [PATCH 6/9] try not setting dtype --- pandas/tests/indexes/test_multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index c7f4b46a7a9cf..3203af42b2188 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -154,7 +154,7 @@ def assert_matching(actual, expected): # as much as possible self.assertEqual(len(actual), len(expected)) for act, exp in zip(actual, expected): - act = np.asarray(act, dtype=np.object_) + act = np.asarray(act) exp = np.asarray(exp, dtype=np.object_) tm.assert_numpy_array_equal(act, exp) From e1a6e0af7d0f44593814f1db80e98aeae361b15a Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Wed, 28 Sep 2016 09:09:10 -0400 Subject: [PATCH 7/9] put back in conversion to object --- pandas/tests/indexes/test_multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 3203af42b2188..c7f4b46a7a9cf 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -154,7 +154,7 @@ def assert_matching(actual, expected): # as much as possible self.assertEqual(len(actual), len(expected)) for act, exp in zip(actual, expected): - act = np.asarray(act) + act = np.asarray(act, dtype=np.object_) exp = np.asarray(exp, dtype=np.object_) tm.assert_numpy_array_equal(act, exp) From 005ab5182f0dc57e5e671ab0a33404c3d0b34fa1 Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Fri, 30 Sep 2016 13:53:48 -0400 Subject: [PATCH 8/9] fix test coercion --- pandas/tests/indexes/test_multi.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index c7f4b46a7a9cf..4dda01640f38d 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -149,13 +149,16 @@ def test_set_levels(self): levels = self.index.levels new_levels = [[lev + 'a' for lev in level] for level in levels] - def assert_matching(actual, expected): + def assert_matching(actual, expected, coerce_to_obj=True): # avoid specifying internal representation # as much as possible self.assertEqual(len(actual), len(expected)) for act, exp in zip(actual, expected): - act = np.asarray(act, dtype=np.object_) - exp = np.asarray(exp, dtype=np.object_) + act = np.asarray(act) + if coerce_to_obj: + exp = np.asarray(exp, dtype=np.object_) + else: + exp = np.asarray(exp) tm.assert_numpy_array_equal(act, exp) # level changing [w/o mutation] @@ -210,20 +213,24 @@ def assert_matching(actual, expected): for inplace in [True, False]: with assertRaisesRegexp(ValueError, "^On"): self.index.set_levels(['c'], level=0, inplace=inplace) - assert_matching(self.index.levels, original_index.levels) + assert_matching(self.index.levels, original_index.levels, + coerce_to_obj=False) with assertRaisesRegexp(ValueError, "^On"): self.index.set_labels([0, 1, 2, 3, 4, 5], level=0, inplace=inplace) - assert_matching(self.index.labels, original_index.labels) + assert_matching(self.index.labels, original_index.labels, + coerce_to_obj=False) with assertRaisesRegexp(TypeError, "^Levels"): self.index.set_levels('c', level=0, inplace=inplace) - assert_matching(self.index.levels, original_index.levels) + assert_matching(self.index.levels, original_index.levels, + coerce_to_obj=False) with assertRaisesRegexp(TypeError, "^Labels"): self.index.set_labels(1, level=0, inplace=inplace) - assert_matching(self.index.labels, original_index.labels) + assert_matching(self.index.labels, original_index.labels, + coerce_to_obj=False) def test_set_labels(self): # side note - you probably wouldn't want to use levels and labels From 39b7bde0a36f5278917a5e861cb60521d19ae27e Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Fri, 7 Oct 2016 16:29:43 -0400 Subject: [PATCH 9/9] addressed comments --- doc/source/whatsnew/v0.19.0.txt | 1 - doc/source/whatsnew/v0.19.1.txt | 1 + pandas/indexes/multi.py | 23 ++++++++++++++--------- pandas/tests/indexes/test_multi.py | 17 +++++++---------- 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index f48967cc513af..8e7e95c071ea4 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -1563,4 +1563,3 @@ Bug Fixes - ``PeriodIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`) - Bug in ``df.groupby`` where ``.median()`` returns arbitrary values if grouped dataframe contains empty bins (:issue:`13629`) - Bug in ``Index.copy()`` where ``name`` parameter was ignored (:issue:`14302`) -- Bug in ``MultiIndex.set_levels`` where illegal level values were still set after raising an error (:issue:`13754`) diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index 21b9fac6ffacf..92c7746d1c023 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -31,3 +31,4 @@ Performance Improvements Bug Fixes ~~~~~~~~~ - Bug in ``pd.concat`` where names of the ``keys`` were not propagated to the resulting ``MultiIndex`` (:issue:`14252`) +- Bug in ``MultiIndex.set_levels`` where illegal level values were still set after raising an error (:issue:`13754`) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 16dbf6cbc8c13..0c465da24a17e 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -116,21 +116,26 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None, return result - def _verify_integrity(self, new_labels=None, new_levels=None): - """Raises ValueError if length of levels and labels don't match or any - label would exceed level bounds + def _verify_integrity(self, labels=None, levels=None): + """ Parameters ---------- - new_labels : optional list + labels : optional list Labels to check for validity. Defaults to current labels. - new_levels : optional list + levels : optional list Levels to check for validity. Defaults to current levels. + + Raises + ------ + ValueError + * if length of levels and labels don't match or any label would + exceed level bounds """ # NOTE: Currently does not check, among other things, that cached # nlevels matches nor that sortorder matches actually sortorder. - labels = new_labels or self.labels - levels = new_levels or self.levels + labels = labels or self.labels + levels = levels or self.levels if len(levels) != len(labels): raise ValueError("Length of levels and labels must match. NOTE:" @@ -173,7 +178,7 @@ def _set_levels(self, levels, level=None, copy=False, validate=True, new_levels = FrozenList(new_levels) if verify_integrity: - self._verify_integrity(new_levels=new_levels) + self._verify_integrity(levels=new_levels) names = self.names self._levels = new_levels @@ -279,7 +284,7 @@ def _set_labels(self, labels, level=None, copy=False, validate=True, new_labels = FrozenList(new_labels) if verify_integrity: - self._verify_integrity(new_labels=new_labels) + self._verify_integrity(labels=new_labels) self._labels = new_labels self._tuples = None diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 4dda01640f38d..fdc5a2eaec812 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -149,17 +149,14 @@ def test_set_levels(self): levels = self.index.levels new_levels = [[lev + 'a' for lev in level] for level in levels] - def assert_matching(actual, expected, coerce_to_obj=True): + def assert_matching(actual, expected, check_dtype=False): # avoid specifying internal representation # as much as possible self.assertEqual(len(actual), len(expected)) for act, exp in zip(actual, expected): act = np.asarray(act) - if coerce_to_obj: - exp = np.asarray(exp, dtype=np.object_) - else: - exp = np.asarray(exp) - tm.assert_numpy_array_equal(act, exp) + exp = np.asarray(exp) + tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype) # level changing [w/o mutation] ind2 = self.index.set_levels(new_levels) @@ -214,23 +211,23 @@ def assert_matching(actual, expected, coerce_to_obj=True): with assertRaisesRegexp(ValueError, "^On"): self.index.set_levels(['c'], level=0, inplace=inplace) assert_matching(self.index.levels, original_index.levels, - coerce_to_obj=False) + check_dtype=True) with assertRaisesRegexp(ValueError, "^On"): self.index.set_labels([0, 1, 2, 3, 4, 5], level=0, inplace=inplace) assert_matching(self.index.labels, original_index.labels, - coerce_to_obj=False) + check_dtype=True) with assertRaisesRegexp(TypeError, "^Levels"): self.index.set_levels('c', level=0, inplace=inplace) assert_matching(self.index.levels, original_index.levels, - coerce_to_obj=False) + check_dtype=True) with assertRaisesRegexp(TypeError, "^Labels"): self.index.set_labels(1, level=0, inplace=inplace) assert_matching(self.index.labels, original_index.labels, - coerce_to_obj=False) + check_dtype=True) def test_set_labels(self): # side note - you probably wouldn't want to use levels and labels