From 2cb6d8c9602f2872de98fc4bb42218e9f02f2949 Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Fri, 16 Sep 2016 14:47:31 -0400 Subject: [PATCH] BUG: `set_levels` set illegal levels. `MultiIndex.set_levels`, when passed in illegal level values, raises an error. When `inplace=True`, though, the illegal level values are still accepted. This commit fixes that behavior by checking that the proposed level values are legal before setting them. added tests and docs. lint added tests. force build force build force build force build for osx --- doc/source/whatsnew/v0.19.0.txt | 3 ++- pandas/indexes/multi.py | 28 +++++++++++++++++++--------- pandas/tests/indexes/test_multi.py | 21 ++++++++++++++++++++- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 454ffc5e5c685..1e6d8543dc55a 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -1576,5 +1576,6 @@ Bug Fixes - Bug in ``eval()`` where the ``resolvers`` argument would not accept a list (:issue:`14095`) - Bugs in ``stack``, ``get_dummies``, ``make_axis_dummies`` which don't preserve categorical dtypes in (multi)indexes (:issue:`13854`) -- ``PeridIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`) +- ``PeriodIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`) - Bug in ``df.groupby`` where ``.median()`` returns arbitrary values if grouped dataframe contains empty bins (:issue:`13629`) +- Bug in ``MultiIndex.set_levels`` where illegal level values were still set after raising an error (:issue:`13754`) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 09c755b2c9792..12a7964f807fd 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -116,12 +116,22 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None, return result - def _verify_integrity(self): + def _verify_integrity(self, new_labels=None, new_levels=None): """Raises ValueError if length of levels and labels don't match or any - label would exceed level bounds""" + label would exceed level bounds + + Parameters + ---------- + new_labels : optional list + Labels to check for validity. Defaults to current labels. + new_levels : optional list + Levels to check for validity. Defaults to current levels. + """ # NOTE: Currently does not check, among other things, that cached # nlevels matches nor that sortorder matches actually sortorder. - labels, levels = self.labels, self.levels + labels = new_labels or self.labels + levels = new_levels or self.levels + if len(levels) != len(labels): raise ValueError("Length of levels and labels must match. NOTE:" " this index is in an inconsistent state.") @@ -162,6 +172,9 @@ def _set_levels(self, levels, level=None, copy=False, validate=True, new_levels[l] = _ensure_index(v, copy=copy)._shallow_copy() new_levels = FrozenList(new_levels) + if verify_integrity: + self._verify_integrity(new_levels=new_levels) + names = self.names self._levels = new_levels if any(names): @@ -170,9 +183,6 @@ def _set_levels(self, levels, level=None, copy=False, validate=True, self._tuples = None self._reset_cache() - if verify_integrity: - self._verify_integrity() - def set_levels(self, levels, level=None, inplace=False, verify_integrity=True): """ @@ -268,13 +278,13 @@ def _set_labels(self, labels, level=None, copy=False, validate=True, lab, lev, copy=copy)._shallow_copy() new_labels = FrozenList(new_labels) + if verify_integrity: + self._verify_integrity(new_labels=new_labels) + self._labels = new_labels self._tuples = None self._reset_cache() - if verify_integrity: - self._verify_integrity() - def set_labels(self, labels, level=None, inplace=False, verify_integrity=True): """ diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 5248f0775d22f..76211c9b3eee4 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -154,7 +154,7 @@ def assert_matching(actual, expected): # as much as possible self.assertEqual(len(actual), len(expected)) for act, exp in zip(actual, expected): - act = np.asarray(act) + act = np.asarray(act, dtype=np.object_) exp = np.asarray(exp, dtype=np.object_) tm.assert_numpy_array_equal(act, exp) @@ -204,6 +204,25 @@ def assert_matching(actual, expected): assert_matching(ind2.levels, new_levels) assert_matching(self.index.levels, levels) + # illegal level changing should not change levels + # GH 13754 + original_index = self.index.copy() + with assertRaisesRegexp(ValueError, "^On"): + self.index.set_levels(['c'], level=0, inplace=True) + assert_matching(self.index.levels, original_index.levels) + + with assertRaisesRegexp(ValueError, "^On"): + self.index.set_labels([0, 1, 2, 3, 4, 5], level=0, inplace=True) + assert_matching(self.index.labels, original_index.labels) + + with assertRaisesRegexp(TypeError, "^Levels"): + self.index.set_levels('c', level=0, inplace=True) + assert_matching(self.index.levels, original_index.levels) + + with assertRaisesRegexp(TypeError, "^Labels"): + self.index.set_labels(1, level=0, inplace=True) + assert_matching(self.index.labels, original_index.labels) + def test_set_labels(self): # side note - you probably wouldn't want to use levels and labels # directly like this - but it is possible.