From 129b286088c9f545e272b1dd519e3c14fbfb3f32 Mon Sep 17 00:00:00 2001 From: Avi Sen Date: Wed, 3 Jan 2018 04:06:22 -0800 Subject: [PATCH 1/6] BUG: x in MultiIndex.drop(x) --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/core/indexes/multi.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index bd3bee507baa3..6515a7e196bd5 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -321,7 +321,7 @@ Indexing - Bug in indexing non-scalar value from ``Series`` having non-unique ``Index`` will return value flattened (:issue:`17610`) - Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`) - Bug in ``__setitem__`` when indexing a :class:`DataFrame` with a 2-d boolean ndarray (:issue:`18582`) - +- Bug in :func:`MultiIndex.__contains__` where integer keys would return ``True`` even if they had been dropped (:issue:`19027`) I/O ^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 7107378671ba5..6edc2443f7813 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2120,6 +2120,12 @@ def _maybe_to_slice(loc): mask[loc] = True return mask + if isinstance(key, int): + try: + return self.get_loc((key,)) + except LookupError: + raise KeyError(key) + if not isinstance(key, tuple): loc = self._get_level_indexer(key, level=0) return _maybe_to_slice(loc) From 9aec9de64b7ca35aa05862ebdfa7df9744264006 Mon Sep 17 00:00:00 2001 From: Avi Sen Date: Wed, 3 Jan 2018 15:38:48 -0800 Subject: [PATCH 2/6] fixing stack and typeerror checks --- pandas/core/indexes/multi.py | 2 +- pandas/core/reshape/reshape.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 6edc2443f7813..6a5afbe7b288f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2123,7 +2123,7 @@ def _maybe_to_slice(loc): if isinstance(key, int): try: return self.get_loc((key,)) - except LookupError: + except (LookupError, TypeError): raise KeyError(key) if not isinstance(key, tuple): diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 1ca014baa9ec8..589cea047465a 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -628,7 +628,11 @@ def _convert_level_number(level_num, columns): levsize = len(level_labels) drop_cols = [] for key in unique_groups: - loc = this.columns.get_loc(key) + try: + loc = this.columns.get_loc(key) + except KeyError: + drop_cols.append(key) + continue # can make more efficient? # we almost always return a slice From 35ee8f0e2d66c7e82f8af7f700004666dcdaf615 Mon Sep 17 00:00:00 2001 From: Avi Sen Date: Wed, 3 Jan 2018 15:52:18 -0800 Subject: [PATCH 3/6] added test --- pandas/tests/indexing/test_multiindex.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index f69b9d98143b0..97b2ff56e7c3f 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -705,6 +705,15 @@ def test_multiindex_symmetric_difference(self): result = idx ^ idx2 assert result.names == [None, None] + def test_multiindex_contains_dropped(self): + # GH 19027 + idx = MultiIndex.from_product([[1, 2], [3, 4]]) + assert 2 in idx + idx = idx.drop(2) + # drop implementation keeps 2 in the levels + assert 2 in idx.levels[0] + # but it should no longer be in the index itself + assert 2 not in idx class TestMultiIndexSlicers(object): From 698853fc16769bcdfbae722cd3ea4ba54663b87e Mon Sep 17 00:00:00 2001 From: Avi Sen Date: Thu, 4 Jan 2018 19:24:26 -0800 Subject: [PATCH 4/6] made fixes, added additional test --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/core/indexes/multi.py | 10 ++++------ pandas/tests/indexing/test_multiindex.py | 9 +++++++++ 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 6515a7e196bd5..ef4b56b4ebce4 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -321,7 +321,7 @@ Indexing - Bug in indexing non-scalar value from ``Series`` having non-unique ``Index`` will return value flattened (:issue:`17610`) - Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`) - Bug in ``__setitem__`` when indexing a :class:`DataFrame` with a 2-d boolean ndarray (:issue:`18582`) -- Bug in :func:`MultiIndex.__contains__` where integer keys would return ``True`` even if they had been dropped (:issue:`19027`) +- Bug in :func:`MultiIndex.__contains__` where non-tuple keys would return ``True`` even if they had been dropped (:issue:`19027`) I/O ^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 6a5afbe7b288f..ef114d6e05801 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2120,14 +2120,12 @@ def _maybe_to_slice(loc): mask[loc] = True return mask - if isinstance(key, int): - try: - return self.get_loc((key,)) - except (LookupError, TypeError): - raise KeyError(key) - if not isinstance(key, tuple): loc = self._get_level_indexer(key, level=0) + # _get_level_indexer returns an empty slice if the key has + # been dropped from the MultiIndex + if isinstance(loc, slice) and loc.start == loc.stop: + raise KeyError(key) return _maybe_to_slice(loc) keylen = len(key) diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index 97b2ff56e7c3f..ab968ed602aec 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -710,11 +710,20 @@ def test_multiindex_contains_dropped(self): idx = MultiIndex.from_product([[1, 2], [3, 4]]) assert 2 in idx idx = idx.drop(2) + # drop implementation keeps 2 in the levels assert 2 in idx.levels[0] # but it should no longer be in the index itself assert 2 not in idx + # also applies to strings + idx = MultiIndex.from_product([['a', 'b'], ['c', 'd']]) + assert 'a' in idx + idx = idx.drop('a') + assert 'a' in idx.levels[0] + assert 'a' not in idx + + class TestMultiIndexSlicers(object): def test_per_axis_per_level_getitem(self): From 5da98302223f48e9f1eb777e8f975a19882afae1 Mon Sep 17 00:00:00 2001 From: Avi Sen Date: Thu, 4 Jan 2018 20:25:24 -0800 Subject: [PATCH 5/6] updated failing test based on new behavior --- pandas/tests/frame/test_mutate_columns.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 26e2b801f6460..574a7b46447ec 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -195,7 +195,9 @@ def test_delitem_multiindex(self): # xref: https://github.com/pandas-dev/pandas/issues/2770 # the 'A' is STILL in the columns! - assert 'A' in df.columns + # the above was changed, 'A' is no longer in columns + # xref: https://github.com/pandas-dev/pandas/issues/19027 + assert 'A' not in df.columns with pytest.raises(KeyError): del df['A'] From 9b79e96127759de3aecd3f0b6fff5d6c5bf246bf Mon Sep 17 00:00:00 2001 From: Avi Sen Date: Sun, 7 Jan 2018 13:34:15 -0800 Subject: [PATCH 6/6] remove redundant dropped columns conditional --- pandas/core/reshape/reshape.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 589cea047465a..d6aed064e49f8 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -643,10 +643,7 @@ def _convert_level_number(level_num, columns): else: slice_len = loc.stop - loc.start - if slice_len == 0: - drop_cols.append(key) - continue - elif slice_len != levsize: + if slice_len != levsize: chunk = this.loc[:, this.columns[loc]] chunk.columns = level_vals.take(chunk.columns.labels[-1]) value_slice = chunk.reindex(columns=level_vals_used).values