diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index c4fc222353738..23eae18c4c121 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -338,7 +338,7 @@ Conversion - Bug in :class:`Series` floor-division where operating on a scalar ``timedelta`` raises an exception (:issue:`18846`) - Bug in :class:`FY5253Quarter`, :class:`LastWeekOfMonth` where rollback and rollforward behavior was inconsistent with addition and subtraction behavior (:issue:`18854`) - Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`) -- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`) +- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`) - Bug in :class:`Series`` with ``dtype='timedelta64[ns]`` where addition or subtraction of ``TimedeltaIndex`` had results cast to ``dtype='int64'`` (:issue:`17250`) - Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (issue:`19042`) - Bug in :class:`Series` with ``dtype='timedelta64[ns]`` where addition or subtraction of ``TimedeltaIndex`` could return a ``Series`` with an incorrect name (issue:`19043`) @@ -364,6 +364,7 @@ Indexing - Bug in indexing non-scalar value from ``Series`` having non-unique ``Index`` will return value flattened (:issue:`17610`) - Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`) - Bug in ``__setitem__`` when indexing a :class:`DataFrame` with a 2-d boolean ndarray (:issue:`18582`) +- Bug in :func:`MultiIndex.__contains__` where non-tuple keys would return ``True`` even if they had been dropped (:issue:`19027`) - Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... ] (:issue:`19057`) - Bug in ``str.extractall`` when there were no matches empty :class:`Index` was returned instead of appropriate :class:`MultiIndex` (:issue:`19034`) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 398ee7d0aef3c..5739c8dfd8b53 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2123,6 +2123,11 @@ def _maybe_to_slice(loc): if not isinstance(key, tuple): loc = self._get_level_indexer(key, level=0) + + # _get_level_indexer returns an empty slice if the key has + # been dropped from the MultiIndex + if isinstance(loc, slice) and loc.start == loc.stop: + raise KeyError(key) return _maybe_to_slice(loc) keylen = len(key) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 1ca014baa9ec8..d6aed064e49f8 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -628,7 +628,11 @@ def _convert_level_number(level_num, columns): levsize = len(level_labels) drop_cols = [] for key in unique_groups: - loc = this.columns.get_loc(key) + try: + loc = this.columns.get_loc(key) + except KeyError: + drop_cols.append(key) + continue # can make more efficient? # we almost always return a slice @@ -639,10 +643,7 @@ def _convert_level_number(level_num, columns): else: slice_len = loc.stop - loc.start - if slice_len == 0: - drop_cols.append(key) - continue - elif slice_len != levsize: + if slice_len != levsize: chunk = this.loc[:, this.columns[loc]] chunk.columns = level_vals.take(chunk.columns.labels[-1]) value_slice = chunk.reindex(columns=level_vals_used).values diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 26e2b801f6460..9acdf2f17d86a 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -193,9 +193,10 @@ def test_delitem_multiindex(self): with pytest.raises(KeyError): del df[('A',)] - # xref: https://github.com/pandas-dev/pandas/issues/2770 - # the 'A' is STILL in the columns! - assert 'A' in df.columns + # behavior of dropped/deleted MultiIndex levels changed from + # GH 2770 to GH 19027: MultiIndex no longer '.__contains__' + # levels which are dropped/deleted + assert 'A' not in df.columns with pytest.raises(KeyError): del df['A'] diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index f69b9d98143b0..43656a392e582 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -705,6 +705,26 @@ def test_multiindex_symmetric_difference(self): result = idx ^ idx2 assert result.names == [None, None] + def test_multiindex_contains_dropped(self): + # GH 19027 + # test that dropped MultiIndex levels are not in the MultiIndex + # despite continuing to be in the MultiIndex's levels + idx = MultiIndex.from_product([[1, 2], [3, 4]]) + assert 2 in idx + idx = idx.drop(2) + + # drop implementation keeps 2 in the levels + assert 2 in idx.levels[0] + # but it should no longer be in the index itself + assert 2 not in idx + + # also applies to strings + idx = MultiIndex.from_product([['a', 'b'], ['c', 'd']]) + assert 'a' in idx + idx = idx.drop('a') + assert 'a' in idx.levels[0] + assert 'a' not in idx + class TestMultiIndexSlicers(object):