diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 151d853166563..620a42ea516d8 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -452,6 +452,7 @@ Removal of prior version deprecations/changes - Enforced disallowing using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`) - Enforced disallowing the use of ``**kwargs`` in :class:`.ExcelWriter`; use the keyword argument ``engine_kwargs`` instead (:issue:`40430`) - Enforced disallowing a tuple of column labels into :meth:`.DataFrameGroupBy.__getitem__` (:issue:`30546`) +- Enforced disallowing missing labels when indexing with a sequence of labels on a level of a :class:`MultiIndex`. This now raises a ``KeyError`` (:issue:`42351`) - Enforced disallowing setting values with ``.loc`` using a positional slice. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`) - Enforced disallowing positional indexing with a ``float`` key even if that key is a round number, manually cast to integer instead (:issue:`34193`) - Enforced disallowing using a :class:`DataFrame` indexer with ``.iloc``, use ``.loc`` instead for automatic alignment (:issue:`39022`) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 1e255241cf222..04a57c1709382 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3284,34 +3284,18 @@ def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]: if not is_hashable(x): # e.g. slice raise err - try: - item_indexer = self._get_level_indexer( - x, level=i, indexer=indexer - ) - except KeyError: - # ignore not founds; see discussion in GH#39424 - warnings.warn( - "The behavior of indexing on a MultiIndex with a " - "nested sequence of labels is deprecated and will " - "change in a future version. " - "`series.loc[label, sequence]` will raise if any " - "members of 'sequence' or not present in " - "the index's second level. To retain the old " - "behavior, use `series.index.isin(sequence, level=1)`", - # TODO: how to opt in to the future behavior? - # TODO: how to handle IntervalIndex level? - # (no test cases) - FutureWarning, - stacklevel=find_stack_level(), - ) - continue + # GH 39424: Ignore not founds + # GH 42351: No longer ignore not founds & enforced in 2.0 + # TODO: how to handle IntervalIndex level? (no test cases) + item_indexer = self._get_level_indexer( + x, level=i, indexer=indexer + ) + if lvl_indexer is None: + lvl_indexer = _to_bool_indexer(item_indexer) + elif isinstance(item_indexer, slice): + lvl_indexer[item_indexer] = True # type: ignore[index] else: - if lvl_indexer is None: - lvl_indexer = _to_bool_indexer(item_indexer) - elif isinstance(item_indexer, slice): - lvl_indexer[item_indexer] = True # type: ignore[index] - else: - lvl_indexer |= item_indexer + lvl_indexer |= item_indexer if lvl_indexer is None: # no matches we are done diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index c25866c4f09e2..ac4bb1093d84a 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -443,15 +443,12 @@ def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos): if expected.size == 0 and indexer != []: with pytest.raises(KeyError, match=str(indexer)): ser.loc[indexer] + elif indexer == (slice(None), ["foo", "bah"]): + # "bah" is not in idx.levels[1], raising KeyError enforced in 2.0 + with pytest.raises(KeyError, match="'bah'"): + ser.loc[indexer] else: - warn = None - msg = "MultiIndex with a nested sequence" - if indexer == (slice(None), ["foo", "bah"]): - # "bah" is not in idx.levels[1], so is ignored, will raise KeyError - warn = FutureWarning - - with tm.assert_produces_warning(warn, match=msg): - result = ser.loc[indexer] + result = ser.loc[indexer] tm.assert_series_equal(result, expected) diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index c97505eacd4c4..32ab0336aa93f 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -1,3 +1,4 @@ +import contextlib import copy import re from textwrap import dedent @@ -701,26 +702,26 @@ def test_applymap_subset(self, slice_, df): def test_applymap_subset_multiindex(self, slice_): # GH 19861 # edited for GH 33562 - warn = None - msg = "indexing on a MultiIndex with a nested sequence of labels" if ( isinstance(slice_[-1], tuple) and isinstance(slice_[-1][-1], list) and "C" in slice_[-1][-1] ): - warn = FutureWarning + ctx = pytest.raises(KeyError, match="C") # noqa: PDF010 elif ( isinstance(slice_[0], tuple) and isinstance(slice_[0][1], list) and 3 in slice_[0][1] ): - warn = FutureWarning + ctx = pytest.raises(KeyError, match="3") # noqa: PDF010 + else: + ctx = contextlib.nullcontext() idx = MultiIndex.from_product([["a", "b"], [1, 2]]) col = MultiIndex.from_product([["x", "y"], ["A", "B"]]) df = DataFrame(np.random.rand(4, 4), columns=col, index=idx) - with tm.assert_produces_warning(warn, match=msg): + with ctx: df.style.applymap(lambda x: "color: red;", subset=slice_).to_html() def test_applymap_subset_multiindex_code(self): @@ -1390,7 +1391,7 @@ def test_non_reducing_slice_on_multiindex(self): IndexSlice[:, IndexSlice["a", :, "e"]], IndexSlice[:, IndexSlice[:, "c", "e"]], IndexSlice[:, IndexSlice["a", ["c", "d"], :]], # check list - IndexSlice[:, IndexSlice["a", ["c", "d", "-"], :]], # allow missing + IndexSlice[:, IndexSlice["a", ["c", "d", "-"], :]], # don't allow missing IndexSlice[:, IndexSlice["a", ["c", "d", "-"], "e"]], # no slice # check rows IndexSlice[IndexSlice[["U"]], :], # inferred deeper need list @@ -1399,7 +1400,7 @@ def test_non_reducing_slice_on_multiindex(self): IndexSlice[IndexSlice["U", :, "Y"], :], IndexSlice[IndexSlice[:, "W", "Y"], :], IndexSlice[IndexSlice[:, "W", ["Y", "Z"]], :], # check list - IndexSlice[IndexSlice[:, "W", ["Y", "Z", "-"]], :], # allow missing + IndexSlice[IndexSlice[:, "W", ["Y", "Z", "-"]], :], # don't allow missing IndexSlice[IndexSlice["U", "W", ["Y", "Z", "-"]], :], # no slice # check simultaneous IndexSlice[IndexSlice[:, "W", "Y"], IndexSlice["a", "c", :]], @@ -1411,21 +1412,18 @@ def test_non_reducing_multi_slice_on_multiindex(self, slice_): idxs = MultiIndex.from_product([["U", "V"], ["W", "X"], ["Y", "Z"]]) df = DataFrame(np.arange(64).reshape(8, 8), columns=cols, index=idxs) - msg = "indexing on a MultiIndex with a nested sequence of labels" - warn = None for lvl in [0, 1]: key = slice_[lvl] if isinstance(key, tuple): for subkey in key: if isinstance(subkey, list) and "-" in subkey: - # not present in the index level, ignored, will raise in future - warn = FutureWarning - - with tm.assert_produces_warning(warn, match=msg): - expected = df.loc[slice_] + # not present in the index level, raises KeyError since 2.0 + with pytest.raises(KeyError, match="-"): + df.loc[slice_] + return - with tm.assert_produces_warning(warn, match=msg): - result = df.loc[non_reducing_slice(slice_)] + expected = df.loc[slice_] + result = df.loc[non_reducing_slice(slice_)] tm.assert_frame_equal(result, expected)