Skip to content

Commit 35b3d62

Browse files
mroeschke authored and mliu08 committed
DEPR: Disallow missing nested label when indexing MultiIndex level (pandas-dev#49628)
DEPR: Disallow missing nested label when indexing MultiIndex level
1 parent f7303b1 commit 35b3d62

File tree

4 files changed

+31
-51
lines changed

4 files changed

+31
-51
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,7 @@ Removal of prior version deprecations/changes
452452
- Enforced disallowing using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
453453
- Enforced disallowing the use of ``**kwargs`` in :class:`.ExcelWriter`; use the keyword argument ``engine_kwargs`` instead (:issue:`40430`)
454454
- Enforced disallowing a tuple of column labels into :meth:`.DataFrameGroupBy.__getitem__` (:issue:`30546`)
455+
- Enforced disallowing missing labels when indexing with a sequence of labels on a level of a :class:`MultiIndex`. This now raises a ``KeyError`` (:issue:`42351`)
455456
- Enforced disallowing setting values with ``.loc`` using a positional slice. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`)
456457
- Enforced disallowing positional indexing with a ``float`` key even if that key is a round number, manually cast to integer instead (:issue:`34193`)
457458
- Enforced disallowing using a :class:`DataFrame` indexer with ``.iloc``, use ``.loc`` instead for automatic alignment (:issue:`39022`)

pandas/core/indexes/multi.py

+11-27
Original file line numberDiff line numberDiff line change
@@ -3284,34 +3284,18 @@ def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
32843284
if not is_hashable(x):
32853285
# e.g. slice
32863286
raise err
3287-
try:
3288-
item_indexer = self._get_level_indexer(
3289-
x, level=i, indexer=indexer
3290-
)
3291-
except KeyError:
3292-
# ignore not founds; see discussion in GH#39424
3293-
warnings.warn(
3294-
"The behavior of indexing on a MultiIndex with a "
3295-
"nested sequence of labels is deprecated and will "
3296-
"change in a future version. "
3297-
"`series.loc[label, sequence]` will raise if any "
3298-
"members of 'sequence' or not present in "
3299-
"the index's second level. To retain the old "
3300-
"behavior, use `series.index.isin(sequence, level=1)`",
3301-
# TODO: how to opt in to the future behavior?
3302-
# TODO: how to handle IntervalIndex level?
3303-
# (no test cases)
3304-
FutureWarning,
3305-
stacklevel=find_stack_level(),
3306-
)
3307-
continue
3287+
# GH 39424: Ignore not founds
3288+
# GH 42351: No longer ignore not founds & enforced in 2.0
3289+
# TODO: how to handle IntervalIndex level? (no test cases)
3290+
item_indexer = self._get_level_indexer(
3291+
x, level=i, indexer=indexer
3292+
)
3293+
if lvl_indexer is None:
3294+
lvl_indexer = _to_bool_indexer(item_indexer)
3295+
elif isinstance(item_indexer, slice):
3296+
lvl_indexer[item_indexer] = True # type: ignore[index]
33083297
else:
3309-
if lvl_indexer is None:
3310-
lvl_indexer = _to_bool_indexer(item_indexer)
3311-
elif isinstance(item_indexer, slice):
3312-
lvl_indexer[item_indexer] = True # type: ignore[index]
3313-
else:
3314-
lvl_indexer |= item_indexer
3298+
lvl_indexer |= item_indexer
33153299

33163300
if lvl_indexer is None:
33173301
# no matches we are done

pandas/tests/indexing/multiindex/test_loc.py

+5-8
Original file line numberDiff line numberDiff line change
@@ -443,15 +443,12 @@ def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos):
443443
if expected.size == 0 and indexer != []:
444444
with pytest.raises(KeyError, match=str(indexer)):
445445
ser.loc[indexer]
446+
elif indexer == (slice(None), ["foo", "bah"]):
447+
# "bah" is not in idx.levels[1], raising KeyError enforced in 2.0
448+
with pytest.raises(KeyError, match="'bah'"):
449+
ser.loc[indexer]
446450
else:
447-
warn = None
448-
msg = "MultiIndex with a nested sequence"
449-
if indexer == (slice(None), ["foo", "bah"]):
450-
# "bah" is not in idx.levels[1], so is ignored, will raise KeyError
451-
warn = FutureWarning
452-
453-
with tm.assert_produces_warning(warn, match=msg):
454-
result = ser.loc[indexer]
451+
result = ser.loc[indexer]
455452
tm.assert_series_equal(result, expected)
456453

457454

pandas/tests/io/formats/style/test_style.py

+14-16
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import contextlib
12
import copy
23
import re
34
from textwrap import dedent
@@ -701,26 +702,26 @@ def test_applymap_subset(self, slice_, df):
701702
def test_applymap_subset_multiindex(self, slice_):
702703
# GH 19861
703704
# edited for GH 33562
704-
warn = None
705-
msg = "indexing on a MultiIndex with a nested sequence of labels"
706705
if (
707706
isinstance(slice_[-1], tuple)
708707
and isinstance(slice_[-1][-1], list)
709708
and "C" in slice_[-1][-1]
710709
):
711-
warn = FutureWarning
710+
ctx = pytest.raises(KeyError, match="C") # noqa: PDF010
712711
elif (
713712
isinstance(slice_[0], tuple)
714713
and isinstance(slice_[0][1], list)
715714
and 3 in slice_[0][1]
716715
):
717-
warn = FutureWarning
716+
ctx = pytest.raises(KeyError, match="3") # noqa: PDF010
717+
else:
718+
ctx = contextlib.nullcontext()
718719

719720
idx = MultiIndex.from_product([["a", "b"], [1, 2]])
720721
col = MultiIndex.from_product([["x", "y"], ["A", "B"]])
721722
df = DataFrame(np.random.rand(4, 4), columns=col, index=idx)
722723

723-
with tm.assert_produces_warning(warn, match=msg):
724+
with ctx:
724725
df.style.applymap(lambda x: "color: red;", subset=slice_).to_html()
725726

726727
def test_applymap_subset_multiindex_code(self):
@@ -1390,7 +1391,7 @@ def test_non_reducing_slice_on_multiindex(self):
13901391
IndexSlice[:, IndexSlice["a", :, "e"]],
13911392
IndexSlice[:, IndexSlice[:, "c", "e"]],
13921393
IndexSlice[:, IndexSlice["a", ["c", "d"], :]], # check list
1393-
IndexSlice[:, IndexSlice["a", ["c", "d", "-"], :]], # allow missing
1394+
IndexSlice[:, IndexSlice["a", ["c", "d", "-"], :]], # don't allow missing
13941395
IndexSlice[:, IndexSlice["a", ["c", "d", "-"], "e"]], # no slice
13951396
# check rows
13961397
IndexSlice[IndexSlice[["U"]], :], # inferred deeper need list
@@ -1399,7 +1400,7 @@ def test_non_reducing_slice_on_multiindex(self):
13991400
IndexSlice[IndexSlice["U", :, "Y"], :],
14001401
IndexSlice[IndexSlice[:, "W", "Y"], :],
14011402
IndexSlice[IndexSlice[:, "W", ["Y", "Z"]], :], # check list
1402-
IndexSlice[IndexSlice[:, "W", ["Y", "Z", "-"]], :], # allow missing
1403+
IndexSlice[IndexSlice[:, "W", ["Y", "Z", "-"]], :], # don't allow missing
14031404
IndexSlice[IndexSlice["U", "W", ["Y", "Z", "-"]], :], # no slice
14041405
# check simultaneous
14051406
IndexSlice[IndexSlice[:, "W", "Y"], IndexSlice["a", "c", :]],
@@ -1411,21 +1412,18 @@ def test_non_reducing_multi_slice_on_multiindex(self, slice_):
14111412
idxs = MultiIndex.from_product([["U", "V"], ["W", "X"], ["Y", "Z"]])
14121413
df = DataFrame(np.arange(64).reshape(8, 8), columns=cols, index=idxs)
14131414

1414-
msg = "indexing on a MultiIndex with a nested sequence of labels"
1415-
warn = None
14161415
for lvl in [0, 1]:
14171416
key = slice_[lvl]
14181417
if isinstance(key, tuple):
14191418
for subkey in key:
14201419
if isinstance(subkey, list) and "-" in subkey:
1421-
# not present in the index level, ignored, will raise in future
1422-
warn = FutureWarning
1423-
1424-
with tm.assert_produces_warning(warn, match=msg):
1425-
expected = df.loc[slice_]
1420+
# not present in the index level, raises KeyError since 2.0
1421+
with pytest.raises(KeyError, match="-"):
1422+
df.loc[slice_]
1423+
return
14261424

1427-
with tm.assert_produces_warning(warn, match=msg):
1428-
result = df.loc[non_reducing_slice(slice_)]
1425+
expected = df.loc[slice_]
1426+
result = df.loc[non_reducing_slice(slice_)]
14291427
tm.assert_frame_equal(result, expected)
14301428

14311429

0 commit comments

Comments
 (0)