diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index fa9c424351b00..2dcbef46e4f75 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -232,6 +232,7 @@ Interval Indexing ^^^^^^^^ - Bug in :meth:`DataFrame.truncate` and :meth:`Series.truncate` when the object's Index has a length greater than one but only one unique value (:issue:`42365`) +- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` with a :class:`MultiIndex` when indexing with a tuple in which one of the levels is also a tuple (:issue:`27591`) - Bug in :meth:`Series.loc` with a :class:`MultiIndex` whose first level contains only ``np.nan`` values (:issue:`42055`) - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` when passing a string, the return type depended on whether the index was monotonic (:issue:`24892`) - Bug in indexing on a :class:`MultiIndex` failing to drop scalar levels when the indexer is a tuple containing a datetime-like string (:issue:`42476`) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index e5aa8e95e23de..cb43020fcb75d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3288,35 +3288,62 @@ def _update_indexer(idxr: Index, indexer: Index) -> Index: # are or'd) indexers: Int64Index | None = None - for x in k: - try: - # Argument "indexer" to "_get_level_indexer" of "MultiIndex" - # has incompatible type "Index"; expected "Optional[Int64Index]" - item_lvl_indexer = self._get_level_indexer( - x, level=i, indexer=indexer # type: ignore[arg-type] - ) - except KeyError: - # ignore not founds; see discussion in GH#39424 - warnings.warn( - "The behavior of indexing on a MultiIndex with a nested " - "sequence of labels is deprecated and will change in a " - "future version. `series.loc[label, sequence]` will " - "raise if any members of 'sequence' or not present in " - "the index's second level. 
To retain the old behavior, " - "use `series.index.isin(sequence, level=1)`", - # TODO: how to opt in to the future behavior? - # TODO: how to handle IntervalIndex level? (no test cases) - FutureWarning, - stacklevel=7, - ) - continue - else: - idxrs = _convert_to_indexer(item_lvl_indexer) - if indexers is None: - indexers = idxrs + # GH#27591 check if this is a single tuple key in the level + try: + # Argument "indexer" to "_get_level_indexer" of "MultiIndex" + # has incompatible type "Index"; expected "Optional[Int64Index]" + lev_loc = self._get_level_indexer( + k, level=i, indexer=indexer # type: ignore[arg-type] + ) + except (InvalidIndexError, TypeError, KeyError) as err: + # InvalidIndexError e.g. non-hashable, fall back to treating + # this as a sequence of labels + # KeyError it can be ambiguous if this is a label or sequence + # of labels + # github.com/pandas-dev/pandas/issues/39424#issuecomment-871626708 + for x in k: + if not is_hashable(x): + # e.g. slice + raise err + try: + # Argument "indexer" to "_get_level_indexer" of "MultiIndex" + # has incompatible type "Index"; expected + # "Optional[Int64Index]" + item_lvl_indexer = self._get_level_indexer( + x, level=i, indexer=indexer # type: ignore[arg-type] + ) + except KeyError: + # ignore not founds; see discussion in GH#39424 + warnings.warn( + "The behavior of indexing on a MultiIndex with a " + "nested sequence of labels is deprecated and will " + "change in a future version. " + "`series.loc[label, sequence]` will raise if any " + "members of 'sequence' or not present in " + "the index's second level. To retain the old " + "behavior, use `series.index.isin(sequence, level=1)`", + # TODO: how to opt in to the future behavior? + # TODO: how to handle IntervalIndex level? 
+ # (no test cases) + FutureWarning, + stacklevel=7, + ) + continue else: - indexers = indexers.union(idxrs, sort=False) + idxrs = _convert_to_indexer(item_lvl_indexer) + + if indexers is None: + indexers = idxrs + else: + indexers = indexers.union(idxrs, sort=False) + + else: + idxrs = _convert_to_indexer(lev_loc) + if indexers is None: + indexers = idxrs + else: + indexers = indexers.union(idxrs, sort=False) if indexers is not None: indexer = _update_indexer(indexers, indexer=indexer) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 6692a06c79d45..165c7b8414e4a 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2583,6 +2583,36 @@ def test_loc_with_period_index_indexer(): tm.assert_frame_equal(df, df.loc[list(idx)]) +def test_loc_getitem_multiindex_tuple_level(): + # GH#27591 + lev1 = ["a", "b", "c"] + lev2 = [(0, 1), (1, 0)] + lev3 = [0, 1] + cols = MultiIndex.from_product([lev1, lev2, lev3], names=["x", "y", "z"]) + df = DataFrame(6, index=range(5), columns=cols) + + # the lev2[0] here should be treated as a single label, not as a sequence + # of labels + result = df.loc[:, (lev1[0], lev2[0], lev3[0])] + + # TODO: i think this actually should drop levels + expected = df.iloc[:, :1] + tm.assert_frame_equal(result, expected) + + alt = df.xs((lev1[0], lev2[0], lev3[0]), level=[0, 1, 2], axis=1) + tm.assert_frame_equal(alt, expected) + + # same thing on a Series + ser = df.iloc[0] + expected2 = ser.iloc[:1] + + alt2 = ser.xs((lev1[0], lev2[0], lev3[0]), level=[0, 1, 2], axis=0) + tm.assert_series_equal(alt2, expected2) + + result2 = ser.loc[lev1[0], lev2[0], lev3[0]] + assert result2 == 6 + + class TestLocSeries: @pytest.mark.parametrize("val,expected", [(2 ** 63 - 1, 3), (2 ** 63, 4)]) def test_loc_uint64(self, val, expected):