From 13a0374eb2a7d2bde4212a9099db5e7143121ad0 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 30 Jun 2021 13:52:39 -0700 Subject: [PATCH] BUG: .loc with MultiIndex with tuple in level GH#27591 --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/core/indexes/multi.py | 41 ++++++++++++++++++++++--------- pandas/core/indexing.py | 7 ++++-- pandas/tests/indexing/test_loc.py | 30 ++++++++++++++++++++++ 4 files changed, 66 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 81545ada63ce5..ebef08cc74aff 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -158,7 +158,7 @@ Interval Indexing ^^^^^^^^ -- +- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` with a :class:`MultiIndex` when indexing with a tuple in which one of the levels is also a tuple (:issue:`27591`) - Missing diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 39efc57052bc4..b076330423825 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3232,18 +3232,37 @@ def _update_indexer(idxr: Index | None, indexer: Index | None, key) -> Index: # a collection of labels to include from this level (these # are or'd) indexers: Int64Index | None = None - for x in k: - try: - idxrs = _convert_to_indexer( - self._get_level_indexer(x, level=i, indexer=indexer) - ) - indexers = (idxrs if indexers is None else indexers).union( - idxrs, sort=False - ) - except KeyError: - # ignore not founds - continue + # GH#27591 check if this is a single tuple key in the level + try: + lev_loc = self._get_level_indexer(k, level=i, indexer=indexer) + except (InvalidIndexError, TypeError, KeyError) as err: + # InvalidIndexError e.g. non-hashable, fall back to treating + # this as a sequence of labels + # KeyError it can be ambiguous if this is a label or sequence + # of labels + # github.com/pandas-dev/pandas/issues/39424#issuecomment-871626708 + for x in k: + if not is_hashable(x): + # e.g. 
slice + raise err + try: + idxrs = _convert_to_indexer( + self._get_level_indexer(x, level=i, indexer=indexer) + ) + except KeyError: + # ignore labels that are not found + continue + else: + indexers = (idxrs if indexers is None else indexers).union( + idxrs, sort=False + ) + + else: + idxrs = _convert_to_indexer(lev_loc) + indexers = (idxrs if indexers is None else indexers).union( + idxrs, sort=False + ) if indexers is not None: indexer = _update_indexer(indexers, indexer=indexer, key=seq) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index f8578d87e4cad..11dc90161fd96 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -874,10 +874,13 @@ def _getitem_nested_tuple(self, tup: tuple): if self.name != "loc": # This should never be reached, but lets be explicit about it raise ValueError("Too many indices") - if isinstance(self.obj, ABCSeries) and any( - isinstance(k, tuple) for k in tup + if any( + isinstance(k, tuple) and any(not is_hashable(item) for item in k) + for k in tup ): # GH#35349 Raise if tuple in tuple for series + # e.g. 
test_loc_series_getitem_too_many_dimensions + # tup = (('A', slice(None, None, None)), slice(None, None, None)) raise ValueError("Too many indices") if self.ndim == 1 or not any(isinstance(x, slice) for x in tup): # GH#10521 Series should reduce MultiIndex dimensions instead of diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index e96b25418d408..4aa1d52d2f082 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2517,6 +2517,36 @@ def test_loc_with_period_index_indexer(): tm.assert_frame_equal(df, df.loc[list(idx)]) +def test_loc_getitem_multiindex_tuple_level(): + # GH#27591 + lev1 = ["a", "b", "c"] + lev2 = [(0, 1), (1, 0)] + lev3 = [0, 1] + cols = MultiIndex.from_product([lev1, lev2, lev3], names=["x", "y", "z"]) + df = DataFrame(6, index=range(5), columns=cols) + + # the lev2[0] here should be treated as a single label, not as a sequence + # of labels + result = df.loc[:, (lev1[0], lev2[0], lev3[0])] + + # TODO: I think this should actually drop levels + expected = df.iloc[:, :1] + tm.assert_frame_equal(result, expected) + + alt = df.xs((lev1[0], lev2[0], lev3[0]), level=[0, 1, 2], axis=1) + tm.assert_frame_equal(alt, expected) + + # same thing on a Series + ser = df.iloc[0] + expected2 = ser.iloc[:1] + + alt2 = ser.xs((lev1[0], lev2[0], lev3[0]), level=[0, 1, 2], axis=0) + tm.assert_series_equal(alt2, expected2) + + result2 = ser.loc[lev1[0], lev2[0], lev3[0]] + assert result2 == 6 + + class TestLocSeries: @pytest.mark.parametrize("val,expected", [(2 ** 63 - 1, 3), (2 ** 63, 4)]) def test_loc_uint64(self, val, expected):