Skip to content

BUG: .loc with MultiIndex with tuple in level GH#27591 #42329

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Aug 8, 2021
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ Interval

Indexing
^^^^^^^^
-
- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` with a :class:`MultiIndex` when indexing with a tuple in which one of the levels is also a tuple (:issue:`27591`)
-

Missing
Expand Down
41 changes: 30 additions & 11 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3232,18 +3232,37 @@ def _update_indexer(idxr: Index | None, indexer: Index | None, key) -> Index:
# a collection of labels to include from this level (these
# are or'd)
indexers: Int64Index | None = None
for x in k:
try:
idxrs = _convert_to_indexer(
self._get_level_indexer(x, level=i, indexer=indexer)
)
indexers = (idxrs if indexers is None else indexers).union(
idxrs, sort=False
)
except KeyError:

# ignore not founds
continue
# GH#27591 check if this is a single tuple key in the level
try:
lev_loc = self._get_level_indexer(k, level=i, indexer=indexer)
except (InvalidIndexError, TypeError, KeyError) as err:
# InvalidIndexError (e.g. the key is non-hashable): fall back to treating
# this as a sequence of labels.
# KeyError: it can be ambiguous whether this is a single label or a
# sequence of labels; see
# github.com/pandas-dev/pandas/issues/39424#issuecomment-871626708
for x in k:
if not is_hashable(x):
# e.g. slice
raise err
try:
idxrs = _convert_to_indexer(
self._get_level_indexer(x, level=i, indexer=indexer)
)
except KeyError:
# ignore labels that are not found
continue
else:
indexers = (idxrs if indexers is None else indexers).union(
idxrs, sort=False
)

else:
idxrs = _convert_to_indexer(lev_loc)
indexers = (idxrs if indexers is None else indexers).union(
idxrs, sort=False
)

if indexers is not None:
indexer = _update_indexer(indexers, indexer=indexer, key=seq)
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -874,10 +874,13 @@ def _getitem_nested_tuple(self, tup: tuple):
if self.name != "loc":
# This should never be reached, but let's be explicit about it
raise ValueError("Too many indices")
if isinstance(self.obj, ABCSeries) and any(
isinstance(k, tuple) for k in tup
if any(
isinstance(k, tuple) and any(not is_hashable(item) for item in k)
for k in tup
):
# GH#35349 Raise if tuple in tuple for series
# e.g. test_loc_series_getitem_too_many_dimensions
# tup = (('A', slice(None, None, None)), slice(None, None, None))
raise ValueError("Too many indices")
if self.ndim == 1 or not any(isinstance(x, slice) for x in tup):
# GH#10521 Series should reduce MultiIndex dimensions instead of
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2517,6 +2517,36 @@ def test_loc_with_period_index_indexer():
tm.assert_frame_equal(df, df.loc[list(idx)])


def test_loc_getitem_multiindex_tuple_level():
    """Check ``.loc`` on a MultiIndex whose level labels are themselves tuples.

    Builds a three-level column MultiIndex in which the labels of the middle
    level are tuples, then selects with one label per level. The tuple label
    must be looked up as a single label rather than expanded into a sequence
    of labels. The same lookup is cross-checked against ``xs`` on both the
    DataFrame and a Series taken from its first row.
    """
    # GH#27591
    lev1 = ["a", "b", "c"]
    lev2 = [(0, 1), (1, 0)]
    lev3 = [0, 1]
    cols = MultiIndex.from_product([lev1, lev2, lev3], names=["x", "y", "z"])
    df = DataFrame(6, index=range(5), columns=cols)

    # One label per level; the middle label is itself a tuple.
    key = (lev1[0], lev2[0], lev3[0])

    # The lev2[0] inside ``key`` should be treated as a single label, not as
    # a sequence of labels.
    result = df.loc[:, key]

    # TODO: I think this should actually drop levels.
    expected = df.iloc[:, :1]
    tm.assert_frame_equal(result, expected)

    alt = df.xs(key, level=[0, 1, 2], axis=1)
    tm.assert_frame_equal(alt, expected)

    # Same thing on a Series.
    ser = df.iloc[0]
    expected2 = ser.iloc[:1]

    alt2 = ser.xs(key, level=[0, 1, 2], axis=0)
    tm.assert_series_equal(alt2, expected2)

    # ``ser.loc[key]`` is the same call as ``ser.loc[lev1[0], lev2[0], lev3[0]]``:
    # a comma-separated subscript is passed to ``__getitem__`` as one tuple.
    result2 = ser.loc[key]
    assert result2 == 6


class TestLocSeries:
@pytest.mark.parametrize("val,expected", [(2 ** 63 - 1, 3), (2 ** 63, 4)])
def test_loc_uint64(self, val, expected):
Expand Down