Skip to content

BUG: .loc with MultiIndex with tuple in level GH#27591 #42329

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Aug 8, 2021
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ Interval
Indexing
^^^^^^^^
- Bug in :meth:`DataFrame.truncate` and :meth:`Series.truncate` when the object's Index has a length greater than one but only one unique value (:issue:`42365`)
- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` with a :class:`MultiIndex` when indexing with a tuple in which one of the levels is also a tuple (:issue:`27591`)
- Bug in :meth:`Series.loc` with a :class:`MultiIndex` whose first level contains only ``np.nan`` values (:issue:`42055`)
- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` when passing a string, where the return type depended on whether the index was monotonic (:issue:`24892`)
- Bug in indexing on a :class:`MultiIndex` failing to drop scalar levels when the indexer is a tuple containing a datetime-like string (:issue:`42476`)
Expand Down
81 changes: 54 additions & 27 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3281,35 +3281,62 @@ def _update_indexer(idxr: Index, indexer: Index) -> Index:
# are or'd)

indexers: Int64Index | None = None
for x in k:
try:
# Argument "indexer" to "_get_level_indexer" of "MultiIndex"
# has incompatible type "Index"; expected "Optional[Int64Index]"
item_lvl_indexer = self._get_level_indexer(
x, level=i, indexer=indexer # type: ignore[arg-type]
)
except KeyError:
# ignore not founds; see discussion in GH#39424
warnings.warn(
"The behavior of indexing on a MultiIndex with a nested "
"sequence of labels is deprecated and will change in a "
"future version. `series.loc[label, sequence]` will "
"raise if any members of 'sequence' or not present in "
"the index's second level. To retain the old behavior, "
"use `series.index.isin(sequence, level=1)`",
# TODO: how to opt in to the future behavior?
# TODO: how to handle IntervalIndex level? (no test cases)
FutureWarning,
stacklevel=7,
)
continue
else:
idxrs = _convert_to_indexer(item_lvl_indexer)

if indexers is None:
indexers = idxrs
# GH#27591 check if this is a single tuple key in the level
try:
# Argument "indexer" to "_get_level_indexer" of "MultiIndex"
# has incompatible type "Index"; expected "Optional[Int64Index]"
lev_loc = self._get_level_indexer(
k, level=i, indexer=indexer # type: ignore[arg-type]
)
except (InvalidIndexError, TypeError, KeyError) as err:
# InvalidIndexError e.g. non-hashable, fall back to treating
# this as a sequence of labels
# KeyError it can be ambiguous if this is a label or sequence
# of labels
# github.com/pandas-dev/pandas/issues/39424#issuecomment-871626708
for x in k:
if not is_hashable(x):
# e.g. slice
raise err
try:
# Argument "indexer" to "_get_level_indexer" of "MultiIndex"
# has incompatible type "Index"; expected
# "Optional[Int64Index]"
item_lvl_indexer = self._get_level_indexer(
x, level=i, indexer=indexer # type: ignore[arg-type]
)
except KeyError:
# ignore not founds; see discussion in GH#39424
warnings.warn(
"The behavior of indexing on a MultiIndex with a "
"nested sequence of labels is deprecated and will "
"change in a future version. "
"`series.loc[label, sequence]` will raise if any "
"members of 'sequence' or not present in "
"the index's second level. To retain the old "
"behavior, use `series.index.isin(sequence, level=1)`",
# TODO: how to opt in to the future behavior?
# TODO: how to handle IntervalIndex level?
# (no test cases)
FutureWarning,
stacklevel=7,
)
continue
else:
indexers = indexers.union(idxrs, sort=False)
idxrs = _convert_to_indexer(item_lvl_indexer)

if indexers is None:
indexers = idxrs
else:
indexers = indexers.union(idxrs, sort=False)

else:
idxrs = _convert_to_indexer(lev_loc)
if indexers is None:
indexers = idxrs
else:
indexers = indexers.union(idxrs, sort=False)

if indexers is not None:
indexer = _update_indexer(indexers, indexer=indexer)
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2541,6 +2541,36 @@ def test_loc_with_period_index_indexer():
tm.assert_frame_equal(df, df.loc[list(idx)])


def test_loc_getitem_multiindex_tuple_level():
    """GH#27591: a tuple stored in a MultiIndex level is a single label.

    Builds a three-level column MultiIndex whose middle level holds tuples,
    then checks that ``.loc`` and ``.xs`` agree when addressed with a full
    key, both on the DataFrame columns and on a Series taken from one row.
    """
    outer = ["a", "b", "c"]
    middle = [(0, 1), (1, 0)]
    inner = [0, 1]
    columns = MultiIndex.from_product(
        [outer, middle, inner], names=["x", "y", "z"]
    )
    frame = DataFrame(6, index=range(5), columns=columns)

    # Full key addressing the very first column; its middle element is
    # itself a tuple and must be looked up as one label, not as a
    # sequence of labels.
    key = (outer[0], middle[0], inner[0])

    # TODO: i think this actually should drop levels
    first_column = frame.iloc[:, :1]

    via_loc = frame.loc[:, key]
    tm.assert_frame_equal(via_loc, first_column)

    via_xs = frame.xs(key, level=[0, 1, 2], axis=1)
    tm.assert_frame_equal(via_xs, first_column)

    # Repeat the checks on a Series indexed by the same MultiIndex.
    row = frame.iloc[0]
    first_entry = row.iloc[:1]

    row_via_xs = row.xs(key, level=[0, 1, 2], axis=0)
    tm.assert_series_equal(row_via_xs, first_entry)

    # Spelled out as three separate labels, .loc returns the scalar.
    scalar = row.loc[outer[0], middle[0], inner[0]]
    assert scalar == 6


class TestLocSeries:
@pytest.mark.parametrize("val,expected", [(2 ** 63 - 1, 3), (2 ** 63, 4)])
def test_loc_uint64(self, val, expected):
Expand Down