diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1abb01099f977..3724fe5b2c0bc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3767,8 +3767,10 @@ class animal locomotion if isinstance(index, MultiIndex): try: loc, new_index = index._get_loc_level(key, level=0) - except TypeError as e: - raise TypeError(f"Expected label or tuple of labels, got {key}") from e + except TypeError as err: + raise TypeError( + f"Expected label or tuple of labels, got {key}" + ) from err else: if not drop_level: if lib.is_integer(loc): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index cac0a8cc5791f..f16e4a8cd6bea 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2947,16 +2947,11 @@ def _get_loc_level(self, key, level: int | list[int] = 0): # different name to distinguish from maybe_droplevels def maybe_mi_droplevels(indexer, levels): - # kludge around - orig_index = new_index = self[indexer] + new_index = self[indexer] for i in sorted(levels, reverse=True): - try: - new_index = new_index._drop_level_numbers([i]) - except ValueError: + new_index = new_index._drop_level_numbers([i]) - # no dropping here - return orig_index return new_index if isinstance(level, (tuple, list)): @@ -2971,10 +2966,18 @@ def maybe_mi_droplevels(indexer, levels): mask = np.zeros(len(self), dtype=bool) mask[loc] = True loc = mask - result = loc if result is None else result & loc - return result, maybe_mi_droplevels(result, level) + try: + # FIXME: we should be only dropping levels on which we are + # scalar-indexing + mi = maybe_mi_droplevels(result, level) + except ValueError: + # droplevel failed because we tried to drop all levels, + # i.e. len(level) == self.nlevels + mi = self[result] + + return result, mi # kludge for #1796 if isinstance(key, list): @@ -2992,24 +2995,26 @@ def maybe_mi_droplevels(indexer, levels): if not any(isinstance(k, slice) for k in key): - # partial selection - # optionally get indexer to avoid re-calculation - def partial_selection(key, indexer=None): - if indexer is None: - indexer = self.get_loc(key) - ilevels = [ - i for i in range(len(key)) if key[i] != slice(None, None) - ] - return indexer, maybe_mi_droplevels(indexer, ilevels) - if len(key) == self.nlevels and self.is_unique: # Complete key in unique index -> standard get_loc try: return (self._engine.get_loc(key), None) - except KeyError as e: - raise KeyError(key) from e - else: - return partial_selection(key) + except KeyError as err: + raise KeyError(key) from err + + # partial selection + indexer = self.get_loc(key) + ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)] + if len(ilevels) == self.nlevels: + if is_integer(indexer): + # we are dropping all levels + return indexer, None + + # TODO: in some cases we still need to drop some levels, + # e.g. test_multiindex_perf_warn + ilevels = [] + return indexer, maybe_mi_droplevels(indexer, ilevels) + else: indexer = None for i, k in enumerate(key): @@ -3030,7 +3035,7 @@ def partial_selection(key, indexer=None): if indexer is None: indexer = k_index - else: # pragma: no cover + else: indexer &= k_index if indexer is None: indexer = slice(None, None) diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index ccd989e2de411..a76aec9ebda44 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -129,6 +129,23 @@ def test_xs_view(self, using_array_manager): class TestXSWithMultiIndex: + def test_xs_doc_example(self): + # TODO: more descriptive name + # based on example in advanced.rst + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = list(zip(*arrays)) + + index = MultiIndex.from_tuples(tuples, names=["first", "second"]) + df = DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index) + + result = df.xs(("one", "bar"), level=("second", "first"), axis=1) + + expected = df.iloc[:, [0]] + tm.assert_frame_equal(result, expected) + def test_xs_integer_key(self): # see GH#2107 dates = range(20111201, 20111205) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 5e1fdc3b62f42..87c56ea588a5d 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -343,22 +343,23 @@ def test_partial_slicing_with_multiindex(self): with pytest.raises(IndexingError, match=msg): df_multi.loc[("2013-06-19", "ACCT1", "ABC")] + def test_partial_slicing_with_multiindex_series(self): # GH 4294 # partial slice on a series mi - s = DataFrame( + ser = DataFrame( np.random.rand(1000, 1000), index=date_range("2000-1-1", periods=1000) ).stack() - s2 = s[:-1].copy() + s2 = ser[:-1].copy() expected = s2["2000-1-4"] result = s2[Timestamp("2000-1-4")] tm.assert_series_equal(result, expected) - result = s[Timestamp("2000-1-4")] - expected = s["2000-1-4"] + result = ser[Timestamp("2000-1-4")] + expected = ser["2000-1-4"] tm.assert_series_equal(result, expected) - df2 = DataFrame(s) + df2 = DataFrame(ser) expected = df2.xs("2000-1-4") result = df2.loc[Timestamp("2000-1-4")] tm.assert_frame_equal(result, expected)