From 6f2e51d4ce0e517c8f81ea8ea857819d0cbcbe11 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 29 Jun 2021 13:58:16 -0700 Subject: [PATCH 1/4] REF: remove drop_level kwarg from MultiIndex._get_loc_level --- pandas/core/generic.py | 10 +++++++--- pandas/core/indexes/multi.py | 29 ++++++++++++++++------------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c052b977ea07a..424041a59f3a9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3765,11 +3765,15 @@ class animal locomotion if isinstance(index, MultiIndex): try: - loc, new_index = index._get_loc_level( - key, level=0, drop_level=drop_level - ) + loc, new_index = index._get_loc_level(key, level=0) except TypeError as e: raise TypeError(f"Expected label or tuple of labels, got {key}") from e + else: + if not drop_level: + if lib.is_integer(loc): + new_index = index[loc : loc + 1] + else: + new_index = index[loc] else: loc = index.get_loc(key) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 39efc57052bc4..e7488e7693e6e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2931,17 +2931,22 @@ def get_loc_level(self, key, level=0, drop_level: bool = True): level = self._get_level_number(level) else: level = [self._get_level_number(lev) for lev in level] - return self._get_loc_level(key, level=level, drop_level=drop_level) - def _get_loc_level(self, key, level: int | list[int] = 0, drop_level: bool = True): + loc, mi = self._get_loc_level(key, level=level) + if not drop_level: + if lib.is_integer(loc): + mi = self[loc : loc + 1] + else: + mi = self[loc] + return loc, mi + + def _get_loc_level(self, key, level: int | list[int] = 0): """ get_loc_level but with `level` known to be positional, not name-based. """ # different name to distinguish from maybe_droplevels - def maybe_mi_droplevels(indexer, levels, drop_level: bool): - if not drop_level: - return self[indexer] + def maybe_mi_droplevels(indexer, levels): # kludge around orig_index = new_index = self[indexer] @@ -2969,7 +2974,7 @@ def maybe_mi_droplevels(indexer, levels, drop_level: bool): result = loc if result is None else result & loc - return result, maybe_mi_droplevels(result, level, drop_level) + return result, maybe_mi_droplevels(result, level) # kludge for #1796 if isinstance(key, list): @@ -2980,7 +2985,7 @@ def maybe_mi_droplevels(indexer, levels, drop_level: bool): try: if key in self.levels[0]: indexer = self._get_level_indexer(key, level=level) - new_index = maybe_mi_droplevels(indexer, [0], drop_level) + new_index = maybe_mi_droplevels(indexer, [0]) return indexer, new_index except (TypeError, InvalidIndexError): pass @@ -2995,7 +3000,7 @@ def partial_selection(key, indexer=None): ilevels = [ i for i in range(len(key)) if key[i] != slice(None, None) ] - return indexer, maybe_mi_droplevels(indexer, ilevels, drop_level) + return indexer, maybe_mi_droplevels(indexer, ilevels) if len(key) == self.nlevels and self.is_unique: # Complete key in unique index -> standard get_loc @@ -3030,10 +3035,10 @@ def partial_selection(key, indexer=None): if indexer is None: indexer = slice(None, None) ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)] - return indexer, maybe_mi_droplevels(indexer, ilevels, drop_level) + return indexer, maybe_mi_droplevels(indexer, ilevels) else: indexer = self._get_level_indexer(key, level=level) - return indexer, maybe_mi_droplevels(indexer, [level], drop_level) + return indexer, maybe_mi_droplevels(indexer, [level]) def _get_level_indexer(self, key, level: int = 0, indexer=None): # `level` kwarg is _always_ positional, never name @@ -3268,9 +3273,7 @@ def _update_indexer(idxr: Index | None, indexer: Index | None, key) -> Index: else: # a single label indexer = _update_indexer( - _convert_to_indexer( - self.get_loc_level(k, level=i, drop_level=False)[0] - ), + _convert_to_indexer(self.get_loc_level(k, level=i)[0]), indexer=indexer, key=seq, ) From 247e70c66ef593c486e399805db7f7d0d603775e Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 29 Jun 2021 18:46:05 -0700 Subject: [PATCH 2/4] get_loc_level-> _get_loc_level --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index e7488e7693e6e..cac0a8cc5791f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3273,7 +3273,7 @@ def _update_indexer(idxr: Index | None, indexer: Index | None, key) -> Index: else: # a single label indexer = _update_indexer( - _convert_to_indexer(self.get_loc_level(k, level=i)[0]), + _convert_to_indexer(self._get_loc_level(k, level=i)[0]), indexer=indexer, key=seq, ) From e722751d7486cd3f38ba5e5a75841f5b503fb696 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 30 Jun 2021 07:30:15 -0700 Subject: [PATCH 3/4] REF: avoid try/except in maybe_mi_droplevel --- pandas/core/generic.py | 6 ++- pandas/core/indexes/multi.py | 41 +++++++++---------- .../indexes/datetimes/test_partial_slicing.py | 11 ++--- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 424041a59f3a9..f2a1d3465cafd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3766,8 +3766,10 @@ class animal locomotion if isinstance(index, MultiIndex): try: loc, new_index = index._get_loc_level(key, level=0) - except TypeError as e: - raise TypeError(f"Expected label or tuple of labels, got {key}") from e + except TypeError as err: + raise TypeError( + f"Expected label or tuple of labels, got {key}" + ) from err else: if not drop_level: if lib.is_integer(loc): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index cac0a8cc5791f..f21dad7bc927b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2947,16 +2947,11 @@ def _get_loc_level(self, key, level: int | list[int] = 0): # different name to distinguish from maybe_droplevels def maybe_mi_droplevels(indexer, levels): - # kludge around - orig_index = new_index = self[indexer] + new_index = self[indexer] for i in sorted(levels, reverse=True): - try: - new_index = new_index._drop_level_numbers([i]) - except ValueError: + new_index = new_index._drop_level_numbers([i]) - # no dropping here - return orig_index return new_index if isinstance(level, (tuple, list)): @@ -2992,24 +2987,26 @@ def maybe_mi_droplevels(indexer, levels): if not any(isinstance(k, slice) for k in key): - # partial selection - # optionally get indexer to avoid re-calculation - def partial_selection(key, indexer=None): - if indexer is None: - indexer = self.get_loc(key) - ilevels = [ - i for i in range(len(key)) if key[i] != slice(None, None) - ] - return indexer, maybe_mi_droplevels(indexer, ilevels) - if len(key) == self.nlevels and self.is_unique: # Complete key in unique index -> standard get_loc try: return (self._engine.get_loc(key), None) - except KeyError as e: - raise KeyError(key) from e - else: - return partial_selection(key) + except KeyError as err: + raise KeyError(key) from err + + # partial selection + indexer = self.get_loc(key) + ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)] + if len(ilevels) == self.nlevels: + if is_integer(indexer): + # we are dropping all levels + return indexer, None + + # TODO: in some cases we still need to drop some levels, + # e.g. test_multiindex_perf_warn + ilevels = [] + return indexer, maybe_mi_droplevels(indexer, ilevels) + else: indexer = None for i, k in enumerate(key): @@ -3030,7 +3027,7 @@ def partial_selection(key, indexer=None): if indexer is None: indexer = k_index - else: # pragma: no cover + else: indexer &= k_index if indexer is None: indexer = slice(None, None) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 882515799f943..ce8a30e81619c 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -284,22 +284,23 @@ def test_partial_slicing_with_multiindex(self): with pytest.raises(IndexingError, match=msg): df_multi.loc[("2013-06-19", "ACCT1", "ABC")] + def test_partial_slicing_with_multiindex_series(self): # GH 4294 # partial slice on a series mi - s = DataFrame( + ser = DataFrame( np.random.rand(1000, 1000), index=date_range("2000-1-1", periods=1000) ).stack() - s2 = s[:-1].copy() + s2 = ser[:-1].copy() expected = s2["2000-1-4"] result = s2[Timestamp("2000-1-4")] tm.assert_series_equal(result, expected) - result = s[Timestamp("2000-1-4")] - expected = s["2000-1-4"] + result = ser[Timestamp("2000-1-4")] + expected = ser["2000-1-4"] tm.assert_series_equal(result, expected) - df2 = DataFrame(s) + df2 = DataFrame(ser) expected = df2.xs("2000-1-4") result = df2.loc[Timestamp("2000-1-4")] tm.assert_frame_equal(result, expected) From 842934aa6ca28d0538b1f97925087fa3179f3019 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 30 Jun 2021 09:09:40 -0700 Subject: [PATCH 4/4] make doctest into regular test, fix it --- pandas/core/indexes/multi.py | 12 ++++++++++-- pandas/tests/frame/indexing/test_xs.py | 17 +++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f21dad7bc927b..f16e4a8cd6bea 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2966,10 +2966,18 @@ def maybe_mi_droplevels(indexer, levels): mask = np.zeros(len(self), dtype=bool) mask[loc] = True loc = mask - result = loc if result is None else result & loc - return result, maybe_mi_droplevels(result, level) + try: + # FIXME: we should be only dropping levels on which we are + # scalar-indexing + mi = maybe_mi_droplevels(result, level) + except ValueError: + # droplevel failed because we tried to drop all levels, + # i.e. len(level) == self.nlevels + mi = self[result] + + return result, mi # kludge for #1796 if isinstance(key, list): diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index ccd989e2de411..a76aec9ebda44 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -129,6 +129,23 @@ def test_xs_view(self, using_array_manager): class TestXSWithMultiIndex: + def test_xs_doc_example(self): + # TODO: more descriptive name + # based on example in advanced.rst + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = list(zip(*arrays)) + + index = MultiIndex.from_tuples(tuples, names=["first", "second"]) + df = DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index) + + result = df.xs(("one", "bar"), level=("second", "first"), axis=1) + + expected = df.iloc[:, [0]] + tm.assert_frame_equal(result, expected) + def test_xs_integer_key(self): # see GH#2107 dates = range(20111201, 20111205)