diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 84d7399cc4f2d..a577cd49978b5 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2993,56 +2993,76 @@ def _update_indexer(idxr, indexer=indexer): indexer = Index(np.arange(n)) if idxr is None: return indexer - return indexer & idxr + return idxr & indexer - for i, k in enumerate(seq): + need_sort = False + indexer = Index(np.arange(n)) + for i, k in enumerate(reversed(seq)): + i = len(seq) - 1 - i # Counting in reverse if com.is_bool_indexer(k): # a boolean indexer, must be the same length! k = np.asarray(k) - indexer = _update_indexer(_convert_to_indexer(k), indexer=indexer) + indexer = _convert_to_indexer(k) & indexer elif is_list_like(k): # a collection of labels to include from this level (these # are or'd) - indexers = None + indexers_union = None + if not need_sort: + # Find out if the list-like labels are in the level's order or not + k_codes = np.array( + [self.levels[i].get_loc(e) for e in k if e in self.levels[i]] + ) + need_sort = not (k_codes[:-1] < k_codes[1:]).all() for x in k: try: idxrs = _convert_to_indexer( self._get_level_indexer(x, level=i, indexer=indexer) ) - indexers = idxrs if indexers is None else indexers | idxrs + if need_sort and indexer is not None: + # We intersect with indexer to make idxrs + # ordered as previously seen indexes + idxrs = indexer.intersection(idxrs) + + indexers_union = ( + idxrs + if indexers_union is None + else indexers_union.union(idxrs, sort=False) + ) except KeyError: - # ignore not founds continue - if indexers is not None: - indexer = _update_indexer(indexers, indexer=indexer) + if indexers_union is not None: + if need_sort: + indexer = indexers_union + else: + indexer = indexers_union & indexer else: # no matches we are done return Int64Index([])._ndarray_values elif com.is_null_slice(k): # empty slice - indexer = _update_indexer(None, indexer=indexer) + # index is given to conserve the order of this 
level + indexer = Int64Index(np.arange(n)) & indexer elif isinstance(k, slice): - # a slice, include BOTH of the labels - indexer = _update_indexer( + indexer = ( _convert_to_indexer( self._get_level_indexer(k, level=i, indexer=indexer) - ), - indexer=indexer, + ) + & indexer ) else: # a single label - indexer = _update_indexer( + indexer = ( _convert_to_indexer( self.get_loc_level(k, level=i, drop_level=False)[0] - ), - indexer=indexer, + ) + & indexer ) # empty indexer diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 5382ad84bcca2..427a0bc0c9869 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2471,3 +2471,48 @@ def test_sort_ascending_list(self): result = s.sort_index(level=["third", "first"], ascending=[False, True]) expected = s.iloc[[0, 4, 1, 5, 2, 6, 3, 7]] tm.assert_series_equal(result, expected) + + def test_multiindex_loc_order(self): + # GH 22797 + # Try to respect order of keys given for MultiIndex.loc + df = pd.DataFrame( + np.arange(12).reshape((4, 3)), + index=[["a", "a", "b", "b"], [1, 2, 1, 2]], + columns=[["Ohio", "Ohio", "Colorado"], ["Green", "Red", "Green"]], + ) + + res = df.loc[["b", "a"], :] + exp_index = pd.MultiIndex.from_arrays([["b", "b", "a", "a"], [1, 2, 1, 2]]) + tm.assert_index_equal(res.index, exp_index) + + res = df.loc[["a", "b"], :] + exp_index = pd.MultiIndex.from_arrays([["a", "a", "b", "b"], [1, 2, 1, 2]]) + tm.assert_index_equal(res.index, exp_index) + + res = df.loc[(["a", "b"], [1, 2]), :] + exp_index = pd.MultiIndex.from_arrays([["a", "a", "b", "b"], [1, 2, 1, 2]]) + tm.assert_index_equal(res.index, exp_index) + + res = df.loc[(["a", "b"], [2, 1]), :] + exp_index = pd.MultiIndex.from_arrays([["a", "a", "b", "b"], [2, 1, 2, 1]]) + tm.assert_index_equal(res.index, exp_index) + + res = df.loc[(["b", "a"], [2, 1]), :] + exp_index = pd.MultiIndex.from_arrays([["b", "b", "a", "a"], [2, 1, 2, 1]]) + tm.assert_index_equal(res.index, exp_index) + + res = 
df.loc[(["b", "a"], [1, 2]), :] + exp_index = pd.MultiIndex.from_arrays([["b", "b", "a", "a"], [1, 2, 1, 2]]) + tm.assert_index_equal(res.index, exp_index) + + res = df.loc[:, ["Colorado", "Ohio"]] + exp_columns = pd.MultiIndex.from_arrays( + [["Colorado", "Ohio", "Ohio"], ["Green", "Green", "Red"]] + ) + tm.assert_index_equal(res.columns, exp_columns) + + res = df.loc[:, (["Colorado", "Ohio"], ["Red", "Green"])] + exp_columns = pd.MultiIndex.from_arrays( + [["Colorado", "Ohio", "Ohio"], ["Green", "Red", "Green"]] + ) + tm.assert_index_equal(res.columns, exp_columns)