Skip to content

BUG: Preserve key order when using loc on MultiIndex DataFrame #29190

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
52 changes: 36 additions & 16 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2993,56 +2993,76 @@ def _update_indexer(idxr, indexer=indexer):
indexer = Index(np.arange(n))
if idxr is None:
return indexer
return indexer & idxr
return idxr & indexer

for i, k in enumerate(seq):
need_sort = False
indexer = Index(np.arange(n))
for i, k in enumerate(reversed(seq)):
i = len(seq) - 1 - i # Counting in reverse

if com.is_bool_indexer(k):
# a boolean indexer, must be the same length!
k = np.asarray(k)
indexer = _update_indexer(_convert_to_indexer(k), indexer=indexer)
indexer = _convert_to_indexer(k) & indexer

elif is_list_like(k):
# a collection of labels to include from this level (these
# are or'd)
indexers = None
indexers_union = None
if not need_sort:
# Find out whether the list-like labels are given in the same order as the level values
k_codes = np.array(
[self.levels[i].get_loc(e) for e in k if e in self.levels[i]]
)
need_sort = not (k_codes[:-1] < k_codes[1:]).all()
for x in k:
try:
idxrs = _convert_to_indexer(
self._get_level_indexer(x, level=i, indexer=indexer)
)
indexers = idxrs if indexers is None else indexers | idxrs
if need_sort and indexer is not None:
# We intersect with indexer to make idxrs
# ordered as previously seen indexes
idxrs = indexer.intersection(idxrs)

indexers_union = (
idxrs
if indexers_union is None
else indexers_union.union(idxrs, sort=False)
)
except KeyError:

# ignore labels that are not found
continue

if indexers is not None:
indexer = _update_indexer(indexers, indexer=indexer)
if indexers_union is not None:
if need_sort:
indexer = indexers_union
else:
indexer = indexers_union & indexer
else:
# no matches we are done
return Int64Index([])._ndarray_values

elif com.is_null_slice(k):
# empty slice
indexer = _update_indexer(None, indexer=indexer)
# index is given to conserve the order of this level
indexer = Int64Index(np.arange(n)) & indexer

elif isinstance(k, slice):

# a slice, include BOTH of the labels
indexer = _update_indexer(
indexer = (
_convert_to_indexer(
self._get_level_indexer(k, level=i, indexer=indexer)
),
indexer=indexer,
)
& indexer
)
else:
# a single label
indexer = _update_indexer(
indexer = (
_convert_to_indexer(
self.get_loc_level(k, level=i, drop_level=False)[0]
),
indexer=indexer,
)
& indexer
)

# empty indexer
Expand Down
45 changes: 45 additions & 0 deletions pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2471,3 +2471,48 @@ def test_sort_ascending_list(self):
result = s.sort_index(level=["third", "first"], ascending=[False, True])
expected = s.iloc[[0, 4, 1, 5, 2, 6, 3, 7]]
tm.assert_series_equal(result, expected)

def test_multiindex_loc_order(self):
    """Check that ``.loc`` with list-like keys follows the order of the keys.

    Regression test for GH 22797.  A frame with a two-level row index and a
    two-level column index is sliced with list-like keys, and the resulting
    index (or columns) is compared against the MultiIndex expected when the
    labels come back in the order they were requested.
    """
    frame = pd.DataFrame(
        np.arange(12).reshape((4, 3)),
        index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
        columns=[["Ohio", "Ohio", "Colorado"], ["Green", "Red", "Green"]],
    )

    # Each entry: (row key passed to .loc, arrays of the expected row index).
    row_cases = [
        (["b", "a"], [["b", "b", "a", "a"], [1, 2, 1, 2]]),
        (["a", "b"], [["a", "a", "b", "b"], [1, 2, 1, 2]]),
        ((["a", "b"], [1, 2]), [["a", "a", "b", "b"], [1, 2, 1, 2]]),
        ((["a", "b"], [2, 1]), [["a", "a", "b", "b"], [2, 1, 2, 1]]),
        ((["b", "a"], [2, 1]), [["b", "b", "a", "a"], [2, 1, 2, 1]]),
        ((["b", "a"], [1, 2]), [["b", "b", "a", "a"], [1, 2, 1, 2]]),
    ]
    for row_key, expected_arrays in row_cases:
        selected = frame.loc[row_key, :]
        expected_index = pd.MultiIndex.from_arrays(expected_arrays)
        tm.assert_index_equal(selected.index, expected_index)

    # Each entry: (column key passed to .loc, arrays of the expected columns).
    column_cases = [
        (
            ["Colorado", "Ohio"],
            [["Colorado", "Ohio", "Ohio"], ["Green", "Green", "Red"]],
        ),
        (
            (["Colorado", "Ohio"], ["Red", "Green"]),
            [["Colorado", "Ohio", "Ohio"], ["Green", "Red", "Green"]],
        ),
    ]
    for column_key, expected_arrays in column_cases:
        selected = frame.loc[:, column_key]
        expected_columns = pd.MultiIndex.from_arrays(expected_arrays)
        tm.assert_index_equal(selected.columns, expected_columns)