Skip to content

Commit df5572c

Browse files
authored
BUG: Preserve key order when using loc on MultiIndex DataFrame (#28933)
1 parent 83f84ed commit df5572c

File tree

3 files changed

+96
-3
lines changed

3 files changed

+96
-3
lines changed

doc/source/whatsnew/v1.1.0.rst

+7-1
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,14 @@ Missing
161161

162162
MultiIndex
163163
^^^^^^^^^^
164+
- Bug in :meth:`DataFrame.loc` when used with a :class:`MultiIndex`. The returned values were not in the same order as the given inputs (:issue:`22797`)
164165

165-
-
166+
.. ipython:: python
167+
168+
df = pd.DataFrame(np.arange(4),
169+
index=[["a", "a", "b", "b"], [1, 2, 1, 2]])
170+
# Rows are now ordered as the requested keys
171+
df.loc[(['b', 'a'], [2, 1]), :]
166172
-
167173

168174
I/O

pandas/core/indexes/multi.py

+63-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import datetime
22
from sys import getsizeof
3-
from typing import Any, Hashable, List, Optional, Sequence, Union
3+
from typing import Any, Hashable, Iterable, List, Optional, Sequence, Tuple, Union
44
import warnings
55

66
import numpy as np
@@ -9,6 +9,7 @@
99

1010
from pandas._libs import Timestamp, algos as libalgos, index as libindex, lib, tslibs
1111
from pandas._libs.hashtable import duplicated_int64
12+
from pandas._typing import AnyArrayLike, ArrayLike, Scalar
1213
from pandas.compat.numpy import function as nv
1314
from pandas.errors import PerformanceWarning, UnsortedIndexError
1415
from pandas.util._decorators import Appender, cache_readonly
@@ -3081,9 +3082,69 @@ def _update_indexer(idxr, indexer=indexer):
30813082
# empty indexer
30823083
if indexer is None:
30833084
return Int64Index([])._ndarray_values
3085+
3086+
indexer = self._reorder_indexer(seq, indexer)
3087+
30843088
return indexer._ndarray_values
30853089

3086-
# --------------------------------------------------------------------
3090+
def _reorder_indexer(
3091+
self, seq: Tuple[Union[Scalar, Iterable, AnyArrayLike], ...], indexer: ArrayLike
3092+
) -> ArrayLike:
3093+
"""
3094+
Reorder an indexer of a MultiIndex (self) so that the labels are in the
3095+
same order as given in seq
3096+
3097+
Parameters
3098+
----------
3099+
seq : label/slice/list/mask or a sequence of such
3100+
indexer: an Int64Index indexer of self
3101+
3102+
Returns
3103+
-------
3104+
indexer : a sorted Int64Index indexer of self ordered as seq
3105+
"""
3106+
# If the index is lexsorted and the list-like labels in seq are sorted
3107+
# then we do not need to sort
3108+
if self.is_lexsorted():
3109+
need_sort = False
3110+
for i, k in enumerate(seq):
3111+
if is_list_like(k):
3112+
if not need_sort:
3113+
k_codes = self.levels[i].get_indexer(k)
3114+
k_codes = k_codes[k_codes >= 0] # Filter absent keys
3115+
# True if the given codes are not ordered
3116+
need_sort = (k_codes[:-1] > k_codes[1:]).any()
3117+
# Bail out if both index and seq are sorted
3118+
if not need_sort:
3119+
return indexer
3120+
3121+
n = len(self)
3122+
keys: Tuple[np.ndarray, ...] = tuple()
3123+
# For each level of the sequence in seq, map the level codes with the
3124+
# order they appear in a list-like sequence
3125+
# This mapping is then used to reorder the indexer
3126+
for i, k in enumerate(seq):
3127+
if com.is_bool_indexer(k):
3128+
new_order = np.arange(n)[indexer]
3129+
elif is_list_like(k):
3130+
# Generate a map with all level codes as sorted initially
3131+
key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len(
3132+
self.levels[i]
3133+
)
3134+
# Set order as given in the indexer list
3135+
level_indexer = self.levels[i].get_indexer(k)
3136+
level_indexer = level_indexer[level_indexer >= 0] # Filter absent keys
3137+
key_order_map[level_indexer] = np.arange(len(level_indexer))
3138+
3139+
new_order = key_order_map[self.codes[i][indexer]]
3140+
else:
3141+
# For all other cases, use the same order as the level
3142+
new_order = np.arange(n)[indexer]
3143+
keys = (new_order,) + keys
3144+
3145+
# Find the reordering using lexsort on the keys mapping
3146+
ind = np.lexsort(keys)
3147+
return indexer[ind]
30873148

30883149
def truncate(self, before=None, after=None):
30893150
"""

pandas/tests/test_multilevel.py

+26
Original file line numberDiff line numberDiff line change
@@ -2534,3 +2534,29 @@ def test_sort_ascending_list(self):
25342534
result = s.sort_index(level=["third", "first"], ascending=[False, True])
25352535
expected = s.iloc[[0, 4, 1, 5, 2, 6, 3, 7]]
25362536
tm.assert_series_equal(result, expected)
2537+
2538+
@pytest.mark.parametrize(
2539+
"keys, expected",
2540+
[
2541+
(["b", "a"], [["b", "b", "a", "a"], [1, 2, 1, 2]]),
2542+
(["a", "b"], [["a", "a", "b", "b"], [1, 2, 1, 2]]),
2543+
((["a", "b"], [1, 2]), [["a", "a", "b", "b"], [1, 2, 1, 2]]),
2544+
((["a", "b"], [2, 1]), [["a", "a", "b", "b"], [2, 1, 2, 1]]),
2545+
((["b", "a"], [2, 1]), [["b", "b", "a", "a"], [2, 1, 2, 1]]),
2546+
((["b", "a"], [1, 2]), [["b", "b", "a", "a"], [1, 2, 1, 2]]),
2547+
((["c", "a"], [2, 1]), [["c", "a", "a"], [1, 2, 1]]),
2548+
],
2549+
)
2550+
@pytest.mark.parametrize("dim", ["index", "columns"])
2551+
def test_multilevel_index_loc_order(self, dim, keys, expected):
2552+
# GH 22797
2553+
# Try to respect order of keys given for MultiIndex.loc
2554+
kwargs = {dim: [["c", "a", "a", "b", "b"], [1, 1, 2, 1, 2]]}
2555+
df = pd.DataFrame(np.arange(25).reshape(5, 5), **kwargs,)
2556+
exp_index = MultiIndex.from_arrays(expected)
2557+
if dim == "index":
2558+
res = df.loc[keys, :]
2559+
tm.assert_index_equal(res.index, exp_index)
2560+
elif dim == "columns":
2561+
res = df.loc[:, keys]
2562+
tm.assert_index_equal(res.columns, exp_index)

0 commit comments

Comments
 (0)