Skip to content

Commit 4fb83b0

Browse files
PERF: avoid unnecessary reordering in MultiIndex._reorder_indexer (#48384)
* Avoid unnecessary reordering in MultiIndex._reorder_indexer
* whatsnew
* mypy
* cleanup
* add multiindex.get_locs reordering tests
* fix test

Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 5e2e92e commit 4fb83b0

File tree

3 files changed

+55
-15
lines changed

3 files changed

+55
-15
lines changed

doc/source/whatsnew/v1.6.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,7 @@ Performance improvements
109109
- Performance improvement in :meth:`.GroupBy.mean` and :meth:`.GroupBy.var` for extension array dtypes (:issue:`37493`)
110110
- Performance improvement for :meth:`Series.value_counts` with nullable dtype (:issue:`48338`)
111111
- Performance improvement for :class:`Series` constructor passing integer numpy array with nullable dtype (:issue:`48338`)
112+
- Performance improvement in :meth:`DataFrame.loc` and :meth:`Series.loc` for tuple-based indexing of a :class:`MultiIndex` (:issue:`48384`)
112113
- Performance improvement for :meth:`MultiIndex.unique` (:issue:`48335`)
113114
- Performance improvement in ``var`` for nullable dtypes (:issue:`48379`).
114115
- Performance improvement to :func:`read_sas` with ``blank_missing=True`` (:issue:`48502`)

pandas/core/indexes/multi.py

+28-15
Original file line number | Diff line number | Diff line change
@@ -3466,22 +3466,35 @@ def _reorder_indexer(
34663466
-------
34673467
indexer : a sorted position indexer of self ordered as seq
34683468
"""
3469-
# If the index is lexsorted and the list_like label in seq are sorted
3470-
# then we do not need to sort
3471-
if self._is_lexsorted():
3472-
need_sort = False
3473-
for i, k in enumerate(seq):
3474-
if is_list_like(k):
3475-
if not need_sort:
3476-
k_codes = self.levels[i].get_indexer(k)
3477-
k_codes = k_codes[k_codes >= 0] # Filter absent keys
3478-
# True if the given codes are not ordered
3479-
need_sort = (k_codes[:-1] > k_codes[1:]).any()
3480-
elif isinstance(k, slice) and k.step is not None and k.step < 0:
3469+
3470+
# check if sorting is necessary
3471+
need_sort = False
3472+
for i, k in enumerate(seq):
3473+
if com.is_null_slice(k) or com.is_bool_indexer(k) or is_scalar(k):
3474+
pass
3475+
elif is_list_like(k):
3476+
if len(k) <= 1: # type: ignore[arg-type]
3477+
pass
3478+
elif self._is_lexsorted():
3479+
# If the index is lexsorted and the list_like label
3480+
# in seq are sorted then we do not need to sort
3481+
k_codes = self.levels[i].get_indexer(k)
3482+
k_codes = k_codes[k_codes >= 0] # Filter absent keys
3483+
# True if the given codes are not ordered
3484+
need_sort = (k_codes[:-1] > k_codes[1:]).any()
3485+
else:
34813486
need_sort = True
3482-
# Bail out if both index and seq are sorted
3483-
if not need_sort:
3484-
return indexer
3487+
elif isinstance(k, slice):
3488+
if self._is_lexsorted():
3489+
need_sort = k.step is not None and k.step < 0
3490+
else:
3491+
need_sort = True
3492+
else:
3493+
need_sort = True
3494+
if need_sort:
3495+
break
3496+
if not need_sort:
3497+
return indexer
34853498

34863499
n = len(self)
34873500
keys: tuple[np.ndarray, ...] = ()

pandas/tests/indexes/multi/test_indexing.py

+26
Original file line number | Diff line number | Diff line change
@@ -898,3 +898,29 @@ def test_pyint_engine():
898898
missing = tuple([0, 1] * 5 * N)
899899
result = index.get_indexer([missing] + [keys[i] for i in idces])
900900
tm.assert_numpy_array_equal(result, expected)
901+
902+
903+
@pytest.mark.parametrize(
904+
"keys,expected",
905+
[
906+
((slice(None), [5, 4]), [1, 0]),
907+
((slice(None), [4, 5]), [0, 1]),
908+
(([True, False, True], [4, 6]), [0, 2]),
909+
(([True, False, True], [6, 4]), [0, 2]),
910+
((2, [4, 5]), [0, 1]),
911+
((2, [5, 4]), [1, 0]),
912+
(([2], [4, 5]), [0, 1]),
913+
(([2], [5, 4]), [1, 0]),
914+
],
915+
)
916+
def test_get_locs_reordering(keys, expected):
917+
# GH48384
918+
idx = MultiIndex.from_arrays(
919+
[
920+
[2, 2, 1],
921+
[4, 5, 6],
922+
]
923+
)
924+
result = idx.get_locs(keys)
925+
expected = np.array(expected, dtype=np.intp)
926+
tm.assert_numpy_array_equal(result, expected)

0 commit comments

Comments
 (0)