Skip to content

Commit e10ddad

Browse files
jbrockmendel authored and feefladder committed
REF: MultiIndex._engine followup to pandas-dev#43370 (pandas-dev#43421)
1 parent 69beb8c commit e10ddad

File tree

4 files changed

+12
-13
lines changed

4 files changed

+12
-13
lines changed

doc/source/whatsnew/v1.4.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ Performance improvements
291291
- Performance improvement in :func:`read_stata` (:issue:`43059`)
292292
- Performance improvement in :meth:`to_datetime` with ``uint`` dtypes (:issue:`42606`)
293293
- Performance improvement in :meth:`Series.sparse.to_coo` (:issue:`42880`)
294-
-
294+
- Performance improvement in indexing with a :class:`MultiIndex` indexer on another :class:`MultiIndex` (:issue:`43370`)
295295

296296
.. ---------------------------------------------------------------------------
297297

pandas/_libs/index.pyi

+3-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ import numpy as np
22

33
from pandas._typing import npt
44

5+
from pandas import MultiIndex
6+
57
class IndexEngine:
68
over_size_threshold: bool
79
def __init__(self, vgetter, n: int): ...
@@ -59,7 +61,7 @@ class BaseMultiIndexCodesEngine:
5961
self,
6062
target: npt.NDArray[np.object_],
6163
) -> npt.NDArray[np.intp]: ...
62-
def _extract_level_codes(self, target: object): ...
64+
def _extract_level_codes(self, target: MultiIndex) -> np.ndarray: ...
6365
def get_indexer_with_fill(
6466
self,
6567
target: np.ndarray, # np.ndarray[object] of tuples

pandas/_libs/index.pyx

+5-7
Original file line numberDiff line numberDiff line change
@@ -622,23 +622,22 @@ cdef class BaseMultiIndexCodesEngine:
622622
in zip(self.levels, zt)]
623623
return self._codes_to_ints(np.array(level_codes, dtype='uint64').T)
624624

625-
def get_indexer(self, target) -> np.ndarray:
625+
def get_indexer(self, target: np.ndarray) -> np.ndarray:
626626
"""
627627
Returns an array giving the positions of each value of `target` in
628628
`self.values`, where -1 represents a value in `target` which does not
629629
appear in `self.values`
630630

631631
Parameters
632632
----------
633-
target : MultiIndex
633+
target : np.ndarray
634634

635635
Returns
636636
-------
637637
np.ndarray[intp_t, ndim=1] of the indexer of `target` into
638638
`self.values`
639639
"""
640-
lab_ints = self._extract_level_codes(target)
641-
return self._base.get_indexer(self, lab_ints)
640+
return self._base.get_indexer(self, target)
642641

643642
def get_indexer_with_fill(self, ndarray target, ndarray values,
644643
str method, object limit) -> np.ndarray:
@@ -741,10 +740,9 @@ cdef class BaseMultiIndexCodesEngine:
741740

742741
return self._base.get_loc(self, lab_int)
743742

744-
def get_indexer_non_unique(self, target):
743+
def get_indexer_non_unique(self, target: np.ndarray) -> np.ndarray:
745744
# target: np.ndarray
746-
lab_ints = self._extract_level_codes(target)
747-
indexer = self._base.get_indexer_non_unique(self, lab_ints)
745+
indexer = self._base.get_indexer_non_unique(self, target)
748746

749747
return indexer
750748

pandas/core/indexes/base.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -3620,9 +3620,8 @@ def _get_indexer(
36203620
else:
36213621
tgt_values = target._get_engine_target()
36223622
if target._is_multi and self._is_multi:
3623-
# error: Incompatible types in assignment (expression has type
3624-
# "Index", variable has type "ndarray[Any, Any]")
3625-
tgt_values = target # type: ignore[assignment]
3623+
tgt_values = self._engine._extract_level_codes(target)
3624+
36263625
indexer = self._engine.get_indexer(tgt_values)
36273626

36283627
return ensure_platform_int(indexer)
@@ -5465,7 +5464,7 @@ def get_indexer_non_unique(
54655464
# self and non-Multi target
54665465
tgt_values = target._get_engine_target()
54675466
if self._is_multi and target._is_multi:
5468-
tgt_values = target
5467+
tgt_values = self._engine._extract_level_codes(target)
54695468

54705469
indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
54715470
return ensure_platform_int(indexer), ensure_platform_int(missing)

0 commit comments

Comments
 (0)