Skip to content

Commit 5dba928

Browse files
committed
Avoid zip and unzip to improve the speed of get_indexer
1 parent 0158382 commit 5dba928

File tree

2 files changed

+14
-16
lines changed

2 files changed

+14
-16
lines changed

pandas/_libs/index.pyx

+5-4
Original file line number | Diff line number | Diff line change
@@ -584,7 +584,7 @@ cdef class BaseMultiIndexCodesEngine:
584584
def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray:
585585
raise NotImplementedError("Implemented by subclass")
586586

587-
def _extract_level_codes(self, ndarray[object] target) -> np.ndarray:
587+
def _extract_level_codes(self, list target) -> np.ndarray:
588588
"""
589589
Map the requested list of (tuple) keys to their integer representations
590590
for searching in the underlying integer index.
@@ -599,11 +599,12 @@ cdef class BaseMultiIndexCodesEngine:
599599
int_keys : 1-dimensional array of dtype uint64 or object
600600
Integers representing one combination each
601601
"""
602-
level_codes = [lev.get_indexer(codes) + 1 for lev, codes
603-
in zip(self.levels, zip(*target))]
602+
n_levels = len(self.levels)
603+
level_codes = [self.levels[i].get_indexer(target[i]) + 1
604+
for i in range(n_levels)]
604605
return self._codes_to_ints(np.array(level_codes, dtype='uint64').T)
605606

606-
def get_indexer(self, ndarray[object] target) -> np.ndarray:
607+
def get_indexer(self, list target) -> np.ndarray:
607608
"""
608609
Returns an array giving the positions of each value of `target` in
609610
`self.values`, where -1 represents a value in `target` which does not

pandas/core/indexes/multi.py

+9-12
Original file line number | Diff line number | Diff line change
@@ -58,11 +58,7 @@
5858
pandas_dtype,
5959
)
6060
from pandas.core.dtypes.dtypes import ExtensionDtype
61-
from pandas.core.dtypes.generic import (
62-
ABCDataFrame,
63-
ABCDatetimeIndex,
64-
ABCTimedeltaIndex,
65-
)
61+
from pandas.core.dtypes.generic import ABCDataFrame
6662
from pandas.core.dtypes.missing import (
6763
array_equivalent,
6864
isna,
@@ -696,24 +692,25 @@ def from_frame(cls, df: DataFrame, sortorder=None, names=None) -> MultiIndex:
696692
@cache_readonly
697693
def _values(self) -> np.ndarray:
698694
# We override here, since our parent uses _data, which we don't use.
695+
arr = lib.fast_zip(self._list_values)
696+
return arr
697+
698+
@cache_readonly
699+
def _list_values(self) -> list:
699700
values = []
700701

701702
for i in range(self.nlevels):
702703
vals = self._get_level_values(i)
703704
if is_categorical_dtype(vals.dtype):
704705
vals = cast("CategoricalIndex", vals)
705706
vals = vals._data._internal_get_values()
706-
if isinstance(vals.dtype, ExtensionDtype) or isinstance(
707-
vals, (ABCDatetimeIndex, ABCTimedeltaIndex)
708-
):
707+
if isinstance(vals.dtype, ExtensionDtype) or hasattr(vals, "_box_values"):
709708
vals = vals.astype(object)
710709
# error: Incompatible types in assignment (expression has type "ndarray",
711710
# variable has type "Index")
712711
vals = np.array(vals, copy=False) # type: ignore[assignment]
713712
values.append(vals)
714-
715-
arr = lib.fast_zip(values)
716-
return arr
713+
return values
717714

718715
@property
719716
def values(self) -> np.ndarray:
@@ -2709,7 +2706,7 @@ def _get_indexer(
27092706
"for MultiIndex; see GitHub issue 9365"
27102707
)
27112708
else:
2712-
indexer = self._engine.get_indexer(target._values)
2709+
indexer = self._engine.get_indexer(target._list_values)
27132710

27142711
# Note: we only get here (in extant tests at least) with
27152712
# target.nlevels == self.nlevels

0 commit comments

Comments
 (0)