diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index f7cec262ca302..5221e33e049f3 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -584,7 +584,7 @@ cdef class BaseMultiIndexCodesEngine:
     def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray:
         raise NotImplementedError("Implemented by subclass")
 
-    def _extract_level_codes(self, ndarray[object] target) -> np.ndarray:
+    def _extract_level_codes(self, list target) -> np.ndarray:
         """
         Map the requested list of (tuple) keys to their integer representations
         for searching in the underlying integer index.
@@ -599,11 +599,12 @@ cdef class BaseMultiIndexCodesEngine:
         int_keys : 1-dimensional array of dtype uint64 or object
             Integers representing one combination each
         """
-        level_codes = [lev.get_indexer(codes) + 1 for lev, codes
-                       in zip(self.levels, zip(*target))]
+        n_levels = len(self.levels)
+        level_codes = [self.levels[i].get_indexer(target[i]) + 1
+                       for i in range(n_levels)]
         return self._codes_to_ints(np.array(level_codes, dtype='uint64').T)
 
-    def get_indexer(self, ndarray[object] target) -> np.ndarray:
+    def get_indexer(self, list target) -> np.ndarray:
         """
         Returns an array giving the positions of each value of `target` in
         `self.values`, where -1 represents a value in `target` which does not
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 794f13bbfb6b1..909bede88707b 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -715,6 +715,31 @@ def _values(self) -> np.ndarray:
             arr = lib.fast_zip(values)
         return arr
 
+    @cache_readonly
+    def _values_for_indexer(self):
+        """
+        Per-level values as a list of ndarrays, one entry per level.
+
+        Categorical levels are unwrapped via ``_internal_get_values`` and
+        other extension dtypes are cast to object before each entry is
+        passed through ``np.array``.
+        """
+        values = []
+
+        for i in range(self.nlevels):
+            vals = self._get_level_values(i)
+            if is_categorical_dtype(vals.dtype):
+                vals = cast("CategoricalIndex", vals)
+                vals = vals._data._internal_get_values()
+            if isinstance(vals.dtype, ExtensionDtype):
+                vals = vals.astype(object)
+            # error: Incompatible types in assignment (expression has type "ndarray",
+            # variable has type "Index")
+            vals = np.array(vals, copy=False)  # type: ignore[assignment]
+            values.append(vals)
+
+        return values
+
     @property
     def values(self) -> np.ndarray:
         return self._values
@@ -2666,6 +2691,15 @@ def _get_partial_string_timestamp_match_key(self, key):
 
         return key
 
+    def _get_engine_target(self) -> np.ndarray:
+        """
+        Override base
+        Get the ndarray that we can pass to the IndexEngine constructor.
+        """
+        # error: Incompatible return value type (got "Union[ExtensionArray,
+        # ndarray]", expected "ndarray")
+        return self._values_for_indexer  # type: ignore[return-value]
+
     def _get_indexer(
         self,
         target: Index,
@@ -2701,7 +2735,10 @@ def _get_indexer(
             # TODO: get_indexer_with_fill docstring says values must be _sorted_
             #  but that doesn't appear to be enforced
             indexer = self._engine.get_indexer_with_fill(
-                target=target._values, values=self._values, method=method, limit=limit
+                target=target._values_for_indexer,
+                values=self._values_for_indexer,
+                method=method,
+                limit=limit,
             )
         elif method == "nearest":
             raise NotImplementedError(
@@ -2709,7 +2746,7 @@ def _get_indexer(
                 "for MultiIndex; see GitHub issue 9365"
             )
         else:
-            indexer = self._engine.get_indexer(target._values)
+            indexer = self._engine.get_indexer(target._values_for_indexer)
 
         # Note: we only get here (in extant tests at least) with
         #  target.nlevels == self.nlevels