diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index f6f36f6ad523b..9159fa03c12c0 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -259,11 +259,11 @@ cdef class IndexEngine: self.monotonic_inc = 0 self.monotonic_dec = 0 - def get_indexer(self, values): + def get_indexer(self, ndarray values): self._ensure_mapping_populated() return self.mapping.lookup(values) - def get_indexer_non_unique(self, targets): + def get_indexer_non_unique(self, ndarray targets): """ Return an indexer suitable for taking from a non unique index return the labels in the same order as the target @@ -451,11 +451,11 @@ cdef class DatetimeEngine(Int64Engine): except KeyError: raise KeyError(val) - def get_indexer_non_unique(self, targets): + def get_indexer_non_unique(self, ndarray targets): # we may get datetime64[ns] or timedelta64[ns], cast these to int64 return super().get_indexer_non_unique(targets.view("i8")) - def get_indexer(self, values): + def get_indexer(self, ndarray values): self._ensure_mapping_populated() if values.dtype != self._get_box_dtype(): return np.repeat(-1, len(values)).astype('i4') @@ -594,7 +594,7 @@ cdef class BaseMultiIndexCodesEngine: in zip(self.levels, zip(*target))] return self._codes_to_ints(np.array(level_codes, dtype='uint64').T) - def get_indexer_no_fill(self, object target) -> np.ndarray: + def get_indexer(self, ndarray[object] target) -> np.ndarray: """ Returns an array giving the positions of each value of `target` in `self.values`, where -1 represents a value in `target` which does not @@ -602,7 +602,7 @@ cdef class BaseMultiIndexCodesEngine: Parameters ---------- - target : list-like of keys + target : ndarray[object] Each key is a tuple, with a label for each level of the index Returns @@ -613,8 +613,8 @@ cdef class BaseMultiIndexCodesEngine: lab_ints = self._extract_level_codes(target) return self._base.get_indexer(self, lab_ints) - def get_indexer(self, object target, object values = None, - object method = None, object limit = None) -> np.ndarray: + def get_indexer_with_fill(self, ndarray target, ndarray values, + str method, object limit) -> np.ndarray: """ Returns an array giving the positions of each value of `target` in `values`, where -1 represents a value in `target` which does not @@ -630,15 +630,15 @@ cdef class BaseMultiIndexCodesEngine: Parameters ---------- - target: list-like of tuples + target: ndarray[object] of tuples need not be sorted, but all must have the same length, which must be the same as the length of all tuples in `values` - values : list-like of tuples + values : ndarray[object] of tuples must be sorted and all have the same length. Should be the set of the MultiIndex's values. Needed only if `method` is not None method: string "backfill" or "pad" - limit: int, optional + limit: int or None if provided, limit the number of fills to this value Returns @@ -646,9 +646,6 @@ cdef class BaseMultiIndexCodesEngine: np.ndarray[int64_t, ndim=1] of the indexer of `target` into `values`, filled with the `method` (and optionally `limit`) specified """ - if method is None: - return self.get_indexer_no_fill(target) - assert method in ("backfill", "pad") cdef: int64_t i, j, next_code @@ -658,8 +655,8 @@ cdef class BaseMultiIndexCodesEngine: ndarray[int64_t, ndim=1] new_codes, new_target_codes ndarray[int64_t, ndim=1] sorted_indexer - target_order = np.argsort(target.values).astype('int64') - target_values = target.values[target_order] + target_order = np.argsort(target).astype('int64') + target_values = target[target_order] num_values, num_target_values = len(values), len(target_values) new_codes, new_target_codes = ( np.empty((num_values,)).astype('int64'), @@ -718,7 +715,7 @@ cdef class BaseMultiIndexCodesEngine: return self._base.get_loc(self, lab_int) - def get_indexer_non_unique(self, object target): + def get_indexer_non_unique(self, ndarray target): # This needs to be overridden just because the default one works on # target._values, and target can be itself a MultiIndex. diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 12b343ab5d895..3a468758ab3fd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3378,7 +3378,11 @@ def get_loc(self, key, method=None, tolerance=None): @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs) @final def get_indexer( - self, target, method=None, limit=None, tolerance=None + self, + target, + method: Optional[str_t] = None, + limit: Optional[int] = None, + tolerance=None, ) -> np.ndarray: method = missing.clean_reindex_fill_method(method) @@ -3403,7 +3407,11 @@ def get_indexer( return self._get_indexer(target, method, limit, tolerance) def _get_indexer( - self, target: Index, method=None, limit=None, tolerance=None + self, + target: Index, + method: Optional[str_t] = None, + limit: Optional[int] = None, + tolerance=None, ) -> np.ndarray: if tolerance is not None: tolerance = self._convert_tolerance(tolerance, target) @@ -3467,7 +3475,7 @@ def _convert_tolerance( @final def _get_fill_indexer( - self, target: Index, method: str_t, limit=None, tolerance=None + self, target: Index, method: str_t, limit: Optional[int] = None, tolerance=None ) -> np.ndarray: target_values = target._get_engine_target() @@ -3487,7 +3495,7 @@ def _get_fill_indexer( @final def _get_fill_indexer_searchsorted( - self, target: Index, method: str_t, limit=None + self, target: Index, method: str_t, limit: Optional[int] = None ) -> np.ndarray: """ Fallback pad/backfill get_indexer that works for monotonic decreasing @@ -3520,7 +3528,9 @@ def _get_fill_indexer_searchsorted( return indexer @final - def _get_nearest_indexer(self, target: Index, limit, tolerance) -> np.ndarray: + def _get_nearest_indexer( + self, target: Index, limit: Optional[int], tolerance + ) -> np.ndarray: """ Get the indexer for the nearest index labels; requires an index with values that can be subtracted from each other (e.g., not strings or diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 39f8e0b6e431e..f372db5287604 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -492,7 +492,11 @@ def _maybe_cast_indexer(self, key) -> int: return self._data._unbox_scalar(key) def _get_indexer( - self, target: Index, method=None, limit=None, tolerance=None + self, + target: Index, + method: Optional[str] = None, + limit: Optional[int] = None, + tolerance=None, ) -> np.ndarray: if self.equals(target): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0d89e75c097c1..97492f35232e3 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2676,7 +2676,11 @@ def _get_partial_string_timestamp_match_key(self, key): return key def _get_indexer( - self, target: Index, method=None, limit=None, tolerance=None + self, + target: Index, + method: Optional[str] = None, + limit: Optional[int] = None, + tolerance=None, ) -> np.ndarray: # empty indexer @@ -2699,8 +2703,8 @@ def _get_indexer( raise NotImplementedError( "tolerance not implemented yet for MultiIndex" ) - indexer = self._engine.get_indexer( - values=self._values, target=target, method=method, limit=limit + indexer = self._engine.get_indexer_with_fill( + target=target._values, values=self._values, method=method, limit=limit ) elif method == "nearest": raise NotImplementedError( @@ -2708,7 +2712,7 @@ def _get_indexer( "for MultiIndex; see GitHub issue 9365" ) else: - indexer = self._engine.get_indexer(target) + indexer = self._engine.get_indexer(target._values) return ensure_platform_int(indexer) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 010568af4982c..456d87766bdb7 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -396,7 +396,13 @@ def get_loc(self, key, method=None, tolerance=None): raise KeyError(key) return super().get_loc(key, method=method, tolerance=tolerance) - def _get_indexer(self, target: Index, method=None, limit=None, tolerance=None): + def _get_indexer( + self, + target: Index, + method: Optional[str] = None, + limit: Optional[int] = None, + tolerance=None, + ): if com.any_not_none(method, tolerance, limit): return super()._get_indexer( target, method=method, tolerance=tolerance, limit=limit