diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c0840d8e0d4b0..6ee0b5574226a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3141,10 +3141,9 @@ def get_loc(self, key, method=None, tolerance=None): def get_indexer( self, target, method=None, limit=None, tolerance=None ) -> np.ndarray: + method = missing.clean_reindex_fill_method(method) target = ensure_index(target) - if tolerance is not None: - tolerance = self._convert_tolerance(tolerance, target) # Treat boolean labels passed to a numeric index as not found. Without # this fix False and True would be treated as 0 and 1 respectively. @@ -3158,6 +3157,14 @@ def get_indexer( ptarget, method=method, limit=limit, tolerance=tolerance ) + return self._get_indexer(target, method, limit, tolerance) + + def _get_indexer( + self, target: "Index", method=None, limit=None, tolerance=None + ) -> np.ndarray: + if tolerance is not None: + tolerance = self._convert_tolerance(tolerance, target) + if not is_dtype_equal(self.dtype, target.dtype): this = self.astype(object) target = target.astype(object) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 377fff5f85e92..6c9f839f4b8b2 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -24,7 +24,6 @@ import pandas.core.indexes.base as ibase from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name from pandas.core.indexes.extension import NDArrayBackedExtensionIndex, inherit_names -import pandas.core.missing as missing _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update({"target_klass": "CategoricalIndex"}) @@ -496,9 +495,9 @@ def _maybe_cast_indexer(self, key) -> int: return self._data._unbox_scalar(key) @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs) - def get_indexer(self, target, method=None, limit=None, tolerance=None): - method = missing.clean_reindex_fill_method(method) - target = ibase.ensure_index(target) + def _get_indexer( + self, target: "Index", method=None, limit=None, tolerance=None + ) -> np.ndarray: self._check_indexing_method(method) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 3036e944d1e51..5392730ba4ce3 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -663,9 +663,9 @@ def get_loc( ) ) @Appender(_index_shared_docs["get_indexer"]) - def get_indexer( + def _get_indexer( self, - target: AnyArrayLike, + target: Index, method: Optional[str] = None, limit: Optional[int] = None, tolerance: Optional[Any] = None, @@ -679,35 +679,33 @@ def get_indexer( "use IntervalIndex.get_indexer_non_unique" ) - target_as_index = ensure_index(target) - - if isinstance(target_as_index, IntervalIndex): + if isinstance(target, IntervalIndex): # equal indexes -> 1:1 positional match - if self.equals(target_as_index): + if self.equals(target): return np.arange(len(self), dtype="intp") - if self._is_non_comparable_own_type(target_as_index): + if self._is_non_comparable_own_type(target): # different closed or incompatible subtype -> no matches - return np.repeat(np.intp(-1), len(target_as_index)) + return np.repeat(np.intp(-1), len(target)) - # non-overlapping -> at most one match per interval in target_as_index + # non-overlapping -> at most one match per interval in target # want exact matches -> need both left/right to match, so defer to # left/right get_indexer, compare elementwise, equality -> match - left_indexer = self.left.get_indexer(target_as_index.left) - right_indexer = self.right.get_indexer(target_as_index.right) + left_indexer = self.left.get_indexer(target.left) + right_indexer = self.right.get_indexer(target.right) indexer = np.where(left_indexer == right_indexer, left_indexer, -1) - elif is_categorical_dtype(target_as_index.dtype): - target_as_index = cast("CategoricalIndex", target_as_index) + elif is_categorical_dtype(target.dtype): + target = cast("CategoricalIndex", target) # get an indexer for unique categories then propagate to codes via take_1d - categories_indexer = self.get_indexer(target_as_index.categories) - indexer = take_1d(categories_indexer, target_as_index.codes, fill_value=-1) - elif not is_object_dtype(target_as_index): + categories_indexer = self.get_indexer(target.categories) + indexer = take_1d(categories_indexer, target.codes, fill_value=-1) + elif not is_object_dtype(target): # homogeneous scalar index: use IntervalTree - target_as_index = self._maybe_convert_i8(target_as_index) - indexer = self._engine.get_indexer(target_as_index.values) + target = self._maybe_convert_i8(target) + indexer = self._engine.get_indexer(target.values) else: # heterogeneous scalar index: defer elementwise to get_loc - return self._get_indexer_pointwise(target_as_index)[0] + return self._get_indexer_pointwise(target)[0] return ensure_platform_int(indexer) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 911bbc44bb599..4a2a0235e269c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -55,7 +55,6 @@ ) from pandas.core.indexes.frozen import FrozenList from pandas.core.indexes.numeric import Int64Index -import pandas.core.missing as missing from pandas.core.ops.invalid import make_invalid_op from pandas.core.sorting import ( get_group_index, @@ -2597,9 +2596,7 @@ def _get_partial_string_timestamp_match_key(self, key): return key @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs) - def get_indexer(self, target, method=None, limit=None, tolerance=None): - method = missing.clean_reindex_fill_method(method) - target = ensure_index(target) + def _get_indexer(self, target: Index, method=None, limit=None, tolerance=None): # empty indexer if is_list_like(target) and not len(target): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 26bba4653007f..fb654a36fc75f 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -449,14 +449,13 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False) # Indexing Methods @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs) - def get_indexer(self, target, method=None, limit=None, tolerance=None): - target = ensure_index(target) + def _get_indexer(self, target: Index, method=None, limit=None, tolerance=None): if not self._should_compare(target): return self._get_indexer_non_comparable(target, method, unique=True) if isinstance(target, PeriodIndex): - target = target._get_engine_target() # i.e. target.asi8 + target = target._int64index # i.e. target.asi8 self_index = self._int64index else: self_index = self @@ -467,7 +466,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): # convert tolerance to i8 tolerance = self._maybe_convert_timedelta(tolerance) - return Index.get_indexer(self_index, target, method, limit, tolerance) + return Index._get_indexer(self_index, target, method, limit, tolerance) def get_loc(self, key, method=None, tolerance=None): """ diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index ec896d94a20ba..f14c126180642 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -355,9 +355,9 @@ def get_loc(self, key, method=None, tolerance=None): return super().get_loc(key, method=method, tolerance=tolerance) @Appender(_index_shared_docs["get_indexer"]) - def get_indexer(self, target, method=None, limit=None, tolerance=None): + def _get_indexer(self, target, method=None, limit=None, tolerance=None): if com.any_not_none(method, tolerance, limit) or not is_list_like(target): - return super().get_indexer( + return super()._get_indexer( target, method=method, tolerance=tolerance, limit=limit ) @@ -371,7 +371,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): target_array = np.asarray(target) if not (is_signed_integer_dtype(target_array) and target_array.ndim == 1): # checks/conversions/roundings are delegated to general method - return super().get_indexer(target, method=method, tolerance=tolerance) + return super()._get_indexer(target, method=method, tolerance=tolerance) locs = target_array - start valid = (locs % step == 0) & (locs >= 0) & (target_array < stop)