Skip to content

REF: implement Index._get_indexer #38308

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3141,10 +3141,9 @@ def get_loc(self, key, method=None, tolerance=None):
def get_indexer(
self, target, method=None, limit=None, tolerance=None
) -> np.ndarray:

method = missing.clean_reindex_fill_method(method)
target = ensure_index(target)
if tolerance is not None:
tolerance = self._convert_tolerance(tolerance, target)

# Treat boolean labels passed to a numeric index as not found. Without
# this fix False and True would be treated as 0 and 1 respectively.
Expand All @@ -3158,6 +3157,14 @@ def get_indexer(
ptarget, method=method, limit=limit, tolerance=tolerance
)

return self._get_indexer(target, method, limit, tolerance)

def _get_indexer(
self, target: "Index", method=None, limit=None, tolerance=None
) -> np.ndarray:
if tolerance is not None:
tolerance = self._convert_tolerance(tolerance, target)

if not is_dtype_equal(self.dtype, target.dtype):
this = self.astype(object)
target = target.astype(object)
Expand Down
7 changes: 3 additions & 4 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name
from pandas.core.indexes.extension import NDArrayBackedExtensionIndex, inherit_names
import pandas.core.missing as missing

_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update({"target_klass": "CategoricalIndex"})
Expand Down Expand Up @@ -496,9 +495,9 @@ def _maybe_cast_indexer(self, key) -> int:
return self._data._unbox_scalar(key)

@Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
def get_indexer(self, target, method=None, limit=None, tolerance=None):
method = missing.clean_reindex_fill_method(method)
target = ibase.ensure_index(target)
def _get_indexer(
self, target: "Index", method=None, limit=None, tolerance=None
) -> np.ndarray:

self._check_indexing_method(method)

Expand Down
36 changes: 17 additions & 19 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,9 +663,9 @@ def get_loc(
)
)
@Appender(_index_shared_docs["get_indexer"])
def get_indexer(
def _get_indexer(
self,
target: AnyArrayLike,
target: Index,
method: Optional[str] = None,
limit: Optional[int] = None,
tolerance: Optional[Any] = None,
Expand All @@ -679,35 +679,33 @@ def get_indexer(
"use IntervalIndex.get_indexer_non_unique"
)

target_as_index = ensure_index(target)

if isinstance(target_as_index, IntervalIndex):
if isinstance(target, IntervalIndex):
# equal indexes -> 1:1 positional match
if self.equals(target_as_index):
if self.equals(target):
return np.arange(len(self), dtype="intp")

if self._is_non_comparable_own_type(target_as_index):
if self._is_non_comparable_own_type(target):
# different closed or incompatible subtype -> no matches
return np.repeat(np.intp(-1), len(target_as_index))
return np.repeat(np.intp(-1), len(target))

# non-overlapping -> at most one match per interval in target_as_index
# non-overlapping -> at most one match per interval in target
# want exact matches -> need both left/right to match, so defer to
# left/right get_indexer, compare elementwise, equality -> match
left_indexer = self.left.get_indexer(target_as_index.left)
right_indexer = self.right.get_indexer(target_as_index.right)
left_indexer = self.left.get_indexer(target.left)
right_indexer = self.right.get_indexer(target.right)
indexer = np.where(left_indexer == right_indexer, left_indexer, -1)
elif is_categorical_dtype(target_as_index.dtype):
target_as_index = cast("CategoricalIndex", target_as_index)
elif is_categorical_dtype(target.dtype):
target = cast("CategoricalIndex", target)
# get an indexer for unique categories then propagate to codes via take_1d
categories_indexer = self.get_indexer(target_as_index.categories)
indexer = take_1d(categories_indexer, target_as_index.codes, fill_value=-1)
elif not is_object_dtype(target_as_index):
categories_indexer = self.get_indexer(target.categories)
indexer = take_1d(categories_indexer, target.codes, fill_value=-1)
elif not is_object_dtype(target):
# homogeneous scalar index: use IntervalTree
target_as_index = self._maybe_convert_i8(target_as_index)
indexer = self._engine.get_indexer(target_as_index.values)
target = self._maybe_convert_i8(target)
indexer = self._engine.get_indexer(target.values)
else:
# heterogeneous scalar index: defer elementwise to get_loc
return self._get_indexer_pointwise(target_as_index)[0]
return self._get_indexer_pointwise(target)[0]

return ensure_platform_int(indexer)

Expand Down
5 changes: 1 addition & 4 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
)
from pandas.core.indexes.frozen import FrozenList
from pandas.core.indexes.numeric import Int64Index
import pandas.core.missing as missing
from pandas.core.ops.invalid import make_invalid_op
from pandas.core.sorting import (
get_group_index,
Expand Down Expand Up @@ -2597,9 +2596,7 @@ def _get_partial_string_timestamp_match_key(self, key):
return key

@Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
def get_indexer(self, target, method=None, limit=None, tolerance=None):
method = missing.clean_reindex_fill_method(method)
target = ensure_index(target)
def _get_indexer(self, target: Index, method=None, limit=None, tolerance=None):

# empty indexer
if is_list_like(target) and not len(target):
Expand Down
7 changes: 3 additions & 4 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,14 +449,13 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False)
# Indexing Methods

@Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
def get_indexer(self, target, method=None, limit=None, tolerance=None):
target = ensure_index(target)
def _get_indexer(self, target: Index, method=None, limit=None, tolerance=None):

if not self._should_compare(target):
return self._get_indexer_non_comparable(target, method, unique=True)

if isinstance(target, PeriodIndex):
target = target._get_engine_target() # i.e. target.asi8
target = target._int64index # i.e. target.asi8
self_index = self._int64index
else:
self_index = self
Expand All @@ -467,7 +466,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
# convert tolerance to i8
tolerance = self._maybe_convert_timedelta(tolerance)

return Index.get_indexer(self_index, target, method, limit, tolerance)
return Index._get_indexer(self_index, target, method, limit, tolerance)

def get_loc(self, key, method=None, tolerance=None):
"""
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,9 +355,9 @@ def get_loc(self, key, method=None, tolerance=None):
return super().get_loc(key, method=method, tolerance=tolerance)

@Appender(_index_shared_docs["get_indexer"])
def get_indexer(self, target, method=None, limit=None, tolerance=None):
def _get_indexer(self, target, method=None, limit=None, tolerance=None):
if com.any_not_none(method, tolerance, limit) or not is_list_like(target):
return super().get_indexer(
return super()._get_indexer(
target, method=method, tolerance=tolerance, limit=limit
)

Expand All @@ -371,7 +371,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
target_array = np.asarray(target)
if not (is_signed_integer_dtype(target_array) and target_array.ndim == 1):
# checks/conversions/roundings are delegated to general method
return super().get_indexer(target, method=method, tolerance=tolerance)
return super()._get_indexer(target, method=method, tolerance=tolerance)

locs = target_array - start
valid = (locs % step == 0) & (locs >= 0) & (target_array < stop)
Expand Down