Skip to content

TYP: get_indexer #40612

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Apr 19, 2021
32 changes: 26 additions & 6 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
TypeVar,
Union,
cast,
overload,
)
import warnings

Expand Down Expand Up @@ -165,6 +166,8 @@
)

if TYPE_CHECKING:
from typing import Literal

from pandas import (
CategoricalIndex,
DataFrame,
Expand Down Expand Up @@ -5197,17 +5200,18 @@ def set_value(self, arr, key, value):

Returns
-------
indexer : ndarray of int
indexer : np.ndarray[np.intp]
Integers from 0 to n - 1 indicating that the index at these
positions matches the corresponding target values. Missing values
in the target are marked by -1.
missing : ndarray of int
missing : np.ndarray[np.intp]
An indexer into the target of the values not found.
These correspond to the -1 in the indexer array.
"""

@Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
def get_indexer_non_unique(self, target):
def get_indexer_non_unique(self, target) -> tuple[np.ndarray, np.ndarray]:
# both returned ndarrays are np.intp
target = ensure_index(target)

if not self._should_compare(target) and not is_interval_dtype(self.dtype):
Expand All @@ -5231,7 +5235,7 @@ def get_indexer_non_unique(self, target):
tgt_values = target._get_engine_target()

indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
return ensure_platform_int(indexer), missing
return ensure_platform_int(indexer), ensure_platform_int(missing)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps you could add a comment explaining why this change is necessary and what it does?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this dates from before a separate PR ensured that self._engine.get_indexer_non_unique already returns ndarray[intp]; keeping the call makes the dtype guarantee a little more explicit (and it is cheap).


@final
def get_indexer_for(self, target, **kwargs) -> np.ndarray:
Expand All @@ -5243,16 +5247,32 @@ def get_indexer_for(self, target, **kwargs) -> np.ndarray:

Returns
-------
numpy.ndarray
numpy.ndarray[np.intp]
List of indices.
"""
if self._index_as_unique:
return self.get_indexer(target, **kwargs)
indexer, _ = self.get_indexer_non_unique(target)
return indexer

@overload
def _get_indexer_non_comparable(
self, target: Index, method, unique: Literal[True] = ...
) -> np.ndarray:
# returned ndarray is np.intp
...

@overload
def _get_indexer_non_comparable(
self, target: Index, method, unique: Literal[False]
) -> tuple[np.ndarray, np.ndarray]:
# both returned ndarrays are np.intp
...

@final
def _get_indexer_non_comparable(self, target: Index, method, unique: bool = True):
def _get_indexer_non_comparable(
self, target: Index, method, unique: bool = True
) -> np.ndarray | tuple[np.ndarray, np.ndarray]:
"""
Called from get_indexer or get_indexer_non_unique when the target
is of a non-comparable dtype.
Expand Down
16 changes: 11 additions & 5 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

from typing import (
Any,
Hashable,
Expand Down Expand Up @@ -422,10 +424,9 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None):

target = ibase.ensure_index(target)

missing: List[int]
if self.equals(target):
indexer = None
missing = []
missing = np.array([], dtype=np.intp)
else:
indexer, missing = self.get_indexer_non_unique(np.array(target))

Expand Down Expand Up @@ -498,18 +499,23 @@ def _get_indexer(
limit: Optional[int] = None,
tolerance=None,
) -> np.ndarray:
# returned ndarray is np.intp

if self.equals(target):
return np.arange(len(self), dtype="intp")

return self._get_indexer_non_unique(target._values)[0]

@Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
def get_indexer_non_unique(self, target):
def get_indexer_non_unique(self, target) -> tuple[np.ndarray, np.ndarray]:
# both returned ndarrays are np.intp
target = ibase.ensure_index(target)
return self._get_indexer_non_unique(target._values)

def _get_indexer_non_unique(self, values: ArrayLike):
def _get_indexer_non_unique(
self, values: ArrayLike
) -> tuple[np.ndarray, np.ndarray]:
# both returned ndarrays are np.intp
"""
get_indexer_non_unique but after unwrapping the target Index object.
"""
Expand All @@ -528,7 +534,7 @@ def _get_indexer_non_unique(self, values: ArrayLike):
codes = self.categories.get_indexer(values)

indexer, missing = self._engine.get_indexer_non_unique(codes)
return ensure_platform_int(indexer), missing
return ensure_platform_int(indexer), ensure_platform_int(missing)

@doc(Index._convert_list_indexer)
def _convert_list_indexer(self, keyarr):
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,7 @@ def _get_indexer(
limit: Optional[int] = None,
tolerance: Optional[Any] = None,
) -> np.ndarray:
# returned ndarray is np.intp

if isinstance(target, IntervalIndex):
# equal indexes -> 1:1 positional match
Expand Down Expand Up @@ -757,6 +758,7 @@ def _get_indexer(

@Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
def get_indexer_non_unique(self, target: Index) -> Tuple[np.ndarray, np.ndarray]:
# both returned ndarrays are np.intp
target = ensure_index(target)

if isinstance(target, IntervalIndex) and not self._should_compare(target):
Expand All @@ -776,6 +778,7 @@ def get_indexer_non_unique(self, target: Index) -> Tuple[np.ndarray, np.ndarray]
return ensure_platform_int(indexer), ensure_platform_int(missing)

def _get_indexer_pointwise(self, target: Index) -> Tuple[np.ndarray, np.ndarray]:
# both returned ndarrays are np.intp
"""
pointwise implementation for get_indexer and get_indexer_non_unique.
"""
Expand Down