TYP: index.pyi #40486

Merged: 7 commits, Mar 30, 2021
pandas/_libs/index.pyi (86 additions, 0 deletions)
@@ -0,0 +1,86 @@
import numpy as np

class IndexEngine:
    over_size_threshold: bool

    def __init__(self, vgetter, n: int): ...

    def __contains__(self, val: object) -> bool: ...

    # -> int | slice | np.ndarray[bool]
    def get_loc(self, val: object) -> int | slice | np.ndarray: ...

    def sizeof(self, deep: bool = False) -> int: ...
    def __sizeof__(self) -> int: ...

    @property
    def is_unique(self) -> bool: ...

    @property
    def is_monotonic_increasing(self) -> bool: ...

    @property
    def is_monotonic_decreasing(self) -> bool: ...

    def get_backfill_indexer(self, other: np.ndarray, limit: int | None = ...) -> np.ndarray: ...
    def get_pad_indexer(self, other: np.ndarray, limit: int | None = ...) -> np.ndarray: ...

    @property
    def is_mapping_populated(self) -> bool: ...

    def clear_mapping(self): ...
    def get_indexer(self, values: np.ndarray) -> np.ndarray: ...  # np.ndarray[np.intp]
    def get_indexer_non_unique(
        self,
        targets: np.ndarray,
    ) -> tuple[
        np.ndarray,  # np.ndarray[np.intp]
        np.ndarray,  # np.ndarray[np.intp]
    ]: ...


class Float64Engine(IndexEngine): ...
class Float32Engine(IndexEngine): ...

class Int64Engine(IndexEngine): ...
class Int32Engine(IndexEngine): ...
class Int16Engine(IndexEngine): ...
class Int8Engine(IndexEngine): ...

class UInt64Engine(IndexEngine): ...
class UInt32Engine(IndexEngine): ...
class UInt16Engine(IndexEngine): ...
class UInt8Engine(IndexEngine): ...

class ObjectEngine(IndexEngine): ...

class DatetimeEngine(Int64Engine): ...
class TimedeltaEngine(DatetimeEngine): ...
class PeriodEngine(Int64Engine): ...


class BaseMultiIndexCodesEngine:
    levels: list[np.ndarray]
    offsets: np.ndarray  # ndarray[uint64_t, ndim=1]

    def __init__(
        self,
        levels: list[np.ndarray],  # all entries hashable
        labels: list[np.ndarray],  # all entries integer-dtyped
        offsets: np.ndarray,  # np.ndarray[np.uint64, ndim=1]
    ): ...

    def get_indexer(
        self,
        target: np.ndarray,  # np.ndarray[object]
    ) -> np.ndarray: ...  # np.ndarray[np.intp]

    def _extract_level_codes(self, target: object): ...

    def get_indexer_with_fill(
        self,
        target: np.ndarray,  # np.ndarray[object] of tuples
        values: np.ndarray,  # np.ndarray[object] of tuples
        method: str,
        limit: int | None,
    ) -> np.ndarray: ...  # np.ndarray[np.int64]
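As a quick illustration of what these stubs describe (not part of the diff): the engine methods are reachable through the private `Index._engine` attribute, so the sketch below is illustrative only and the printed values are expectations rather than verified output.

```python
# Illustrative sketch only: exercises methods annotated in the stub above
# through the private Index._engine attribute (internal pandas API).
import numpy as np
import pandas as pd

idx = pd.Index([10, 20, 30])
engine = idx._engine                         # an IndexEngine subclass instance

loc = engine.get_loc(20)                     # stub: int | slice | np.ndarray
indexer = engine.get_indexer(
    np.array([10, 30, 99], dtype=np.int64)   # stub: np.ndarray (of np.intp)
)
print(loc, indexer)                          # expected: 1 [ 0  2 -1]
```
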
pandas/_libs/index.pyx (14 additions, 11 deletions)
@@ -259,7 +259,7 @@ cdef class IndexEngine:
         self.monotonic_inc = 0
         self.monotonic_dec = 0
 
-    def get_indexer(self, ndarray values):
+    def get_indexer(self, ndarray values) -> np.ndarray:
         self._ensure_mapping_populated()
         return self.mapping.lookup(values)
@@ -269,6 +269,11 @@ cdef class IndexEngine:
         return the labels in the same order as the target
         and a missing indexer into the targets (which correspond
         to the -1 indices in the results
+
+        Returns
+        -------
+        indexer : np.ndarray[np.intp]
+        missing : np.ndarray[np.intp]
         """
         cdef:
             ndarray values, x
@@ -455,7 +460,7 @@ cdef class DatetimeEngine(Int64Engine):
         # we may get datetime64[ns] or timedelta64[ns], cast these to int64
         return super().get_indexer_non_unique(targets.view("i8"))
 
-    def get_indexer(self, ndarray values):
+    def get_indexer(self, ndarray values) -> np.ndarray:
         self._ensure_mapping_populated()
         if values.dtype != self._get_box_dtype():
             return np.repeat(-1, len(values)).astype(np.intp)
@@ -572,17 +577,17 @@ cdef class BaseMultiIndexCodesEngine:
         # integers representing labels: we will use its get_loc and get_indexer
         self._base.__init__(self, lambda: lab_ints, len(lab_ints))
 
-    def _codes_to_ints(self, codes):
+    def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray:
         raise NotImplementedError("Implemented by subclass")
 
-    def _extract_level_codes(self, object target):
+    def _extract_level_codes(self, ndarray[object] target) -> np.ndarray:
         """
         Map the requested list of (tuple) keys to their integer representations
         for searching in the underlying integer index.
 
         Parameters
         ----------
-        target : list-like of keys
+        target : ndarray[object]
             Each key is a tuple, with a label for each level of the index.
 
         Returns
@@ -607,7 +612,7 @@
 
         Returns
         -------
-        np.ndarray[int64_t, ndim=1] of the indexer of `target` into
+        np.ndarray[intp_t, ndim=1] of the indexer of `target` into
         `self.values`
         """
         lab_ints = self._extract_level_codes(target)
@@ -635,15 +640,15 @@ cdef class BaseMultiIndexCodesEngine:
             the same as the length of all tuples in `values`
         values : ndarray[object] of tuples
             must be sorted and all have the same length. Should be the set of
-            the MultiIndex's values. Needed only if `method` is not None
+            the MultiIndex's values.
         method: string
             "backfill" or "pad"
         limit: int or None
             if provided, limit the number of fills to this value
 
         Returns
         -------
-        np.ndarray[int64_t, ndim=1] of the indexer of `target` into `values`,
+        np.ndarray[intp_t, ndim=1] of the indexer of `target` into `values`,
         filled with the `method` (and optionally `limit`) specified
         """
         assert method in ("backfill", "pad")
@@ -714,9 +719,7 @@ cdef class BaseMultiIndexCodesEngine:
 
         return self._base.get_loc(self, lab_int)
 
-    def get_indexer_non_unique(self, ndarray target):
-        # This needs to be overridden just because the default one works on
-        # target._values, and target can be itself a MultiIndex.
+    def get_indexer_non_unique(self, ndarray[object] target):
 
         lab_ints = self._extract_level_codes(target)
         indexer = self._base.get_indexer_non_unique(self, lab_ints)
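As an aside, the `np.intp` return types now documented in these docstrings are visible through the public `Index.get_indexer_non_unique`; a hedged example (outputs are expectations, not captured from a run):

```python
# Hedged illustration: Index.get_indexer_non_unique routes through
# IndexEngine.get_indexer_non_unique, whose docstring now documents intp.
import pandas as pd

idx = pd.Index(["a", "b", "a"])              # non-unique index
indexer, missing = idx.get_indexer_non_unique(["a", "z"])
print(indexer.dtype, missing.dtype)          # expected: intp intp
print(indexer, missing)                      # expected: [ 0  2 -1] [1]
```
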
pandas/core/arrays/string_.py (1 addition, 6 deletions)
@@ -405,12 +405,7 @@ def _cmp_method(self, other, op):
     _str_na_value = StringDtype.na_value
 
     def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None):
-        from pandas.arrays import (
-            BooleanArray,
-            IntegerArray,
-            StringArray,
-        )
-        from pandas.core.arrays.string_ import StringDtype
+        from pandas.arrays import BooleanArray
 
         if dtype is None:
             dtype = StringDtype()
pandas/core/indexes/base.py (3 additions, 3 deletions)
@@ -320,7 +320,7 @@ def _outer_indexer(
     # would we like our indexing holder to defer to us
     _defer_to_indexing = False
 
-    _engine_type = libindex.ObjectEngine
+    _engine_type: Type[libindex.IndexEngine] = libindex.ObjectEngine
     # whether we support partial string indexing. Overridden
     # in DatetimeIndex and PeriodIndex
     _supports_partial_string_indexing = False
@@ -723,8 +723,8 @@ def _cleanup(self) -> None:
         self._engine.clear_mapping()
 
     @cache_readonly
-    def _engine(self) -> libindex.ObjectEngine:
-        # property, for now, slow to look up
+    def _engine(self) -> libindex.IndexEngine:
+        # For base class (object dtype) we get ObjectEngine
 
         # to avoid a reference cycle, bind `target_values` to a local variable, so
         # `self` is not passed into the lambda.
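The looser `Type[libindex.IndexEngine]` annotation matters because Index subclasses assign other engine classes to `_engine_type`; a minimal sketch (the `MyIndexLike` classes are hypothetical stand-ins, the engine classes come from the new stub):

```python
# Minimal sketch of why _engine_type is typed as Type[IndexEngine] rather than
# Type[ObjectEngine]. MyIndexLike / MyInt64IndexLike are hypothetical.
from typing import Type

from pandas._libs import index as libindex


class MyIndexLike:
    _engine_type: Type[libindex.IndexEngine] = libindex.ObjectEngine


class MyInt64IndexLike(MyIndexLike):
    # accepted under Type[IndexEngine]; an annotation of Type[ObjectEngine]
    # would reject this override
    _engine_type = libindex.Int64Engine
```
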
pandas/core/indexes/datetimelike.py (2 additions, 1 deletion)
@@ -7,6 +7,7 @@
     Any,
     List,
     Optional,
+    Sequence,
     Tuple,
     TypeVar,
     Union,
@@ -536,7 +537,7 @@ def shift(self: _T, periods: int = 1, freq=None) -> _T:
     # --------------------------------------------------------------------
     # List-like Methods
 
-    def _get_delete_freq(self, loc: int):
+    def _get_delete_freq(self, loc: Union[int, slice, Sequence[int]]):
         """
         Find the `freq` for self.delete(loc).
         """
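The widened `loc` annotation reflects that `delete` already accepts an int, a slice, or a sequence of positions; a rough check (assuming freq is preserved when deleting from an end, per `_get_delete_freq`; outputs are expectations):

```python
# Rough illustration of the three `loc` shapes now covered by the annotation.
import pandas as pd

dti = pd.date_range("2021-01-01", periods=4, freq="D")
print(dti.delete(0).freq)            # expected: <Day> (deleting at an end keeps freq)
print(dti.delete(slice(0, 2)).freq)  # expected: <Day>
print(dti.delete([0, 1]).freq)       # expected: <Day> (contiguous block at the start)
```
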
pandas/core/indexes/multi.py (8 additions, 1 deletion)
@@ -557,7 +557,7 @@ def from_tuples(
             arrays = [[]] * len(names)
         elif isinstance(tuples, (np.ndarray, Index)):
             if isinstance(tuples, Index):
-                tuples = tuples._values
+                tuples = np.asarray(tuples._values)
 
             arrays = list(lib.tuples_to_object_array(tuples).T)
         elif isinstance(tuples, list):
@@ -2689,11 +2689,16 @@ def _get_indexer(
                         target, method=method, limit=limit, tolerance=tolerance
                     )
 
+        # TODO: explicitly raise here? we only have one test that
+        #  gets here, and it is checking that we raise with method="nearest"
+
         if method == "pad" or method == "backfill":
             if tolerance is not None:
                 raise NotImplementedError(
                     "tolerance not implemented yet for MultiIndex"
                 )
+            # TODO: get_indexer_with_fill docstring says values must be _sorted_
+            #  but that doesn't appear to be enforced
             indexer = self._engine.get_indexer_with_fill(
                 target=target._values, values=self._values, method=method, limit=limit
             )
@@ -2705,6 +2710,8 @@
         else:
            indexer = self._engine.get_indexer(target._values)
 
+        # Note: we only get here (in extant tests at least) with
+        #  target.nlevels == self.nlevels
         return ensure_platform_int(indexer)
 
     def get_slice_bound(
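For context, the pad/backfill branch above is what serves `MultiIndex.get_indexer(..., method="pad")`; a hedged example of that path (expected values, not verified output):

```python
# Illustrative only: exercises the method="pad" branch of MultiIndex._get_indexer.
import pandas as pd

mi = pd.MultiIndex.from_tuples([("a", 1), ("a", 3), ("b", 1)])
target = pd.MultiIndex.from_tuples([("a", 2), ("b", 1)])

print(mi.get_indexer(target, method="pad"))  # expected: [0 2]; ("a", 2) pads back to ("a", 1)

# tolerance is rejected on this path:
# mi.get_indexer(target, method="pad", tolerance=1)  -> NotImplementedError
```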