Skip to content

Commit 872fc29

Browse files
authored
TYP: index.pyi (#40486)
1 parent cc63476 commit 872fc29

File tree

6 files changed

+114
-22
lines changed

6 files changed

+114
-22
lines changed

pandas/_libs/index.pyi

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import numpy as np
2+
3+
class IndexEngine:
4+
over_size_threshold: bool
5+
6+
def __init__(self, vgetter, n: int): ...
7+
8+
def __contains__(self, val: object) -> bool: ...
9+
10+
# -> int | slice | np.ndarray[bool]
11+
def get_loc(self, val: object) -> int | slice | np.ndarray: ...
12+
13+
def sizeof(self, deep: bool = False) -> int: ...
14+
def __sizeof__(self) -> int: ...
15+
16+
@property
17+
def is_unique(self) -> bool: ...
18+
19+
@property
20+
def is_monotonic_increasing(self) -> bool: ...
21+
22+
@property
23+
def is_monotonic_decreasing(self) -> bool: ...
24+
25+
def get_backfill_indexer(self, other: np.ndarray, limit: int | None =...) -> np.ndarray: ...
26+
def get_pad_indexer(self, other: np.ndarray, limit: int | None =...) -> np.ndarray: ...
27+
28+
@property
29+
def is_mapping_populated(self) -> bool: ...
30+
31+
def clear_mapping(self): ...
32+
def get_indexer(self, values: np.ndarray) -> np.ndarray: ... # np.ndarray[np.intp]
33+
def get_indexer_non_unique(
34+
self,
35+
targets: np.ndarray,
36+
) -> tuple[
37+
np.ndarray, # np.ndarray[np.intp]
38+
np.ndarray, # np.ndarray[np.intp]
39+
]: ...
40+
41+
42+
# Concrete engine stubs.  Each adds nothing to the interface of its base class
# here; presumably there is one engine per supported dtype, judging by the
# names -- TODO confirm against index.pyx.

# Floating-point engines.
class Float64Engine(IndexEngine): ...
43+
class Float32Engine(IndexEngine): ...
44+
45+
# Signed-integer engines.
class Int64Engine(IndexEngine): ...
46+
class Int32Engine(IndexEngine): ...
47+
class Int16Engine(IndexEngine): ...
48+
class Int8Engine(IndexEngine): ...
49+
50+
# Unsigned-integer engines.
class UInt64Engine(IndexEngine): ...
51+
class UInt32Engine(IndexEngine): ...
52+
class UInt16Engine(IndexEngine): ...
53+
class UInt8Engine(IndexEngine): ...
54+
55+
# Engine assigned to ``_engine_type`` in pandas/core/indexes/base.py.
class ObjectEngine(IndexEngine): ...
56+
57+
# Datetime-like engines derive from Int64Engine; the pyx casts
# datetime64[ns] / timedelta64[ns] targets to int64 via ``view("i8")``.
class DatetimeEngine(Int64Engine): ...
58+
class TimedeltaEngine(DatetimeEngine): ...
59+
class PeriodEngine(Int64Engine): ...
60+
61+
62+
class BaseMultiIndexCodesEngine:
63+
levels: list[np.ndarray]
64+
offsets: np.ndarray # ndarray[uint64_t, ndim=1]
65+
66+
def __init__(
67+
self,
68+
levels: list[np.ndarray], # all entries hashable
69+
labels: list[np.ndarray], # all entries integer-dtyped
70+
offsets: np.ndarray, # np.ndarray[np.uint64, ndim=1]
71+
): ...
72+
73+
def get_indexer(
74+
self,
75+
target: np.ndarray, # np.ndarray[object]
76+
) -> np.ndarray: ... # np.ndarray[np.intp]
77+
78+
def _extract_level_codes(self, target: object): ...
79+
80+
def get_indexer_with_fill(
81+
self,
82+
target: np.ndarray, # np.ndarray[object] of tuples
83+
values: np.ndarray, # np.ndarray[object] of tuples
84+
method: str,
85+
limit: int | None,
86+
) -> np.ndarray: ... # np.ndarray[np.int64]

pandas/_libs/index.pyx

+14-11
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ cdef class IndexEngine:
259259
self.monotonic_inc = 0
260260
self.monotonic_dec = 0
261261

262-
def get_indexer(self, ndarray values):
262+
def get_indexer(self, ndarray values) -> np.ndarray:
263263
self._ensure_mapping_populated()
264264
return self.mapping.lookup(values)
265265

@@ -269,6 +269,11 @@ cdef class IndexEngine:
269269
return the labels in the same order as the target
270270
and a missing indexer into the targets (which correspond
271271
to the -1 indices in the results)
272+
273+
Returns
274+
-------
275+
indexer : np.ndarray[np.intp]
276+
missing : np.ndarray[np.intp]
272277
"""
273278
cdef:
274279
ndarray values, x
@@ -455,7 +460,7 @@ cdef class DatetimeEngine(Int64Engine):
455460
# we may get datetime64[ns] or timedelta64[ns], cast these to int64
456461
return super().get_indexer_non_unique(targets.view("i8"))
457462

458-
def get_indexer(self, ndarray values):
463+
def get_indexer(self, ndarray values) -> np.ndarray:
459464
self._ensure_mapping_populated()
460465
if values.dtype != self._get_box_dtype():
461466
return np.repeat(-1, len(values)).astype(np.intp)
@@ -572,17 +577,17 @@ cdef class BaseMultiIndexCodesEngine:
572577
# integers representing labels: we will use its get_loc and get_indexer
573578
self._base.__init__(self, lambda: lab_ints, len(lab_ints))
574579

575-
def _codes_to_ints(self, codes):
580+
def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray:
576581
raise NotImplementedError("Implemented by subclass")
577582

578-
def _extract_level_codes(self, object target):
583+
def _extract_level_codes(self, ndarray[object] target) -> np.ndarray:
579584
"""
580585
Map the requested list of (tuple) keys to their integer representations
581586
for searching in the underlying integer index.
582587

583588
Parameters
584589
----------
585-
target : list-like of keys
590+
target : ndarray[object]
586591
Each key is a tuple, with a label for each level of the index.
587592

588593
Returns
@@ -607,7 +612,7 @@ cdef class BaseMultiIndexCodesEngine:
607612

608613
Returns
609614
-------
610-
np.ndarray[int64_t, ndim=1] of the indexer of `target` into
615+
np.ndarray[intp_t, ndim=1] of the indexer of `target` into
611616
`self.values`
612617
"""
613618
lab_ints = self._extract_level_codes(target)
@@ -635,15 +640,15 @@ cdef class BaseMultiIndexCodesEngine:
635640
the same as the length of all tuples in `values`
636641
values : ndarray[object] of tuples
637642
must be sorted and all have the same length. Should be the set of
638-
the MultiIndex's values. Needed only if `method` is not None
643+
the MultiIndex's values.
639644
method : str
640645
"backfill" or "pad"
641646
limit : int or None
642647
if provided, limit the number of fills to this value
643648

644649
Returns
645650
-------
646-
np.ndarray[int64_t, ndim=1] of the indexer of `target` into `values`,
651+
np.ndarray[intp_t, ndim=1] of the indexer of `target` into `values`,
647652
filled with the `method` (and optionally `limit`) specified
648653
"""
649654
assert method in ("backfill", "pad")
@@ -714,9 +719,7 @@ cdef class BaseMultiIndexCodesEngine:
714719

715720
return self._base.get_loc(self, lab_int)
716721

717-
def get_indexer_non_unique(self, ndarray target):
718-
# This needs to be overridden just because the default one works on
719-
# target._values, and target can be itself a MultiIndex.
722+
def get_indexer_non_unique(self, ndarray[object] target):
720723

721724
lab_ints = self._extract_level_codes(target)
722725
indexer = self._base.get_indexer_non_unique(self, lab_ints)

pandas/core/arrays/string_.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -405,12 +405,7 @@ def _cmp_method(self, other, op):
405405
_str_na_value = StringDtype.na_value
406406

407407
def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None):
408-
from pandas.arrays import (
409-
BooleanArray,
410-
IntegerArray,
411-
StringArray,
412-
)
413-
from pandas.core.arrays.string_ import StringDtype
408+
from pandas.arrays import BooleanArray
414409

415410
if dtype is None:
416411
dtype = StringDtype()

pandas/core/indexes/base.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ def _outer_indexer(
320320
# would we like our indexing holder to defer to us
321321
_defer_to_indexing = False
322322

323-
_engine_type = libindex.ObjectEngine
323+
_engine_type: Type[libindex.IndexEngine] = libindex.ObjectEngine
324324
# whether we support partial string indexing. Overridden
325325
# in DatetimeIndex and PeriodIndex
326326
_supports_partial_string_indexing = False
@@ -723,8 +723,8 @@ def _cleanup(self) -> None:
723723
self._engine.clear_mapping()
724724

725725
@cache_readonly
726-
def _engine(self) -> libindex.ObjectEngine:
727-
# property, for now, slow to look up
726+
def _engine(self) -> libindex.IndexEngine:
727+
# For base class (object dtype) we get ObjectEngine
728728

729729
# to avoid a reference cycle, bind `target_values` to a local variable, so
730730
# `self` is not passed into the lambda.

pandas/core/indexes/datetimelike.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
Any,
88
List,
99
Optional,
10+
Sequence,
1011
Tuple,
1112
TypeVar,
1213
Union,
@@ -536,7 +537,7 @@ def shift(self: _T, periods: int = 1, freq=None) -> _T:
536537
# --------------------------------------------------------------------
537538
# List-like Methods
538539

539-
def _get_delete_freq(self, loc: int):
540+
def _get_delete_freq(self, loc: Union[int, slice, Sequence[int]]):
540541
"""
541542
Find the `freq` for self.delete(loc).
542543
"""

pandas/core/indexes/multi.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,7 @@ def from_tuples(
557557
arrays = [[]] * len(names)
558558
elif isinstance(tuples, (np.ndarray, Index)):
559559
if isinstance(tuples, Index):
560-
tuples = tuples._values
560+
tuples = np.asarray(tuples._values)
561561

562562
arrays = list(lib.tuples_to_object_array(tuples).T)
563563
elif isinstance(tuples, list):
@@ -2689,11 +2689,16 @@ def _get_indexer(
26892689
target, method=method, limit=limit, tolerance=tolerance
26902690
)
26912691

2692+
# TODO: explicitly raise here? we only have one test that
2693+
# gets here, and it is checking that we raise with method="nearest"
2694+
26922695
if method == "pad" or method == "backfill":
26932696
if tolerance is not None:
26942697
raise NotImplementedError(
26952698
"tolerance not implemented yet for MultiIndex"
26962699
)
2700+
# TODO: get_indexer_with_fill docstring says values must be _sorted_
2701+
# but that doesn't appear to be enforced
26972702
indexer = self._engine.get_indexer_with_fill(
26982703
target=target._values, values=self._values, method=method, limit=limit
26992704
)
@@ -2705,6 +2710,8 @@ def _get_indexer(
27052710
else:
27062711
indexer = self._engine.get_indexer(target._values)
27072712

2713+
# Note: we only get here (in extant tests at least) with
2714+
# target.nlevels == self.nlevels
27082715
return ensure_platform_int(indexer)
27092716

27102717
def get_slice_bound(

0 commit comments

Comments
 (0)