Skip to content

Commit 3570151

Browse files
authored
REF/TYP: indexes (#40330)
1 parent 921477e commit 3570151

File tree

8 files changed

+66
-71
lines changed

8 files changed

+66
-71
lines changed

pandas/core/indexes/base.py

+5-9
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,6 @@
173173
RangeIndex,
174174
Series,
175175
)
176-
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
177176

178177

179178
__all__ = ["Index"]
@@ -305,7 +304,7 @@ def _outer_indexer(
305304

306305
_typ = "index"
307306
_data: Union[ExtensionArray, np.ndarray]
308-
_id: Optional[_Identity] = None
307+
_id: Optional[object] = None
309308
_name: Hashable = None
310309
# MultiIndex.levels previously allowed setting the index name. We
311310
# don't allow this anymore, and raise if it happens rather than
@@ -711,7 +710,7 @@ def _reset_identity(self) -> None:
711710
"""
712711
Initializes or resets ``_id`` attribute with new object.
713712
"""
714-
self._id = _Identity(object())
713+
self._id = object()
715714

716715
@final
717716
def _cleanup(self) -> None:
@@ -1717,7 +1716,7 @@ def sortlevel(self, level=None, ascending=True, sort_remaining=None):
17171716

17181717
return self.sort_values(return_indexer=True, ascending=ascending)
17191718

1720-
def _get_level_values(self, level):
1719+
def _get_level_values(self, level) -> Index:
17211720
"""
17221721
Return an Index of values for requested level.
17231722
@@ -2977,11 +2976,8 @@ def _union(self, other: Index, sort):
29772976
return result
29782977

29792978
@final
2980-
def _wrap_setop_result(self, other, result):
2981-
if needs_i8_conversion(self.dtype) and isinstance(result, np.ndarray):
2982-
self = cast("DatetimeIndexOpsMixin", self)
2983-
result = type(self._data)._simple_new(result, dtype=self.dtype)
2984-
elif is_categorical_dtype(self.dtype) and isinstance(result, np.ndarray):
2979+
def _wrap_setop_result(self, other: Index, result) -> Index:
2980+
if is_categorical_dtype(self.dtype) and isinstance(result, np.ndarray):
29852981
result = Categorical(result, dtype=self.dtype)
29862982

29872983
name = get_op_result_name(self, other)

pandas/core/indexes/category.py

+1-12
Original file line number | Diff line number | Diff line change
@@ -178,6 +178,7 @@ class CategoricalIndex(NDArrayBackedExtensionIndex, accessor.PandasDelegate):
178178
"""
179179

180180
_typ = "categoricalindex"
181+
_data_cls = Categorical
181182

182183
@property
183184
def _can_hold_strings(self):
@@ -225,18 +226,6 @@ def __new__(
225226

226227
return cls._simple_new(data, name=name)
227228

228-
@classmethod
229-
def _simple_new(cls, values: Categorical, name: Optional[Hashable] = None):
230-
assert isinstance(values, Categorical), type(values)
231-
result = object.__new__(cls)
232-
233-
result._data = values
234-
result._name = name
235-
result._cache = {}
236-
237-
result._reset_identity()
238-
return result
239-
240229
# --------------------------------------------------------------------
241230

242231
@doc(Index._shallow_copy)

pandas/core/indexes/datetimelike.py

+2-27
Original file line number | Diff line number | Diff line change
@@ -5,11 +5,9 @@
55
from typing import (
66
TYPE_CHECKING,
77
Any,
8-
Hashable,
98
List,
109
Optional,
1110
Tuple,
12-
Type,
1311
TypeVar,
1412
Union,
1513
cast,
@@ -44,7 +42,6 @@
4442
is_integer,
4543
is_list_like,
4644
is_period_dtype,
47-
is_scalar,
4845
)
4946
from pandas.core.dtypes.concat import concat_compat
5047

@@ -119,7 +116,6 @@ class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):
119116

120117
_can_hold_strings = False
121118
_data: Union[DatetimeArray, TimedeltaArray, PeriodArray]
122-
_data_cls: Union[Type[DatetimeArray], Type[TimedeltaArray], Type[PeriodArray]]
123119
freq: Optional[BaseOffset]
124120
freqstr: Optional[str]
125121
_resolution_obj: Resolution
@@ -132,25 +128,6 @@ class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):
132128
)
133129
_hasnans = hasnans # for index / array -agnostic code
134130

135-
@classmethod
136-
def _simple_new(
137-
cls,
138-
values: Union[DatetimeArray, TimedeltaArray, PeriodArray],
139-
name: Optional[Hashable] = None,
140-
):
141-
assert isinstance(values, cls._data_cls), type(values)
142-
143-
result = object.__new__(cls)
144-
result._data = values
145-
result._name = name
146-
result._cache = {}
147-
148-
# For groupby perf. See note in indexes/base about _index_data
149-
result._index_data = values._ndarray
150-
151-
result._reset_identity()
152-
return result
153-
154131
@property
155132
def _is_all_dates(self) -> bool:
156133
return True
@@ -219,12 +196,10 @@ def equals(self, other: Any) -> bool:
219196
def __contains__(self, key: Any) -> bool:
220197
hash(key)
221198
try:
222-
res = self.get_loc(key)
199+
self.get_loc(key)
223200
except (KeyError, TypeError, ValueError):
224201
return False
225-
return bool(
226-
is_scalar(res) or isinstance(res, slice) or (is_list_like(res) and len(res))
227-
)
202+
return True
228203

229204
@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
230205
def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):

pandas/core/indexes/extension.py

+35-1
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,9 @@
22
Shared methods for Index subclasses backed by ExtensionArray.
33
"""
44
from typing import (
5+
Hashable,
56
List,
7+
Type,
68
TypeVar,
79
Union,
810
)
@@ -30,7 +32,13 @@
3032
ABCSeries,
3133
)
3234

33-
from pandas.core.arrays import IntervalArray
35+
from pandas.core.arrays import (
36+
Categorical,
37+
DatetimeArray,
38+
IntervalArray,
39+
PeriodArray,
40+
TimedeltaArray,
41+
)
3442
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
3543
from pandas.core.indexers import deprecate_ndim_indexing
3644
from pandas.core.indexes.base import Index
@@ -352,6 +360,32 @@ class NDArrayBackedExtensionIndex(ExtensionIndex):
352360

353361
_data: NDArrayBackedExtensionArray
354362

363+
_data_cls: Union[
364+
Type[Categorical],
365+
Type[DatetimeArray],
366+
Type[TimedeltaArray],
367+
Type[PeriodArray],
368+
]
369+
370+
@classmethod
371+
def _simple_new(
372+
cls,
373+
values: NDArrayBackedExtensionArray,
374+
name: Hashable = None,
375+
):
376+
assert isinstance(values, cls._data_cls), type(values)
377+
378+
result = object.__new__(cls)
379+
result._data = values
380+
result._name = name
381+
result._cache = {}
382+
383+
# For groupby perf. See note in indexes/base about _index_data
384+
result._index_data = values._ndarray
385+
386+
result._reset_identity()
387+
return result
388+
355389
def _get_engine_target(self) -> np.ndarray:
356390
return self._data._ndarray
357391

pandas/core/indexes/multi.py

+21-17
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
TYPE_CHECKING,
77
Any,
88
Callable,
9+
Collection,
910
Hashable,
1011
Iterable,
1112
List,
@@ -98,6 +99,7 @@
9899
if TYPE_CHECKING:
99100
from pandas import (
100101
CategoricalIndex,
102+
DataFrame,
101103
Series,
102104
)
103105

@@ -323,7 +325,7 @@ def __new__(
323325
if len(levels) == 0:
324326
raise ValueError("Must pass non-zero number of levels/codes")
325327

326-
result = object.__new__(MultiIndex)
328+
result = object.__new__(cls)
327329
result._cache = {}
328330

329331
# we've already validated levels and codes, so shortcut here
@@ -503,7 +505,7 @@ def from_arrays(cls, arrays, sortorder=None, names=lib.no_default) -> MultiIndex
503505
@names_compat
504506
def from_tuples(
505507
cls,
506-
tuples,
508+
tuples: Iterable[Tuple[Hashable, ...]],
507509
sortorder: Optional[int] = None,
508510
names: Optional[Sequence[Hashable]] = None,
509511
) -> MultiIndex:
@@ -546,6 +548,7 @@ def from_tuples(
546548
raise TypeError("Input must be a list / sequence of tuple-likes.")
547549
elif is_iterator(tuples):
548550
tuples = list(tuples)
551+
tuples = cast(Collection[Tuple[Hashable, ...]], tuples)
549552

550553
arrays: List[Sequence[Hashable]]
551554
if len(tuples) == 0:
@@ -560,7 +563,8 @@ def from_tuples(
560563
elif isinstance(tuples, list):
561564
arrays = list(lib.to_object_array_tuples(tuples).T)
562565
else:
563-
arrays = zip(*tuples)
566+
arrs = zip(*tuples)
567+
arrays = cast(List[Sequence[Hashable]], arrs)
564568

565569
return cls.from_arrays(arrays, sortorder=sortorder, names=names)
566570

@@ -626,7 +630,7 @@ def from_product(
626630
return cls(levels, codes, sortorder=sortorder, names=names)
627631

628632
@classmethod
629-
def from_frame(cls, df, sortorder=None, names=None) -> MultiIndex:
633+
def from_frame(cls, df: DataFrame, sortorder=None, names=None) -> MultiIndex:
630634
"""
631635
Make a MultiIndex from a DataFrame.
632636
@@ -762,7 +766,7 @@ def __len__(self) -> int:
762766
# Levels Methods
763767

764768
@cache_readonly
765-
def levels(self):
769+
def levels(self) -> FrozenList:
766770
# Use cache_readonly to ensure that self.get_locs doesn't repeatedly
767771
# create new IndexEngine
768772
# https://github.com/pandas-dev/pandas/issues/31648
@@ -1293,7 +1297,7 @@ def _formatter_func(self, tup):
12931297
formatter_funcs = [level._formatter_func for level in self.levels]
12941298
return tuple(func(val) for func, val in zip(formatter_funcs, tup))
12951299

1296-
def _format_data(self, name=None):
1300+
def _format_data(self, name=None) -> str:
12971301
"""
12981302
Return the formatted data as a unicode string
12991303
"""
@@ -1419,10 +1423,10 @@ def format(
14191423
# --------------------------------------------------------------------
14201424
# Names Methods
14211425

1422-
def _get_names(self):
1426+
def _get_names(self) -> FrozenList:
14231427
return FrozenList(self._names)
14241428

1425-
def _set_names(self, names, level=None, validate=True):
1429+
def _set_names(self, names, level=None, validate: bool = True):
14261430
"""
14271431
Set new names on index. Each name has to be a hashable type.
14281432
@@ -1433,7 +1437,7 @@ def _set_names(self, names, level=None, validate=True):
14331437
level : int, level name, or sequence of int/level names (default None)
14341438
If the index is a MultiIndex (hierarchical), level(s) to set (None
14351439
for all levels). Otherwise level must be None
1436-
validate : boolean, default True
1440+
validate : bool, default True
14371441
validate that the names match level lengths
14381442
14391443
Raises
@@ -1712,7 +1716,7 @@ def unique(self, level=None):
17121716
level = self._get_level_number(level)
17131717
return self._get_level_values(level=level, unique=True)
17141718

1715-
def to_frame(self, index=True, name=None):
1719+
def to_frame(self, index=True, name=None) -> DataFrame:
17161720
"""
17171721
Create a DataFrame with the levels of the MultiIndex as columns.
17181722
@@ -2109,8 +2113,8 @@ def take(
21092113

21102114
na_value = -1
21112115

2116+
taken = [lab.take(indices) for lab in self.codes]
21122117
if allow_fill:
2113-
taken = [lab.take(indices) for lab in self.codes]
21142118
mask = indices == -1
21152119
if mask.any():
21162120
masked = []
@@ -2119,8 +2123,6 @@ def take(
21192123
label_values[mask] = na_value
21202124
masked.append(np.asarray(label_values))
21212125
taken = masked
2122-
else:
2123-
taken = [lab.take(indices) for lab in self.codes]
21242126

21252127
return MultiIndex(
21262128
levels=self.levels, codes=taken, names=self.names, verify_integrity=False
@@ -2644,7 +2646,9 @@ def _get_partial_string_timestamp_match_key(self, key):
26442646

26452647
return key
26462648

2647-
def _get_indexer(self, target: Index, method=None, limit=None, tolerance=None):
2649+
def _get_indexer(
2650+
self, target: Index, method=None, limit=None, tolerance=None
2651+
) -> np.ndarray:
26482652

26492653
# empty indexer
26502654
if not len(target):
@@ -3521,7 +3525,7 @@ def equals(self, other: object) -> bool:
35213525

35223526
return True
35233527

3524-
def equal_levels(self, other) -> bool:
3528+
def equal_levels(self, other: MultiIndex) -> bool:
35253529
"""
35263530
Return True if the levels of both MultiIndex objects are the same
35273531
@@ -3537,7 +3541,7 @@ def equal_levels(self, other) -> bool:
35373541
# --------------------------------------------------------------------
35383542
# Set Methods
35393543

3540-
def _union(self, other, sort):
3544+
def _union(self, other, sort) -> MultiIndex:
35413545
other, result_names = self._convert_can_do_setop(other)
35423546

35433547
# We could get here with CategoricalIndex other
@@ -3579,7 +3583,7 @@ def _maybe_match_names(self, other):
35793583
names.append(None)
35803584
return names
35813585

3582-
def _intersection(self, other, sort=False):
3586+
def _intersection(self, other, sort=False) -> MultiIndex:
35833587
other, result_names = self._convert_can_do_setop(other)
35843588

35853589
lvals = self._values

pandas/core/indexes/numeric.py

+1
Original file line number | Diff line number | Diff line change
@@ -232,6 +232,7 @@ def __contains__(self, key) -> bool:
232232
hash(key)
233233
try:
234234
if is_float(key) and int(key) != key:
235+
# otherwise the `key in self._engine` check casts e.g. 1.1 -> 1
235236
return False
236237
return key in self._engine
237238
except (OverflowError, TypeError, ValueError):

pandas/core/indexes/range.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ class RangeIndex(Int64Index):
6767
6868
Parameters
6969
----------
70-
start : int (default: 0), or other RangeIndex instance
70+
start : int (default: 0), range, or other RangeIndex instance
7171
If int and "stop" is not given, interpreted as "stop" instead.
7272
stop : int (default: 0)
7373
step : int (default: 1)

pandas/tests/groupby/test_function.py

-4
Original file line number | Diff line number | Diff line change
@@ -122,10 +122,6 @@ def test_intercept_builtin_sum():
122122
tm.assert_series_equal(result2, expected)
123123

124124

125-
# @pytest.mark.parametrize("f", [max, min, sum])
126-
# def test_builtins_apply(f):
127-
128-
129125
@pytest.mark.parametrize("f", [max, min, sum])
130126
@pytest.mark.parametrize("keys", ["jim", ["jim", "joe"]]) # Single key # Multi-key
131127
def test_builtins_apply(keys, f):

0 commit comments

Comments
 (0)