Skip to content

Commit a15aff6

Browse files
authored
CLN: annotations, docstrings (#41089)
1 parent be921ff commit a15aff6

File tree

8 files changed

+92
-50
lines changed

8 files changed

+92
-50
lines changed

pandas/_libs/hashtable_class_helper.pxi.in

+4-1
Original file line numberDiff line numberDiff line change
@@ -687,7 +687,10 @@ cdef class {{name}}HashTable(HashTable):
687687

688688
{{if dtype == 'int64'}}
689689
@cython.boundscheck(False)
690-
def get_labels_groupby(self, const {{dtype}}_t[:] values):
690+
def get_labels_groupby(
691+
self, const {{dtype}}_t[:] values
692+
) -> tuple[ndarray, ndarray]:
693+
# tuple[np.ndarray[np.intp], np.ndarray[{{dtype}}]]
691694
cdef:
692695
Py_ssize_t i, n = len(values)
693696
intp_t[:] labels

pandas/core/arrays/period.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,8 @@ class PeriodArray(dtl.DatelikeOps):
178178
"days_in_month",
179179
"daysinmonth",
180180
]
181-
_datetimelike_ops = _field_ops + _object_ops + _bool_ops
182-
_datetimelike_methods = ["strftime", "to_timestamp", "asfreq"]
181+
_datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops
182+
_datetimelike_methods: list[str] = ["strftime", "to_timestamp", "asfreq"]
183183

184184
# --------------------------------------------------------------------
185185
# Constructors

pandas/core/dtypes/dtypes.py

+13-8
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,14 @@
4848
)
4949

5050
if TYPE_CHECKING:
51+
from datetime import tzinfo
52+
5153
import pyarrow
5254

53-
from pandas import Categorical
55+
from pandas import (
56+
Categorical,
57+
Index,
58+
)
5459
from pandas.core.arrays import (
5560
DatetimeArray,
5661
IntervalArray,
@@ -445,8 +450,8 @@ def _hash_categories(self) -> int:
445450
# assumes if any individual category is a tuple, then all are. ATM
446451
# I don't really want to support just some of the categories being
447452
# tuples.
448-
categories = list(categories) # breaks if a np.array of categories
449-
cat_array = hash_tuples(categories)
453+
cat_list = list(categories) # breaks if a np.array of categories
454+
cat_array = hash_tuples(cat_list)
450455
else:
451456
if categories.dtype == "O" and len({type(x) for x in categories}) != 1:
452457
# TODO: hash_array doesn't handle mixed types. It casts
@@ -509,7 +514,7 @@ def validate_ordered(ordered: Ordered) -> None:
509514
raise TypeError("'ordered' must either be 'True' or 'False'")
510515

511516
@staticmethod
512-
def validate_categories(categories, fastpath: bool = False):
517+
def validate_categories(categories, fastpath: bool = False) -> Index:
513518
"""
514519
Validates that we have good categories
515520
@@ -579,7 +584,7 @@ def update_dtype(self, dtype: str_type | CategoricalDtype) -> CategoricalDtype:
579584
return CategoricalDtype(new_categories, new_ordered)
580585

581586
@property
582-
def categories(self):
587+
def categories(self) -> Index:
583588
"""
584589
An ``Index`` containing the unique categories allowed.
585590
"""
@@ -717,7 +722,7 @@ def unit(self) -> str_type:
717722
return self._unit
718723

719724
@property
720-
def tz(self):
725+
def tz(self) -> tzinfo:
721726
"""
722727
The timezone.
723728
"""
@@ -882,7 +887,7 @@ def freq(self):
882887
return self._freq
883888

884889
@classmethod
885-
def _parse_dtype_strict(cls, freq):
890+
def _parse_dtype_strict(cls, freq: str_type) -> BaseOffset:
886891
if isinstance(freq, str):
887892
if freq.startswith("period[") or freq.startswith("Period["):
888893
m = cls._match.search(freq)
@@ -1138,7 +1143,7 @@ def construct_array_type(cls) -> type[IntervalArray]:
11381143
return IntervalArray
11391144

11401145
@classmethod
1141-
def construct_from_string(cls, string):
1146+
def construct_from_string(cls, string: str_type) -> IntervalDtype:
11421147
"""
11431148
attempt to construct this type from a string, raise a TypeError
11441149
if it's not possible

pandas/core/groupby/ops.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ class BaseGrouper:
281281
whether this grouper will give sorted result or not
282282
group_keys : bool, default True
283283
mutated : bool, default False
284-
indexer : intp array, optional
284+
indexer : np.ndarray[np.intp], optional
285285
the indexer created by Grouper
286286
some groupers (TimeGrouper) will sort its axis and its
287287
group_info is also sorted, so need the indexer to reorder

pandas/core/indexes/base.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -3029,9 +3029,6 @@ def _union(self, other: Index, sort):
30293029

30303030
@final
30313031
def _wrap_setop_result(self, other: Index, result) -> Index:
3032-
if is_categorical_dtype(self.dtype) and isinstance(result, np.ndarray):
3033-
result = Categorical(result, dtype=self.dtype)
3034-
30353032
name = get_op_result_name(self, other)
30363033
if isinstance(result, Index):
30373034
if result.name != name:
@@ -4028,7 +4025,7 @@ def join(
40284025
return join_index, lindexer, rindexer
40294026

40304027
@final
4031-
def _join_multi(self, other, how):
4028+
def _join_multi(self, other: Index, how: str_t):
40324029
from pandas.core.indexes.multi import MultiIndex
40334030
from pandas.core.reshape.merge import restore_dropped_levels_multijoin
40344031

@@ -4273,7 +4270,7 @@ def _get_leaf_sorter(labels: list[np.ndarray]) -> np.ndarray:
42734270
return join_index, left_indexer, right_indexer
42744271

42754272
@final
4276-
def _join_monotonic(self, other: Index, how="left"):
4273+
def _join_monotonic(self, other: Index, how: str_t = "left"):
42774274
# We only get here with matching dtypes
42784275
assert other.dtype == self.dtype
42794276

@@ -5527,7 +5524,7 @@ def isin(self, values, level=None) -> np.ndarray:
55275524
55285525
Returns
55295526
-------
5530-
is_contained : ndarray[bool]
5527+
np.ndarray[bool]
55315528
NumPy array of boolean values.
55325529
55335530
See Also

pandas/core/indexing.py

+18-16
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
from pandas.core.dtypes.concat import concat_compat
3737
from pandas.core.dtypes.generic import (
3838
ABCDataFrame,
39-
ABCMultiIndex,
4039
ABCSeries,
4140
)
4241
from pandas.core.dtypes.missing import (
@@ -53,7 +52,10 @@
5352
is_list_like_indexer,
5453
length_of_indexer,
5554
)
56-
from pandas.core.indexes.api import Index
55+
from pandas.core.indexes.api import (
56+
Index,
57+
MultiIndex,
58+
)
5759

5860
if TYPE_CHECKING:
5961
from pandas import (
@@ -642,7 +644,7 @@ def _get_setitem_indexer(self, key):
642644

643645
ax = self.obj._get_axis(0)
644646

645-
if isinstance(ax, ABCMultiIndex) and self.name != "iloc":
647+
if isinstance(ax, MultiIndex) and self.name != "iloc":
646648
with suppress(TypeError, KeyError, InvalidIndexError):
647649
# TypeError e.g. passed a bool
648650
return ax.get_loc(key)
@@ -690,7 +692,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None):
690692

691693
if (
692694
axis == column_axis
693-
and not isinstance(self.obj.columns, ABCMultiIndex)
695+
and not isinstance(self.obj.columns, MultiIndex)
694696
and is_list_like_indexer(key)
695697
and not com.is_bool_indexer(key)
696698
and all(is_hashable(k) for k in key)
@@ -756,7 +758,7 @@ def _is_nested_tuple_indexer(self, tup: tuple) -> bool:
756758
-------
757759
bool
758760
"""
759-
if any(isinstance(ax, ABCMultiIndex) for ax in self.obj.axes):
761+
if any(isinstance(ax, MultiIndex) for ax in self.obj.axes):
760762
return any(is_nested_tuple(tup, ax) for ax in self.obj.axes)
761763
return False
762764

@@ -817,7 +819,7 @@ def _getitem_lowerdim(self, tup: tuple):
817819
ax0 = self.obj._get_axis(0)
818820
# ...but iloc should handle the tuple as simple integer-location
819821
# instead of checking it as multiindex representation (GH 13797)
820-
if isinstance(ax0, ABCMultiIndex) and self.name != "iloc":
822+
if isinstance(ax0, MultiIndex) and self.name != "iloc":
821823
with suppress(IndexingError):
822824
return self._handle_lowerdim_multi_index_axis0(tup)
823825

@@ -996,7 +998,7 @@ def _is_scalar_access(self, key: tuple) -> bool:
996998
return False
997999

9981000
ax = self.obj.axes[i]
999-
if isinstance(ax, ABCMultiIndex):
1001+
if isinstance(ax, MultiIndex):
10001002
return False
10011003

10021004
if isinstance(k, str) and ax._supports_partial_string_indexing:
@@ -1142,7 +1144,7 @@ def _getitem_axis(self, key, axis: int):
11421144
elif is_list_like_indexer(key):
11431145

11441146
# an iterable multi-selection
1145-
if not (isinstance(key, tuple) and isinstance(labels, ABCMultiIndex)):
1147+
if not (isinstance(key, tuple) and isinstance(labels, MultiIndex)):
11461148

11471149
if hasattr(key, "ndim") and key.ndim > 1:
11481150
raise ValueError("Cannot index with multidimensional key")
@@ -1205,20 +1207,20 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False):
12051207
is_int_index = labels.is_integer()
12061208
is_int_positional = is_integer(key) and not is_int_index
12071209

1208-
if is_scalar(key) or isinstance(labels, ABCMultiIndex):
1210+
if is_scalar(key) or isinstance(labels, MultiIndex):
12091211
# Otherwise get_loc will raise InvalidIndexError
12101212

12111213
# if we are a label return me
12121214
try:
12131215
return labels.get_loc(key)
12141216
except LookupError:
1215-
if isinstance(key, tuple) and isinstance(labels, ABCMultiIndex):
1217+
if isinstance(key, tuple) and isinstance(labels, MultiIndex):
12161218
if len(key) == labels.nlevels:
12171219
return {"key": key}
12181220
raise
12191221
except InvalidIndexError:
12201222
# GH35015, using datetime as column indices raises exception
1221-
if not isinstance(labels, ABCMultiIndex):
1223+
if not isinstance(labels, MultiIndex):
12221224
raise
12231225
except TypeError:
12241226
pass
@@ -1620,7 +1622,7 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"):
16201622
# GH 10360, GH 27841
16211623
if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes):
16221624
for i, ax in zip(indexer, self.obj.axes):
1623-
if isinstance(ax, ABCMultiIndex) and not (
1625+
if isinstance(ax, MultiIndex) and not (
16241626
is_integer(i) or com.is_null_slice(i)
16251627
):
16261628
take_split_path = True
@@ -1819,7 +1821,7 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str
18191821
sub_indexer = list(indexer)
18201822
pi = indexer[0]
18211823

1822-
multiindex_indexer = isinstance(self.obj.columns, ABCMultiIndex)
1824+
multiindex_indexer = isinstance(self.obj.columns, MultiIndex)
18231825

18241826
unique_cols = value.columns.is_unique
18251827

@@ -2163,8 +2165,8 @@ def _align_frame(self, indexer, df: DataFrame):
21632165
# we have a multi-index and are trying to align
21642166
# with a particular, level GH3738
21652167
if (
2166-
isinstance(ax, ABCMultiIndex)
2167-
and isinstance(df.index, ABCMultiIndex)
2168+
isinstance(ax, MultiIndex)
2169+
and isinstance(df.index, MultiIndex)
21682170
and ax.nlevels != df.index.nlevels
21692171
):
21702172
raise TypeError(
@@ -2428,7 +2430,7 @@ def is_nested_tuple(tup, labels) -> bool:
24282430

24292431
for k in tup:
24302432
if is_list_like(k) or isinstance(k, slice):
2431-
return isinstance(labels, ABCMultiIndex)
2433+
return isinstance(labels, MultiIndex)
24322434

24332435
return False
24342436

pandas/core/resample.py

+25-11
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
to_offset,
2121
)
2222
from pandas._typing import (
23+
FrameOrSeries,
2324
T,
2425
TimedeltaConvertibleTypes,
2526
TimestampConvertibleTypes,
@@ -1348,9 +1349,15 @@ def _upsample(self, method, limit=None, fill_value=None):
13481349

13491350
# Get the fill indexer
13501351
indexer = memb.get_indexer(new_index, method=method, limit=limit)
1351-
return self._wrap_result(
1352-
_take_new_index(obj, indexer, new_index, axis=self.axis)
1352+
new_obj = _take_new_index(
1353+
obj,
1354+
indexer,
1355+
# error: Argument 3 to "_take_new_index" has incompatible type
1356+
# "Optional[Any]"; expected "Index"
1357+
new_index, # type: ignore[arg-type]
1358+
axis=self.axis,
13531359
)
1360+
return self._wrap_result(new_obj)
13541361

13551362

13561363
class PeriodIndexResamplerGroupby(_GroupByMixin, PeriodIndexResampler):
@@ -1669,7 +1676,7 @@ def _adjust_bin_edges(self, binner, ax_values):
16691676
bin_edges = binner.asi8
16701677
return binner, bin_edges
16711678

1672-
def _get_time_delta_bins(self, ax):
1679+
def _get_time_delta_bins(self, ax: TimedeltaIndex):
16731680
if not isinstance(ax, TimedeltaIndex):
16741681
raise TypeError(
16751682
"axis must be a TimedeltaIndex, but got "
@@ -1792,17 +1799,24 @@ def _get_period_bins(self, ax: PeriodIndex):
17921799
return binner, bins, labels
17931800

17941801

1795-
def _take_new_index(obj, indexer, new_index, axis=0):
1802+
def _take_new_index(
1803+
obj: FrameOrSeries, indexer: np.ndarray, new_index: Index, axis: int = 0
1804+
) -> FrameOrSeries:
1805+
# indexer: np.ndarray[np.intp]
17961806

17971807
if isinstance(obj, ABCSeries):
17981808
new_values = algos.take_nd(obj._values, indexer)
1799-
return obj._constructor(new_values, index=new_index, name=obj.name)
1809+
# error: Incompatible return value type (got "Series", expected "FrameOrSeries")
1810+
return obj._constructor( # type: ignore[return-value]
1811+
new_values, index=new_index, name=obj.name
1812+
)
18001813
elif isinstance(obj, ABCDataFrame):
18011814
if axis == 1:
18021815
raise NotImplementedError("axis 1 is not supported")
1803-
return obj._constructor(
1804-
obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1)
1805-
)
1816+
new_mgr = obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1)
1817+
# error: Incompatible return value type
1818+
# (got "DataFrame", expected "FrameOrSeries")
1819+
return obj._constructor(new_mgr) # type: ignore[return-value]
18061820
else:
18071821
raise ValueError("'obj' should be either a Series or a DataFrame")
18081822

@@ -1825,7 +1839,7 @@ def _get_timestamp_range_edges(
18251839
The ending Timestamp of the range to be adjusted.
18261840
freq : pd.DateOffset
18271841
The dateoffset to which the Timestamps will be adjusted.
1828-
closed : {'right', 'left'}, default None
1842+
closed : {'right', 'left'}, default "left"
18291843
Which side of bin interval is closed.
18301844
origin : {'epoch', 'start', 'start_day'} or Timestamp, default 'start_day'
18311845
The timestamp on which to adjust the grouping. The timezone of origin must
@@ -1895,7 +1909,7 @@ def _get_period_range_edges(
18951909
The ending Period of the range to be adjusted.
18961910
freq : pd.DateOffset
18971911
The freq to which the Periods will be adjusted.
1898-
closed : {'right', 'left'}, default None
1912+
closed : {'right', 'left'}, default "left"
18991913
Which side of bin interval is closed.
19001914
origin : {'epoch', 'start', 'start_day'}, Timestamp, default 'start_day'
19011915
The timestamp on which to adjust the grouping. The timezone of origin must
@@ -2045,7 +2059,7 @@ def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None):
20452059
return new_obj
20462060

20472061

2048-
def _asfreq_compat(index, freq):
2062+
def _asfreq_compat(index: DatetimeIndex | PeriodIndex | TimedeltaIndex, freq):
20492063
"""
20502064
Helper to mimic asfreq on (empty) DatetimeIndex and TimedeltaIndex.
20512065

0 commit comments

Comments
 (0)