Skip to content

Commit 7a9307b

Browse files
authored
TYP: mostly datetimelike (#36696)
1 parent 1d29bf0 commit 7a9307b

File tree

12 files changed

+60 -41 lines changed

pandas/core/arrays/categorical.py

+3 -6
Original file line number | Diff line number | Diff line change
@@ -288,6 +288,7 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi
288288
# tolist is not actually deprecated, just suppressed in the __dir__
289289
_deprecations = PandasObject._deprecations | frozenset(["tolist"])
290290
_typ = "categorical"
291+
_can_hold_na = True
291292

292293
def __init__(
293294
self, values, categories=None, ordered=None, dtype=None, fastpath=False
@@ -1268,10 +1269,10 @@ def __setstate__(self, state):
12681269
setattr(self, k, v)
12691270

12701271
@property
1271-
def nbytes(self):
1272+
def nbytes(self) -> int:
12721273
return self._codes.nbytes + self.dtype.categories.values.nbytes
12731274

1274-
def memory_usage(self, deep=False):
1275+
def memory_usage(self, deep: bool = False) -> int:
12751276
"""
12761277
Memory usage of my values
12771278
@@ -2144,10 +2145,6 @@ def equals(self, other: object) -> bool:
21442145
return np.array_equal(self._codes, other_codes)
21452146
return False
21462147

2147-
@property
2148-
def _can_hold_na(self):
2149-
return True
2150-
21512148
@classmethod
21522149
def _concat_same_type(self, to_concat):
21532150
from pandas.core.dtypes.concat import union_categoricals

pandas/core/arrays/datetimelike.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,9 @@ class AttributesMixin:
117117
_data: np.ndarray
118118

119119
@classmethod
120-
def _simple_new(cls, values: np.ndarray, **kwargs):
120+
def _simple_new(
121+
cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=None
122+
):
121123
raise AbstractMethodError(cls)
122124

123125
@property

pandas/core/arrays/datetimes.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
from pandas._libs import lib, tslib
88
from pandas._libs.tslibs import (
9+
BaseOffset,
910
NaT,
1011
NaTType,
1112
Resolution,
@@ -283,7 +284,9 @@ def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy=False):
283284
type(self)._validate_frequency(self, freq)
284285

285286
@classmethod
286-
def _simple_new(cls, values, freq=None, dtype=DT64NS_DTYPE):
287+
def _simple_new(
288+
cls, values, freq: Optional[BaseOffset] = None, dtype=DT64NS_DTYPE
289+
) -> "DatetimeArray":
287290
assert isinstance(values, np.ndarray)
288291
if values.dtype != DT64NS_DTYPE:
289292
assert values.dtype == "i8"

pandas/core/arrays/period.py

+4 -2
Original file line number | Diff line number | Diff line change
@@ -174,11 +174,13 @@ def __init__(self, values, freq=None, dtype=None, copy=False):
174174
self._dtype = PeriodDtype(freq)
175175

176176
@classmethod
177-
def _simple_new(cls, values: np.ndarray, freq=None, **kwargs) -> "PeriodArray":
177+
def _simple_new(
178+
cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=None
179+
) -> "PeriodArray":
178180
# alias for PeriodArray.__init__
179181
assertion_msg = "Should be numpy array of type i8"
180182
assert isinstance(values, np.ndarray) and values.dtype == "i8", assertion_msg
181-
return cls(values, freq=freq, **kwargs)
183+
return cls(values, freq=freq, dtype=dtype)
182184

183185
@classmethod
184186
def _from_sequence(

pandas/core/arrays/string_.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -308,7 +308,7 @@ def value_counts(self, dropna=False):
308308

309309
return value_counts(self._ndarray, dropna=dropna).astype("Int64")
310310

311-
def memory_usage(self, deep=False):
311+
def memory_usage(self, deep: bool = False) -> int:
312312
result = self._ndarray.nbytes
313313
if deep:
314314
return result + lib.memory_usage_of_objects(self._ndarray)

pandas/core/arrays/timedeltas.py

+24 -14
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,11 @@
11
from datetime import timedelta
2-
from typing import List, Union
2+
from typing import List, Optional, Union
33

44
import numpy as np
55

66
from pandas._libs import lib, tslibs
77
from pandas._libs.tslibs import (
8+
BaseOffset,
89
NaT,
910
NaTType,
1011
Period,
@@ -45,8 +46,8 @@
4546
from pandas.core.ops.common import unpack_zerodim_and_defer
4647

4748

48-
def _field_accessor(name, alias, docstring=None):
49-
def f(self):
49+
def _field_accessor(name: str, alias: str, docstring: str):
50+
def f(self) -> np.ndarray:
5051
values = self.asi8
5152
result = get_timedelta_field(values, alias)
5253
if self._hasnans:
@@ -121,7 +122,7 @@ def _box_func(self, x) -> Union[Timedelta, NaTType]:
121122
return Timedelta(x, unit="ns")
122123

123124
@property
124-
def dtype(self):
125+
def dtype(self) -> np.dtype:
125126
"""
126127
The dtype for the TimedeltaArray.
127128
@@ -196,7 +197,9 @@ def __init__(self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy=False):
196197
type(self)._validate_frequency(self, freq)
197198

198199
@classmethod
199-
def _simple_new(cls, values, freq=None, dtype=TD64NS_DTYPE):
200+
def _simple_new(
201+
cls, values, freq: Optional[BaseOffset] = None, dtype=TD64NS_DTYPE
202+
) -> "TimedeltaArray":
200203
assert dtype == TD64NS_DTYPE, dtype
201204
assert isinstance(values, np.ndarray), type(values)
202205
if values.dtype != TD64NS_DTYPE:
@@ -211,8 +214,13 @@ def _simple_new(cls, values, freq=None, dtype=TD64NS_DTYPE):
211214

212215
@classmethod
213216
def _from_sequence(
214-
cls, data, dtype=TD64NS_DTYPE, copy=False, freq=lib.no_default, unit=None
215-
):
217+
cls,
218+
data,
219+
dtype=TD64NS_DTYPE,
220+
copy: bool = False,
221+
freq=lib.no_default,
222+
unit=None,
223+
) -> "TimedeltaArray":
216224
if dtype:
217225
_validate_td64_dtype(dtype)
218226

@@ -240,7 +248,9 @@ def _from_sequence(
240248
return result
241249

242250
@classmethod
243-
def _generate_range(cls, start, end, periods, freq, closed=None):
251+
def _generate_range(
252+
cls, start, end, periods, freq, closed=None
253+
) -> "TimedeltaArray":
244254

245255
periods = dtl.validate_periods(periods)
246256
if freq is None and any(x is None for x in [periods, start, end]):
@@ -298,7 +308,7 @@ def _maybe_clear_freq(self):
298308
# ----------------------------------------------------------------
299309
# Array-Like / EA-Interface Methods
300310

301-
def astype(self, dtype, copy=True):
311+
def astype(self, dtype, copy: bool = True):
302312
# We handle
303313
# --> timedelta64[ns]
304314
# --> timedelta64
@@ -461,7 +471,7 @@ def _addsub_object_array(self, other, op):
461471
) from err
462472

463473
@unpack_zerodim_and_defer("__mul__")
464-
def __mul__(self, other):
474+
def __mul__(self, other) -> "TimedeltaArray":
465475
if is_scalar(other):
466476
# numpy will accept float and int, raise TypeError for others
467477
result = self._data * other
@@ -737,22 +747,22 @@ def __rdivmod__(self, other):
737747
res2 = other - res1 * self
738748
return res1, res2
739749

740-
def __neg__(self):
750+
def __neg__(self) -> "TimedeltaArray":
741751
if self.freq is not None:
742752
return type(self)(-self._data, freq=-self.freq)
743753
return type(self)(-self._data)
744754

745-
def __pos__(self):
755+
def __pos__(self) -> "TimedeltaArray":
746756
return type(self)(self._data, freq=self.freq)
747757

748-
def __abs__(self):
758+
def __abs__(self) -> "TimedeltaArray":
749759
# Note: freq is not preserved
750760
return type(self)(np.abs(self._data))
751761

752762
# ----------------------------------------------------------------
753763
# Conversion Methods - Vectorized analogues of Timedelta methods
754764

755-
def total_seconds(self):
765+
def total_seconds(self) -> np.ndarray:
756766
"""
757767
Return total duration of each element expressed in seconds.
758768

pandas/core/base.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1347,7 +1347,7 @@ def memory_usage(self, deep=False):
13471347
13481348
Parameters
13491349
----------
1350-
deep : bool
1350+
deep : bool, default False
13511351
Introspect the data deeply, interrogate
13521352
`object` dtypes for system-level memory consumption.
13531353

pandas/core/indexes/datetimelike.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,7 @@ def _is_all_dates(self) -> bool:
105105
# Abstract data attributes
106106

107107
@property
108-
def values(self):
108+
def values(self) -> np.ndarray:
109109
# Note: PeriodArray overrides this to return an ndarray of objects.
110110
return self._data._data
111111

pandas/core/indexes/period.py

+13 -7
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ def _new_PeriodIndex(cls, **d):
6060

6161

6262
@inherit_names(
63-
["strftime", "to_timestamp", "start_time", "end_time"] + PeriodArray._field_ops,
63+
["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
6464
PeriodArray,
6565
wrap=True,
6666
)
@@ -149,12 +149,18 @@ class PeriodIndex(DatetimeIndexOpsMixin, Int64Index):
149149

150150
# --------------------------------------------------------------------
151151
# methods that dispatch to array and wrap result in PeriodIndex
152+
# These are defined here instead of via inherit_names for mypy
152153

153154
@doc(PeriodArray.asfreq)
154155
def asfreq(self, freq=None, how: str = "E") -> "PeriodIndex":
155156
arr = self._data.asfreq(freq, how)
156157
return type(self)._simple_new(arr, name=self.name)
157158

159+
@doc(PeriodArray.to_timestamp)
160+
def to_timestamp(self, freq=None, how="start") -> DatetimeIndex:
161+
arr = self._data.to_timestamp(freq, how)
162+
return DatetimeIndex._simple_new(arr, name=self.name)
163+
158164
# ------------------------------------------------------------------------
159165
# Index Constructors
160166

@@ -244,11 +250,11 @@ def _simple_new(cls, values: PeriodArray, name: Label = None):
244250
# Data
245251

246252
@property
247-
def values(self):
253+
def values(self) -> np.ndarray:
248254
return np.asarray(self)
249255

250256
@property
251-
def _has_complex_internals(self):
257+
def _has_complex_internals(self) -> bool:
252258
# used to avoid libreduction code paths, which raise or require conversion
253259
return True
254260

@@ -402,7 +408,7 @@ def asof_locs(self, where, mask: np.ndarray) -> np.ndarray:
402408
return result
403409

404410
@doc(Index.astype)
405-
def astype(self, dtype, copy=True, how="start"):
411+
def astype(self, dtype, copy: bool = True, how="start"):
406412
dtype = pandas_dtype(dtype)
407413

408414
if is_datetime64_any_dtype(dtype):
@@ -421,7 +427,7 @@ def is_full(self) -> bool:
421427
"""
422428
if len(self) == 0:
423429
return True
424-
if not self.is_monotonic:
430+
if not self.is_monotonic_increasing:
425431
raise ValueError("Index is not monotonic")
426432
values = self.asi8
427433
return ((values[1:] - values[:-1]) < 2).all()
@@ -432,7 +438,7 @@ def inferred_type(self) -> str:
432438
# indexing
433439
return "period"
434440

435-
def insert(self, loc, item):
441+
def insert(self, loc: int, item):
436442
if not isinstance(item, Period) or self.freq != item.freq:
437443
return self.astype(object).insert(loc, item)
438444

@@ -706,7 +712,7 @@ def _union(self, other, sort):
706712

707713
# ------------------------------------------------------------------------
708714

709-
def memory_usage(self, deep=False):
715+
def memory_usage(self, deep: bool = False) -> int:
710716
result = super().memory_usage(deep=deep)
711717
if hasattr(self, "_cache") and "_int64index" in self._cache:
712718
result += self._int64index.memory_usage(deep=deep)

pandas/core/indexes/timedeltas.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -184,7 +184,7 @@ def _formatter_func(self):
184184
# -------------------------------------------------------------------
185185

186186
@doc(Index.astype)
187-
def astype(self, dtype, copy=True):
187+
def astype(self, dtype, copy: bool = True):
188188
dtype = pandas_dtype(dtype)
189189
if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype):
190190
# Have to repeat the check for 'timedelta64' (not ns) dtype

pandas/core/series.py

+2 -5
Original file line number | Diff line number | Diff line change
@@ -394,7 +394,7 @@ def _constructor_expanddim(self) -> Type["DataFrame"]:
394394

395395
# types
396396
@property
397-
def _can_hold_na(self):
397+
def _can_hold_na(self) -> bool:
398398
return self._mgr._can_hold_na
399399

400400
_index = None
@@ -4904,10 +4904,7 @@ def to_timestamp(self, freq=None, how="start", copy=True) -> "Series":
49044904

49054905
if not isinstance(self.index, PeriodIndex):
49064906
raise TypeError(f"unsupported Type {type(self.index).__name__}")
4907-
# error: "PeriodIndex" has no attribute "to_timestamp"
4908-
new_index = self.index.to_timestamp( # type: ignore[attr-defined]
4909-
freq=freq, how=how
4910-
)
4907+
new_index = self.index.to_timestamp(freq=freq, how=how)
49114908
return self._constructor(new_values, index=new_index).__finalize__(
49124909
self, method="to_timestamp"
49134910
)

pandas/tests/extension/arrow/arrays.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,8 @@ def construct_array_type(cls) -> Type["ArrowStringArray"]:
6868

6969

7070
class ArrowExtensionArray(ExtensionArray):
71+
_data: pa.ChunkedArray
72+
7173
@classmethod
7274
def from_scalars(cls, values):
7375
arr = pa.chunked_array([pa.array(np.asarray(values))])
@@ -129,7 +131,7 @@ def __or__(self, other):
129131
return self._boolean_op(other, operator.or_)
130132

131133
@property
132-
def nbytes(self):
134+
def nbytes(self) -> int:
133135
return sum(
134136
x.size
135137
for chunk in self._data.chunks

0 commit comments

Comments
 (0)