Skip to content

Commit c894b3f

Browse files
authored
TYP: datetimelike arrays (#52334)
* TYP: use Self in Categorical
* TYP: PeriodArray, Categorical
* TYP: datetimelike arrays
1 parent 15fd7d7 commit c894b3f

File tree

7 files changed

+93
-63
lines changed

7 files changed

+93
-63
lines changed

pandas/core/arrays/categorical.py

+20 -21
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,6 @@
109109
Shape,
110110
SortKind,
111111
npt,
112-
type_t,
113112
)
114113

115114
from pandas import (
@@ -473,8 +472,8 @@ def _internal_fill_value(self) -> int:
473472
@classmethod
474473
def _from_sequence(
475474
cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
476-
) -> Categorical:
477-
return Categorical(scalars, dtype=dtype, copy=copy)
475+
) -> Self:
476+
return cls(scalars, dtype=dtype, copy=copy)
478477

479478
@overload
480479
def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
@@ -563,7 +562,7 @@ def to_list(self):
563562
@classmethod
564563
def _from_inferred_categories(
565564
cls, inferred_categories, inferred_codes, dtype, true_values=None
566-
):
565+
) -> Self:
567566
"""
568567
Construct a Categorical from inferred values.
569568
@@ -632,7 +631,7 @@ def _from_inferred_categories(
632631
@classmethod
633632
def from_codes(
634633
cls, codes, categories=None, ordered=None, dtype: Dtype | None = None
635-
) -> Categorical:
634+
) -> Self:
636635
"""
637636
Make a Categorical type from codes and categories or dtype.
638637
@@ -792,7 +791,7 @@ def _set_categories(self, categories, fastpath: bool = False) -> None:
792791

793792
super().__init__(self._ndarray, new_dtype)
794793

795-
def _set_dtype(self, dtype: CategoricalDtype) -> Categorical:
794+
def _set_dtype(self, dtype: CategoricalDtype) -> Self:
796795
"""
797796
Internal method for directly updating the CategoricalDtype
798797
@@ -808,7 +807,7 @@ def _set_dtype(self, dtype: CategoricalDtype) -> Categorical:
808807
codes = recode_for_categories(self.codes, self.categories, dtype.categories)
809808
return type(self)(codes, dtype=dtype, fastpath=True)
810809

811-
def set_ordered(self, value: bool) -> Categorical:
810+
def set_ordered(self, value: bool) -> Self:
812811
"""
813812
Set the ordered attribute to the boolean value.
814813
@@ -822,7 +821,7 @@ def set_ordered(self, value: bool) -> Categorical:
822821
NDArrayBacked.__init__(cat, cat._ndarray, new_dtype)
823822
return cat
824823

825-
def as_ordered(self) -> Categorical:
824+
def as_ordered(self) -> Self:
826825
"""
827826
Set the Categorical to be ordered.
828827
@@ -833,7 +832,7 @@ def as_ordered(self) -> Categorical:
833832
"""
834833
return self.set_ordered(True)
835834

836-
def as_unordered(self) -> Categorical:
835+
def as_unordered(self) -> Self:
837836
"""
838837
Set the Categorical to be unordered.
839838
@@ -912,7 +911,7 @@ def set_categories(self, new_categories, ordered=None, rename: bool = False):
912911
NDArrayBacked.__init__(cat, codes, new_dtype)
913912
return cat
914913

915-
def rename_categories(self, new_categories) -> Categorical:
914+
def rename_categories(self, new_categories) -> Self:
916915
"""
917916
Rename categories.
918917
@@ -984,7 +983,7 @@ def rename_categories(self, new_categories) -> Categorical:
984983
cat._set_categories(new_categories)
985984
return cat
986985

987-
def reorder_categories(self, new_categories, ordered=None):
986+
def reorder_categories(self, new_categories, ordered=None) -> Self:
988987
"""
989988
Reorder categories as specified in new_categories.
990989
@@ -1027,7 +1026,7 @@ def reorder_categories(self, new_categories, ordered=None):
10271026
)
10281027
return self.set_categories(new_categories, ordered=ordered)
10291028

1030-
def add_categories(self, new_categories) -> Categorical:
1029+
def add_categories(self, new_categories) -> Self:
10311030
"""
10321031
Add new categories.
10331032
@@ -1096,7 +1095,7 @@ def add_categories(self, new_categories) -> Categorical:
10961095
NDArrayBacked.__init__(cat, codes, new_dtype)
10971096
return cat
10981097

1099-
def remove_categories(self, removals):
1098+
def remove_categories(self, removals) -> Self:
11001099
"""
11011100
Remove the specified categories.
11021101
@@ -1152,7 +1151,7 @@ def remove_categories(self, removals):
11521151

11531152
return self.set_categories(new_categories, ordered=self.ordered, rename=False)
11541153

1155-
def remove_unused_categories(self) -> Categorical:
1154+
def remove_unused_categories(self) -> Self:
11561155
"""
11571156
Remove categories which are not used.
11581157
@@ -1454,7 +1453,7 @@ def memory_usage(self, deep: bool = False) -> int:
14541453
"""
14551454
return self._codes.nbytes + self.dtype.categories.memory_usage(deep=deep)
14561455

1457-
def isna(self) -> np.ndarray:
1456+
def isna(self) -> npt.NDArray[np.bool_]:
14581457
"""
14591458
Detect missing values
14601459
@@ -1475,7 +1474,7 @@ def isna(self) -> np.ndarray:
14751474

14761475
isnull = isna
14771476

1478-
def notna(self) -> np.ndarray:
1477+
def notna(self) -> npt.NDArray[np.bool_]:
14791478
"""
14801479
Inverse of isna
14811480
@@ -1544,8 +1543,8 @@ def value_counts(self, dropna: bool = True) -> Series:
15441543
# "ExtensionDtype"
15451544
@classmethod
15461545
def _empty( # type: ignore[override]
1547-
cls: type_t[Categorical], shape: Shape, dtype: CategoricalDtype
1548-
) -> Categorical:
1546+
cls, shape: Shape, dtype: CategoricalDtype
1547+
) -> Self:
15491548
"""
15501549
Analogous to np.empty(shape, dtype=dtype)
15511550
@@ -1651,7 +1650,7 @@ def sort_values(
16511650
inplace: Literal[False] = ...,
16521651
ascending: bool = ...,
16531652
na_position: str = ...,
1654-
) -> Categorical:
1653+
) -> Self:
16551654
...
16561655

16571656
@overload
@@ -1666,7 +1665,7 @@ def sort_values(
16661665
inplace: bool = False,
16671666
ascending: bool = True,
16681667
na_position: str = "last",
1669-
) -> Categorical | None:
1668+
) -> Self | None:
16701669
"""
16711670
Sort the Categorical by category value returning a new
16721671
Categorical by default.
@@ -1769,7 +1768,7 @@ def _rank(
17691768
pct=pct,
17701769
)
17711770

1772-
def _values_for_rank(self):
1771+
def _values_for_rank(self) -> np.ndarray:
17731772
"""
17741773
For correctly ranking ordered categorical data. See GH#15420
17751774

pandas/core/arrays/datetimelike.py

+13 -2
Original file line number | Diff line number | Diff line change
@@ -1094,6 +1094,7 @@ def _sub_datetimelike(self, other: Timestamp | DatetimeArray) -> TimedeltaArray:
10941094
res_m8 = res_values.view(f"timedelta64[{self.unit}]")
10951095

10961096
new_freq = self._get_arithmetic_result_freq(other)
1097+
new_freq = cast("Tick | None", new_freq)
10971098
return TimedeltaArray._simple_new(res_m8, dtype=res_m8.dtype, freq=new_freq)
10981099

10991100
@final
@@ -1161,7 +1162,12 @@ def _add_timedeltalike(self, other: Timedelta | TimedeltaArray):
11611162

11621163
new_freq = self._get_arithmetic_result_freq(other)
11631164

1164-
return type(self)._simple_new(res_values, dtype=self.dtype, freq=new_freq)
1165+
# error: Argument "dtype" to "_simple_new" of "DatetimeArray" has
1166+
# incompatible type "Union[dtype[datetime64], DatetimeTZDtype,
1167+
# dtype[timedelta64]]"; expected "Union[dtype[datetime64], DatetimeTZDtype]"
1168+
return type(self)._simple_new(
1169+
res_values, dtype=self.dtype, freq=new_freq # type: ignore[arg-type]
1170+
)
11651171

11661172
@final
11671173
def _add_nat(self):
@@ -1179,7 +1185,12 @@ def _add_nat(self):
11791185
result = np.empty(self.shape, dtype=np.int64)
11801186
result.fill(iNaT)
11811187
result = result.view(self._ndarray.dtype) # preserve reso
1182-
return type(self)._simple_new(result, dtype=self.dtype, freq=None)
1188+
# error: Argument "dtype" to "_simple_new" of "DatetimeArray" has
1189+
# incompatible type "Union[dtype[timedelta64], dtype[datetime64],
1190+
# DatetimeTZDtype]"; expected "Union[dtype[datetime64], DatetimeTZDtype]"
1191+
return type(self)._simple_new(
1192+
result, dtype=self.dtype, freq=None # type: ignore[arg-type]
1193+
)
11831194

11841195
@final
11851196
def _sub_nat(self):

pandas/core/arrays/datetimes.py

+18 -15
Original file line number | Diff line number | Diff line change
@@ -81,6 +81,7 @@
8181
from pandas._typing import (
8282
DateTimeErrorChoices,
8383
IntervalClosedType,
84+
Self,
8485
TimeAmbiguous,
8586
TimeNonexistent,
8687
npt,
@@ -92,7 +93,9 @@
9293
_midnight = time(0, 0)
9394

9495

95-
def tz_to_dtype(tz: tzinfo | None, unit: str = "ns"):
96+
def tz_to_dtype(
97+
tz: tzinfo | None, unit: str = "ns"
98+
) -> np.dtype[np.datetime64] | DatetimeTZDtype:
9699
"""
97100
Return a datetime64[ns] dtype appropriate for the given timezone.
98101
@@ -254,7 +257,7 @@ def _scalar_type(self) -> type[Timestamp]:
254257
# -----------------------------------------------------------------
255258
# Constructors
256259

257-
_dtype: np.dtype | DatetimeTZDtype
260+
_dtype: np.dtype[np.datetime64] | DatetimeTZDtype
258261
_freq: BaseOffset | None = None
259262
_default_dtype = DT64NS_DTYPE # used in TimeLikeOps.__init__
260263

@@ -269,10 +272,10 @@ def _validate_dtype(cls, values, dtype):
269272
@classmethod
270273
def _simple_new( # type: ignore[override]
271274
cls,
272-
values: np.ndarray,
275+
values: npt.NDArray[np.datetime64],
273276
freq: BaseOffset | None = None,
274-
dtype=DT64NS_DTYPE,
275-
) -> DatetimeArray:
277+
dtype: np.dtype[np.datetime64] | DatetimeTZDtype = DT64NS_DTYPE,
278+
) -> Self:
276279
assert isinstance(values, np.ndarray)
277280
assert dtype.kind == "M"
278281
if isinstance(dtype, np.dtype):
@@ -386,7 +389,7 @@ def _generate_range( # type: ignore[override]
386389
inclusive: IntervalClosedType = "both",
387390
*,
388391
unit: str | None = None,
389-
) -> DatetimeArray:
392+
) -> Self:
390393
periods = dtl.validate_periods(periods)
391394
if freq is None and any(x is None for x in [periods, start, end]):
392395
raise ValueError("Must provide freq argument if no data is supplied")
@@ -535,7 +538,7 @@ def _box_func(self, x: np.datetime64) -> Timestamp | NaTType:
535538
# error: Return type "Union[dtype, DatetimeTZDtype]" of "dtype"
536539
# incompatible with return type "ExtensionDtype" in supertype
537540
# "ExtensionArray"
538-
def dtype(self) -> np.dtype | DatetimeTZDtype: # type: ignore[override]
541+
def dtype(self) -> np.dtype[np.datetime64] | DatetimeTZDtype: # type: ignore[override] # noqa:E501
539542
"""
540543
The dtype for the DatetimeArray.
541544
@@ -753,7 +756,7 @@ def _assert_tzawareness_compat(self, other) -> None:
753756
# -----------------------------------------------------------------
754757
# Arithmetic Methods
755758

756-
def _add_offset(self, offset) -> DatetimeArray:
759+
def _add_offset(self, offset) -> Self:
757760
assert not isinstance(offset, Tick)
758761

759762
if self.tz is not None:
@@ -776,7 +779,7 @@ def _add_offset(self, offset) -> DatetimeArray:
776779
return result.tz_localize(self.tz)
777780

778781
else:
779-
result = DatetimeArray._simple_new(result, dtype=result.dtype)
782+
result = type(self)._simple_new(result, dtype=result.dtype)
780783
if self.tz is not None:
781784
result = result.tz_localize(self.tz)
782785

@@ -797,7 +800,7 @@ def _local_timestamps(self) -> npt.NDArray[np.int64]:
797800
return self.asi8
798801
return tz_convert_from_utc(self.asi8, self.tz, reso=self._creso)
799802

800-
def tz_convert(self, tz) -> DatetimeArray:
803+
def tz_convert(self, tz) -> Self:
801804
"""
802805
Convert tz-aware Datetime Array/Index from one time zone to another.
803806
@@ -879,7 +882,7 @@ def tz_localize(
879882
tz,
880883
ambiguous: TimeAmbiguous = "raise",
881884
nonexistent: TimeNonexistent = "raise",
882-
) -> DatetimeArray:
885+
) -> Self:
883886
"""
884887
Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index.
885888
@@ -1045,18 +1048,18 @@ def tz_localize(
10451048
nonexistent=nonexistent,
10461049
creso=self._creso,
10471050
)
1048-
new_dates = new_dates.view(f"M8[{self.unit}]")
1051+
new_dates_dt64 = new_dates.view(f"M8[{self.unit}]")
10491052
dtype = tz_to_dtype(tz, unit=self.unit)
10501053

10511054
freq = None
1052-
if timezones.is_utc(tz) or (len(self) == 1 and not isna(new_dates[0])):
1055+
if timezones.is_utc(tz) or (len(self) == 1 and not isna(new_dates_dt64[0])):
10531056
# we can preserve freq
10541057
# TODO: Also for fixed-offsets
10551058
freq = self.freq
10561059
elif tz is None and self.tz is None:
10571060
# no-op
10581061
freq = self.freq
1059-
return self._simple_new(new_dates, dtype=dtype, freq=freq)
1062+
return self._simple_new(new_dates_dt64, dtype=dtype, freq=freq)
10601063

10611064
# ----------------------------------------------------------------
10621065
# Conversion Methods - Vectorized analogues of Timestamp methods
@@ -1071,7 +1074,7 @@ def to_pydatetime(self) -> npt.NDArray[np.object_]:
10711074
"""
10721075
return ints_to_pydatetime(self.asi8, tz=self.tz, reso=self._creso)
10731076

1074-
def normalize(self) -> DatetimeArray:
1077+
def normalize(self) -> Self:
10751078
"""
10761079
Convert times to midnight.
10771080

0 commit comments

Comments
 (0)