TYP: mostly datetimelike (#36696)

jbrockmendel · web-flow · commit 7a9307b47138 · 2020-10-01T10:05:43.000+01:00
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -288,6 +288,7 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi
     # tolist is not actually deprecated, just suppressed in the __dir__
     _deprecations = PandasObject._deprecations | frozenset(["tolist"])
     _typ = "categorical"
+    _can_hold_na = True
 
     def __init__(
         self, values, categories=None, ordered=None, dtype=None, fastpath=False
@@ -1268,10 +1269,10 @@ def __setstate__(self, state):
             setattr(self, k, v)
 
     @property
-    def nbytes(self):
+    def nbytes(self) -> int:
         return self._codes.nbytes + self.dtype.categories.values.nbytes
 
-    def memory_usage(self, deep=False):
+    def memory_usage(self, deep: bool = False) -> int:
         """
         Memory usage of my values
 
@@ -2144,10 +2145,6 @@ def equals(self, other: object) -> bool:
             return np.array_equal(self._codes, other_codes)
         return False
 
-    @property
-    def _can_hold_na(self):
-        return True
-
     @classmethod
     def _concat_same_type(self, to_concat):
         from pandas.core.dtypes.concat import union_categoricals
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -117,7 +117,9 @@ class AttributesMixin:
     _data: np.ndarray
 
     @classmethod
-    def _simple_new(cls, values: np.ndarray, **kwargs):
+    def _simple_new(
+        cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=None
+    ):
         raise AbstractMethodError(cls)
 
     @property
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -6,6 +6,7 @@
 
 from pandas._libs import lib, tslib
 from pandas._libs.tslibs import (
+    BaseOffset,
     NaT,
     NaTType,
     Resolution,
@@ -283,7 +284,9 @@ def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy=False):
             type(self)._validate_frequency(self, freq)
 
     @classmethod
-    def _simple_new(cls, values, freq=None, dtype=DT64NS_DTYPE):
+    def _simple_new(
+        cls, values, freq: Optional[BaseOffset] = None, dtype=DT64NS_DTYPE
+    ) -> "DatetimeArray":
         assert isinstance(values, np.ndarray)
         if values.dtype != DT64NS_DTYPE:
             assert values.dtype == "i8"
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -174,11 +174,13 @@ def __init__(self, values, freq=None, dtype=None, copy=False):
         self._dtype = PeriodDtype(freq)
 
     @classmethod
-    def _simple_new(cls, values: np.ndarray, freq=None, **kwargs) -> "PeriodArray":
+    def _simple_new(
+        cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=None
+    ) -> "PeriodArray":
         # alias for PeriodArray.__init__
         assertion_msg = "Should be numpy array of type i8"
         assert isinstance(values, np.ndarray) and values.dtype == "i8", assertion_msg
-        return cls(values, freq=freq, **kwargs)
+        return cls(values, freq=freq, dtype=dtype)
 
     @classmethod
     def _from_sequence(
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -308,7 +308,7 @@ def value_counts(self, dropna=False):
 
         return value_counts(self._ndarray, dropna=dropna).astype("Int64")
 
-    def memory_usage(self, deep=False):
+    def memory_usage(self, deep: bool = False) -> int:
         result = self._ndarray.nbytes
         if deep:
             return result + lib.memory_usage_of_objects(self._ndarray)
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -1,10 +1,11 @@
 from datetime import timedelta
-from typing import List, Union
+from typing import List, Optional, Union
 
 import numpy as np
 
 from pandas._libs import lib, tslibs
 from pandas._libs.tslibs import (
+    BaseOffset,
     NaT,
     NaTType,
     Period,
@@ -45,8 +46,8 @@
 from pandas.core.ops.common import unpack_zerodim_and_defer
 
 
-def _field_accessor(name, alias, docstring=None):
-    def f(self):
+def _field_accessor(name: str, alias: str, docstring: str):
+    def f(self) -> np.ndarray:
         values = self.asi8
         result = get_timedelta_field(values, alias)
         if self._hasnans:
@@ -121,7 +122,7 @@ def _box_func(self, x) -> Union[Timedelta, NaTType]:
         return Timedelta(x, unit="ns")
 
     @property
-    def dtype(self):
+    def dtype(self) -> np.dtype:
         """
         The dtype for the TimedeltaArray.
 
@@ -196,7 +197,9 @@ def __init__(self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy=False):
             type(self)._validate_frequency(self, freq)
 
     @classmethod
-    def _simple_new(cls, values, freq=None, dtype=TD64NS_DTYPE):
+    def _simple_new(
+        cls, values, freq: Optional[BaseOffset] = None, dtype=TD64NS_DTYPE
+    ) -> "TimedeltaArray":
         assert dtype == TD64NS_DTYPE, dtype
         assert isinstance(values, np.ndarray), type(values)
         if values.dtype != TD64NS_DTYPE:
@@ -211,8 +214,13 @@ def _simple_new(cls, values, freq=None, dtype=TD64NS_DTYPE):
 
     @classmethod
     def _from_sequence(
-        cls, data, dtype=TD64NS_DTYPE, copy=False, freq=lib.no_default, unit=None
-    ):
+        cls,
+        data,
+        dtype=TD64NS_DTYPE,
+        copy: bool = False,
+        freq=lib.no_default,
+        unit=None,
+    ) -> "TimedeltaArray":
         if dtype:
             _validate_td64_dtype(dtype)
 
@@ -240,7 +248,9 @@ def _from_sequence(
         return result
 
     @classmethod
-    def _generate_range(cls, start, end, periods, freq, closed=None):
+    def _generate_range(
+        cls, start, end, periods, freq, closed=None
+    ) -> "TimedeltaArray":
 
         periods = dtl.validate_periods(periods)
         if freq is None and any(x is None for x in [periods, start, end]):
@@ -298,7 +308,7 @@ def _maybe_clear_freq(self):
     # ----------------------------------------------------------------
     # Array-Like / EA-Interface Methods
 
-    def astype(self, dtype, copy=True):
+    def astype(self, dtype, copy: bool = True):
         # We handle
         #   --> timedelta64[ns]
         #   --> timedelta64
@@ -461,7 +471,7 @@ def _addsub_object_array(self, other, op):
             ) from err
 
     @unpack_zerodim_and_defer("__mul__")
-    def __mul__(self, other):
+    def __mul__(self, other) -> "TimedeltaArray":
         if is_scalar(other):
             # numpy will accept float and int, raise TypeError for others
             result = self._data * other
@@ -737,22 +747,22 @@ def __rdivmod__(self, other):
         res2 = other - res1 * self
         return res1, res2
 
-    def __neg__(self):
+    def __neg__(self) -> "TimedeltaArray":
         if self.freq is not None:
             return type(self)(-self._data, freq=-self.freq)
         return type(self)(-self._data)
 
-    def __pos__(self):
+    def __pos__(self) -> "TimedeltaArray":
         return type(self)(self._data, freq=self.freq)
 
-    def __abs__(self):
+    def __abs__(self) -> "TimedeltaArray":
         # Note: freq is not preserved
         return type(self)(np.abs(self._data))
 
     # ----------------------------------------------------------------
     # Conversion Methods - Vectorized analogues of Timedelta methods
 
-    def total_seconds(self):
+    def total_seconds(self) -> np.ndarray:
         """
         Return total duration of each element expressed in seconds.
 
diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -1347,7 +1347,7 @@ def memory_usage(self, deep=False):
 
         Parameters
         ----------
-        deep : bool
+        deep : bool, default False
             Introspect the data deeply, interrogate
             `object` dtypes for system-level memory consumption.
 
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -105,7 +105,7 @@ def _is_all_dates(self) -> bool:
     # Abstract data attributes
 
     @property
-    def values(self):
+    def values(self) -> np.ndarray:
         # Note: PeriodArray overrides this to return an ndarray of objects.
         return self._data._data
 
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
@@ -60,7 +60,7 @@ def _new_PeriodIndex(cls, **d):
 
 
 @inherit_names(
-    ["strftime", "to_timestamp", "start_time", "end_time"] + PeriodArray._field_ops,
+    ["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
     PeriodArray,
     wrap=True,
 )
@@ -149,12 +149,18 @@ class PeriodIndex(DatetimeIndexOpsMixin, Int64Index):
 
     # --------------------------------------------------------------------
     # methods that dispatch to array and wrap result in PeriodIndex
+    # These are defined here instead of via inherit_names for mypy
 
     @doc(PeriodArray.asfreq)
     def asfreq(self, freq=None, how: str = "E") -> "PeriodIndex":
         arr = self._data.asfreq(freq, how)
         return type(self)._simple_new(arr, name=self.name)
 
+    @doc(PeriodArray.to_timestamp)
+    def to_timestamp(self, freq=None, how="start") -> DatetimeIndex:
+        arr = self._data.to_timestamp(freq, how)
+        return DatetimeIndex._simple_new(arr, name=self.name)
+
     # ------------------------------------------------------------------------
     # Index Constructors
 
@@ -244,11 +250,11 @@ def _simple_new(cls, values: PeriodArray, name: Label = None):
     # Data
 
     @property
-    def values(self):
+    def values(self) -> np.ndarray:
         return np.asarray(self)
 
     @property
-    def _has_complex_internals(self):
+    def _has_complex_internals(self) -> bool:
         # used to avoid libreduction code paths, which raise or require conversion
         return True
 
@@ -402,7 +408,7 @@ def asof_locs(self, where, mask: np.ndarray) -> np.ndarray:
         return result
 
     @doc(Index.astype)
-    def astype(self, dtype, copy=True, how="start"):
+    def astype(self, dtype, copy: bool = True, how="start"):
         dtype = pandas_dtype(dtype)
 
         if is_datetime64_any_dtype(dtype):
@@ -421,7 +427,7 @@ def is_full(self) -> bool:
         """
         if len(self) == 0:
             return True
-        if not self.is_monotonic:
+        if not self.is_monotonic_increasing:
             raise ValueError("Index is not monotonic")
         values = self.asi8
         return ((values[1:] - values[:-1]) < 2).all()
@@ -432,7 +438,7 @@ def inferred_type(self) -> str:
         # indexing
         return "period"
 
-    def insert(self, loc, item):
+    def insert(self, loc: int, item):
         if not isinstance(item, Period) or self.freq != item.freq:
             return self.astype(object).insert(loc, item)
 
@@ -706,7 +712,7 @@ def _union(self, other, sort):
 
     # ------------------------------------------------------------------------
 
-    def memory_usage(self, deep=False):
+    def memory_usage(self, deep: bool = False) -> int:
         result = super().memory_usage(deep=deep)
         if hasattr(self, "_cache") and "_int64index" in self._cache:
             result += self._int64index.memory_usage(deep=deep)
diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
@@ -184,7 +184,7 @@ def _formatter_func(self):
     # -------------------------------------------------------------------
 
     @doc(Index.astype)
-    def astype(self, dtype, copy=True):
+    def astype(self, dtype, copy: bool = True):
         dtype = pandas_dtype(dtype)
         if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype):
             # Have to repeat the check for 'timedelta64' (not ns) dtype
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -394,7 +394,7 @@ def _constructor_expanddim(self) -> Type["DataFrame"]:
 
     # types
     @property
-    def _can_hold_na(self):
+    def _can_hold_na(self) -> bool:
         return self._mgr._can_hold_na
 
     _index = None
@@ -4904,10 +4904,7 @@ def to_timestamp(self, freq=None, how="start", copy=True) -> "Series":
 
         if not isinstance(self.index, PeriodIndex):
             raise TypeError(f"unsupported Type {type(self.index).__name__}")
-        # error: "PeriodIndex" has no attribute "to_timestamp"
-        new_index = self.index.to_timestamp(  # type: ignore[attr-defined]
-            freq=freq, how=how
-        )
+        new_index = self.index.to_timestamp(freq=freq, how=how)
         return self._constructor(new_values, index=new_index).__finalize__(
             self, method="to_timestamp"
         )
diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py
@@ -68,6 +68,8 @@ def construct_array_type(cls) -> Type["ArrowStringArray"]:
 
 
 class ArrowExtensionArray(ExtensionArray):
+    _data: pa.ChunkedArray
+
     @classmethod
     def from_scalars(cls, values):
         arr = pa.chunked_array([pa.array(np.asarray(values))])
@@ -129,7 +131,7 @@ def __or__(self, other):
         return self._boolean_op(other, operator.or_)
 
     @property
-    def nbytes(self):
+    def nbytes(self) -> int:
         return sum(
             x.size
             for chunk in self._data.chunks