TYP: annotations (pandas-dev#40955)

jbrockmendel · JulianWgs · commit 4b54433d81c4 · 2021-07-03T13:07:59.000+02:00
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -791,7 +791,8 @@ cdef class StringHashTable(HashTable):
             raise KeyError(key)
 
     @cython.boundscheck(False)
-    def get_indexer(self, ndarray[object] values):
+    def get_indexer(self, ndarray[object] values) -> ndarray:
+        # -> np.ndarray[np.intp]
         cdef:
             Py_ssize_t i, n = len(values)
             ndarray[intp_t] labels = np.empty(n, dtype=np.intp)
diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
@@ -4,6 +4,7 @@
 from typing import (
     Any,
     Callable,
+    Generator,
 )
 
 import numpy as np
@@ -52,8 +53,7 @@ def is_bool_array(values: np.ndarray, skipna: bool = False): ...
 
 def fast_multiget(mapping: dict, keys: np.ndarray, default=np.nan) -> ArrayLike: ...
 
-# TODO: gen: Generator?
-def fast_unique_multiple_list_gen(gen: object, sort: bool = True) -> list: ...
+def fast_unique_multiple_list_gen(gen: Generator, sort: bool = True) -> list: ...
 def fast_unique_multiple_list(lists: list, sort: bool = True) -> list: ...
 def fast_unique_multiple(arrays: list, sort: bool = True) -> list: ...
 
@@ -90,10 +90,9 @@ def infer_datetimelike_array(
     arr: np.ndarray  # np.ndarray[object]
 ) -> str: ...
 
-# TODO: new_dtype -> np.dtype?
 def astype_intsafe(
     arr: np.ndarray,  # np.ndarray[object]
-    new_dtype,
+    new_dtype: np.dtype,
 ) -> np.ndarray: ...
 
 def fast_zip(ndarrays: list) -> np.ndarray: ...  # np.ndarray[object]
@@ -134,15 +133,13 @@ def memory_usage_of_objects(
 ) -> int: ...  # np.int64
 
 
-# TODO: f: Callable?
-# TODO: dtype -> DtypeObj?
 def map_infer_mask(
     arr: np.ndarray,
     f: Callable[[Any], Any],
     mask: np.ndarray,  # const uint8_t[:]
     convert: bool = ...,
     na_value: Any = ...,
-    dtype: Any = ...,
+    dtype: np.dtype = ...,
 ) -> ArrayLike: ...
 
 def indices_fast(
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -633,7 +633,7 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool:
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def astype_intsafe(ndarray[object] arr, new_dtype) -> ndarray:
+def astype_intsafe(ndarray[object] arr, cnp.dtype new_dtype) -> ndarray:
     cdef:
         Py_ssize_t i, n = len(arr)
         object val
@@ -661,7 +661,8 @@ cpdef ndarray[object] ensure_string_array(
         bint copy=True,
         bint skipna=True,
 ):
-    """Returns a new numpy array with object dtype and only strings and na values.
+    """
+    Returns a new numpy array with object dtype and only strings and na values.
 
     Parameters
     ----------
@@ -679,7 +680,7 @@ cpdef ndarray[object] ensure_string_array(
 
     Returns
     -------
-    ndarray
+    np.ndarray[object]
         An array with the input array's elements casted to str or nan-like.
     """
     cdef:
@@ -2452,7 +2453,8 @@ no_default = NoDefault.no_default  # Sentinel indicating the default value.
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=True,
-                   object na_value=no_default, object dtype=object) -> "ArrayLike":
+                   object na_value=no_default, cnp.dtype dtype=np.dtype(object)
+                   ) -> "ArrayLike":
     """
     Substitute for np.vectorize with pandas-friendly dtype inference.
 
@@ -2472,7 +2474,7 @@ def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=Tr
 
     Returns
     -------
-    ndarray
+    np.ndarray or ExtensionArray
     """
     cdef:
         Py_ssize_t i, n
diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx
@@ -93,7 +93,7 @@ def build_field_sarray(const int64_t[:] dtindex):
     return out
 
 
-def month_position_check(fields, weekdays):
+def month_position_check(fields, weekdays) -> str | None:
     cdef:
         int32_t daysinmonth, y, m, d
         bint calendar_end = True
@@ -755,7 +755,7 @@ cdef inline ndarray[int64_t] _roundup_int64(values, int64_t unit):
     return _floor_int64(values + unit // 2, unit)
 
 
-def round_nsint64(values: np.ndarray, mode: RoundTo, nanos) -> np.ndarray:
+def round_nsint64(values: np.ndarray, mode: RoundTo, nanos: int) -> np.ndarray:
     """
     Applies rounding mode at given frequency
 
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -609,7 +609,7 @@ def argsort(
 
         Returns
         -------
-        ndarray
+        np.ndarray[np.intp]
             Array of indices that sort ``self``. If NaN values are contained,
             NaN values are placed at the end.
 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1599,7 +1599,7 @@ def argsort(self, ascending=True, kind="quicksort", **kwargs):
 
         Returns
         -------
-        numpy.array
+        np.ndarray[np.intp]
 
         See Also
         --------
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -135,10 +135,10 @@ class TimedeltaArray(dtl.TimelikeOps):
     # define my properties & methods for delegation
     _other_ops: list[str] = []
     _bool_ops: list[str] = []
-    _object_ops = ["freq"]
-    _field_ops = ["days", "seconds", "microseconds", "nanoseconds"]
-    _datetimelike_ops = _field_ops + _object_ops + _bool_ops
-    _datetimelike_methods = [
+    _object_ops: list[str] = ["freq"]
+    _field_ops: list[str] = ["days", "seconds", "microseconds", "nanoseconds"]
+    _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops
+    _datetimelike_methods: list[str] = [
         "to_pytimedelta",
         "total_seconds",
         "round",
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
@@ -164,7 +164,7 @@ def _get_combined_index(
     return index
 
 
-def union_indexes(indexes, sort=True) -> Index:
+def union_indexes(indexes, sort: bool = True) -> Index:
     """
     Return the union of indexes.
 
@@ -273,7 +273,7 @@ def _sanitize_and_check(indexes):
         return indexes, "array"
 
 
-def all_indexes_same(indexes):
+def all_indexes_same(indexes) -> bool:
     """
     Determine if all indexes contain the same elements.
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -215,7 +215,7 @@ def join(
     return cast(F, join)
 
 
-def disallow_kwargs(kwargs: dict[str, Any]):
+def disallow_kwargs(kwargs: dict[str, Any]) -> None:
     if kwargs:
         raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}")
 
@@ -626,7 +626,7 @@ def _maybe_check_unique(self) -> None:
             raise DuplicateLabelError(msg)
 
     @final
-    def _format_duplicate_message(self):
+    def _format_duplicate_message(self) -> DataFrame:
         """
         Construct the DataFrame for a DuplicateLabelError.
 
@@ -789,7 +789,7 @@ def __array_wrap__(self, result, context=None):
         return Index(result, **attrs)
 
     @cache_readonly
-    def dtype(self):
+    def dtype(self) -> DtypeObj:
         """
         Return the dtype object of the underlying data.
         """
@@ -1064,11 +1064,11 @@ def copy(
         return new_index
 
     @final
-    def __copy__(self, **kwargs):
+    def __copy__(self: _IndexT, **kwargs) -> _IndexT:
         return self.copy(**kwargs)
 
     @final
-    def __deepcopy__(self, memo=None):
+    def __deepcopy__(self: _IndexT, memo=None) -> _IndexT:
         """
         Parameters
         ----------
@@ -1354,7 +1354,7 @@ def to_series(self, index=None, name: Hashable = None) -> Series:
 
         return Series(self._values.copy(), index=index, name=name)
 
-    def to_frame(self, index: bool = True, name=None) -> DataFrame:
+    def to_frame(self, index: bool = True, name: Hashable = None) -> DataFrame:
         """
         Create a DataFrame with a column containing the Index.
 
@@ -1426,7 +1426,7 @@ def name(self):
         return self._name
 
     @name.setter
-    def name(self, value):
+    def name(self, value: Hashable):
         if self._no_setting_name:
             # Used in MultiIndex.levels to avoid silently ignoring name updates.
             raise RuntimeError(
@@ -2367,7 +2367,7 @@ def _is_all_dates(self) -> bool:
 
     @cache_readonly
     @final
-    def is_all_dates(self):
+    def is_all_dates(self) -> bool:
         """
         Whether or not the index values only consist of dates.
         """
@@ -3380,7 +3380,7 @@ def get_loc(self, key, method=None, tolerance=None):
 
         Returns
         -------
-        indexer : ndarray of int
+        indexer : np.ndarray[np.intp]
             Integers from 0 to n - 1 indicating that the index at these
             positions matches the corresponding target values. Missing values
             in the target are marked by -1.
@@ -4610,7 +4610,7 @@ def _can_hold_identifiers_and_holds_name(self, name) -> bool:
             return name in self
         return False
 
-    def append(self, other) -> Index:
+    def append(self, other: Index | Sequence[Index]) -> Index:
         """
         Append a collection of Index options together.
 
@@ -4627,7 +4627,9 @@ def append(self, other) -> Index:
         if isinstance(other, (list, tuple)):
             to_concat += list(other)
         else:
-            to_concat.append(other)
+            # error: Argument 1 to "append" of "list" has incompatible type
+            # "Union[Index, Sequence[Index]]"; expected "Index"
+            to_concat.append(other)  # type: ignore[arg-type]
 
         for obj in to_concat:
             if not isinstance(obj, Index):
@@ -5181,11 +5183,11 @@ def set_value(self, arr, key, value):
 
         Returns
         -------
-        indexer : ndarray of int
+        indexer : np.ndarray[np.intp]
             Integers from 0 to n - 1 indicating that the index at these
             positions matches the corresponding target values. Missing values
             in the target are marked by -1.
-        missing : ndarray of int
+        missing : np.ndarray[np.intp]
             An indexer into the target of the values not found.
             These correspond to the -1 in the indexer array.
         """
@@ -5227,7 +5229,7 @@ def get_indexer_for(self, target, **kwargs) -> np.ndarray:
 
         Returns
         -------
-        numpy.ndarray
+        np.ndarray[np.intp]
             List of indices.
         """
         if self._index_as_unique:
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
@@ -457,8 +457,8 @@ def reindex(
         # in which case we are going to conform to the passed Categorical
         new_target = np.asarray(new_target)
         if is_categorical_dtype(target):
-            new_target = Categorical(new_target, dtype=target.dtype)
-            new_target = type(self)._simple_new(new_target, name=self.name)
+            cat = Categorical(new_target, dtype=target.dtype)
+            new_target = type(self)._simple_new(cat, name=self.name)
         else:
             new_target = Index(new_target, name=self.name)
 
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -391,7 +391,7 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
     # --------------------------------------------------------------------
     # Rendering Methods
 
-    def _mpl_repr(self):
+    def _mpl_repr(self) -> np.ndarray:
         # how to represent ourselves to matplotlib
         return ints_to_pydatetime(self.asi8, self.tz)
 
@@ -448,7 +448,7 @@ def _maybe_utc_convert(self, other: Index) -> tuple[DatetimeIndex, Index]:
 
     # --------------------------------------------------------------------
 
-    def _get_time_micros(self):
+    def _get_time_micros(self) -> np.ndarray:
         """
         Return the number of microseconds since midnight.
 
@@ -541,7 +541,7 @@ def to_series(self, keep_tz=lib.no_default, index=None, name=None):
 
         return Series(values, index=index, name=name)
 
-    def snap(self, freq="S"):
+    def snap(self, freq="S") -> DatetimeIndex:
         """
         Snap time stamps to nearest occurring frequency.
 
@@ -891,7 +891,7 @@ def indexer_at_time(self, time, asof: bool = False) -> np.ndarray:
         else:
             time_micros = self._get_time_micros()
         micros = _time_to_micros(time)
-        return (micros == time_micros).nonzero()[0]
+        return (time_micros == micros).nonzero()[0]
 
     def indexer_between_time(
         self, start_time, end_time, include_start: bool = True, include_end: bool = True
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -390,7 +390,7 @@ def from_tuples(
     # --------------------------------------------------------------------
 
     @cache_readonly
-    def _engine(self):
+    def _engine(self) -> IntervalTree:
         left = self._maybe_convert_i8(self.left)
         right = self._maybe_convert_i8(self.right)
         return IntervalTree(left, right, closed=self.closed)
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -2673,6 +2673,7 @@ def _get_indexer(
         limit: int | None = None,
         tolerance=None,
     ) -> np.ndarray:
+        # returned ndarray is np.intp
 
         # empty indexer
         if not len(target):
diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
diff --git a/pandas/core/series.py b/pandas/core/series.py