Skip to content

CLN/TYP: _libs #40765

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 25 additions & 26 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
ndarray[int64_t] counts,
ndarray[float64_t, ndim=2] values,
ndarray[intp_t] labels,
Py_ssize_t min_count=-1):
Py_ssize_t min_count=-1) -> None:
"""
Only aggregates on axis=0
"""
Expand Down Expand Up @@ -148,7 +148,7 @@ def group_cumprod_float64(float64_t[:, ::1] out,
const intp_t[:] labels,
int ngroups,
bint is_datetimelike,
bint skipna=True):
bint skipna=True) -> None:
"""
Cumulative product of columns of `values`, in row groups `labels`.

Expand Down Expand Up @@ -205,7 +205,7 @@ def group_cumsum(numeric[:, ::1] out,
const intp_t[:] labels,
int ngroups,
is_datetimelike,
bint skipna=True):
bint skipna=True) -> None:
"""
Cumulative sum of columns of `values`, in row groups `labels`.

Expand Down Expand Up @@ -270,7 +270,7 @@ def group_cumsum(numeric[:, ::1] out,
@cython.boundscheck(False)
@cython.wraparound(False)
def group_shift_indexer(int64_t[::1] out, const intp_t[:] labels,
int ngroups, int periods):
int ngroups, int periods) -> None:
cdef:
Py_ssize_t N, i, j, ii, lab
int offset = 0, sign
Expand Down Expand Up @@ -322,14 +322,14 @@ def group_shift_indexer(int64_t[::1] out, const intp_t[:] labels,
@cython.wraparound(False)
@cython.boundscheck(False)
def group_fillna_indexer(ndarray[int64_t] out, ndarray[intp_t] labels,
ndarray[uint8_t] mask, object direction,
int64_t limit, bint dropna):
ndarray[uint8_t] mask, str direction,
int64_t limit, bint dropna) -> None:
"""
Indexes how to fill values forwards or backwards within a group.

Parameters
----------
out : np.ndarray[np.uint8]
out : np.ndarray[np.int64]
Values into which this method will write its results.
labels : np.ndarray[np.intp]
Array containing unique label for each group, with its ordering
Expand Down Expand Up @@ -392,8 +392,8 @@ def group_any_all(uint8_t[::1] out,
const uint8_t[::1] values,
const intp_t[:] labels,
const uint8_t[::1] mask,
object val_test,
bint skipna):
str val_test,
bint skipna) -> None:
"""
Aggregate boolean values to show truthfulness of group elements.

Expand Down Expand Up @@ -465,7 +465,7 @@ def group_add(complexfloating_t[:, ::1] out,
int64_t[::1] counts,
ndarray[complexfloating_t, ndim=2] values,
const intp_t[:] labels,
Py_ssize_t min_count=0):
Py_ssize_t min_count=0) -> None:
"""
Only aggregates on axis=0 using Kahan summation
"""
Expand Down Expand Up @@ -518,7 +518,7 @@ def group_prod(floating[:, ::1] out,
int64_t[::1] counts,
ndarray[floating, ndim=2] values,
const intp_t[:] labels,
Py_ssize_t min_count=0):
Py_ssize_t min_count=0) -> None:
"""
Only aggregates on axis=0
"""
Expand Down Expand Up @@ -568,7 +568,7 @@ def group_var(floating[:, ::1] out,
ndarray[floating, ndim=2] values,
const intp_t[:] labels,
Py_ssize_t min_count=-1,
int64_t ddof=1):
int64_t ddof=1) -> None:
cdef:
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
floating val, ct, oldmean
Expand Down Expand Up @@ -621,7 +621,7 @@ def group_mean(floating[:, ::1] out,
int64_t[::1] counts,
ndarray[floating, ndim=2] values,
const intp_t[::1] labels,
Py_ssize_t min_count=-1):
Py_ssize_t min_count=-1) -> None:
cdef:
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
floating val, count, y, t
Expand Down Expand Up @@ -673,7 +673,7 @@ def group_ohlc(floating[:, ::1] out,
int64_t[::1] counts,
ndarray[floating, ndim=2] values,
const intp_t[:] labels,
Py_ssize_t min_count=-1):
Py_ssize_t min_count=-1) -> None:
"""
Only aggregates on axis=0
"""
Expand Down Expand Up @@ -721,7 +721,7 @@ def group_quantile(ndarray[float64_t] out,
ndarray[intp_t] labels,
ndarray[uint8_t] mask,
float64_t q,
object interpolation):
str interpolation) -> None:
"""
Calculate the quantile per group.

Expand All @@ -733,8 +733,6 @@ def group_quantile(ndarray[float64_t] out,
Array containing the values to apply the function against.
labels : ndarray[np.intp]
Array containing the unique group labels.
values : ndarray
Array containing the values to apply the function against.
q : float
The quantile value to search for.
interpolation : {'linear', 'lower', 'higher', 'nearest', 'midpoint'}
Expand Down Expand Up @@ -865,7 +863,7 @@ def group_last(rank_t[:, ::1] out,
int64_t[::1] counts,
ndarray[rank_t, ndim=2] values,
const intp_t[:] labels,
Py_ssize_t min_count=-1):
Py_ssize_t min_count=-1) -> None:
"""
Only aggregates on axis=0
"""
Expand Down Expand Up @@ -957,8 +955,9 @@ def group_nth(rank_t[:, ::1] out,
int64_t[::1] counts,
ndarray[rank_t, ndim=2] values,
const intp_t[:] labels,
int64_t min_count=-1, int64_t rank=1
):
int64_t min_count=-1,
int64_t rank=1,
) -> None:
"""
Only aggregates on axis=0
"""
Expand Down Expand Up @@ -1050,8 +1049,8 @@ def group_rank(float64_t[:, ::1] out,
ndarray[rank_t, ndim=2] values,
const intp_t[:] labels,
int ngroups,
bint is_datetimelike, object ties_method="average",
bint ascending=True, bint pct=False, object na_option="keep"):
bint is_datetimelike, str ties_method="average",
bint ascending=True, bint pct=False, str na_option="keep") -> None:
"""
Provides the rank of values within each group.

Expand Down Expand Up @@ -1221,7 +1220,7 @@ def group_max(groupby_t[:, ::1] out,
int64_t[::1] counts,
ndarray[groupby_t, ndim=2] values,
const intp_t[:] labels,
Py_ssize_t min_count=-1):
Py_ssize_t min_count=-1) -> None:
"""See group_min_max.__doc__"""
group_min_max(out, counts, values, labels, min_count=min_count, compute_max=True)

Expand All @@ -1232,7 +1231,7 @@ def group_min(groupby_t[:, ::1] out,
int64_t[::1] counts,
ndarray[groupby_t, ndim=2] values,
const intp_t[:] labels,
Py_ssize_t min_count=-1):
Py_ssize_t min_count=-1) -> None:
"""See group_min_max.__doc__"""
group_min_max(out, counts, values, labels, min_count=min_count, compute_max=False)

Expand Down Expand Up @@ -1311,7 +1310,7 @@ def group_cummin(groupby_t[:, ::1] out,
ndarray[groupby_t, ndim=2] values,
const intp_t[:] labels,
int ngroups,
bint is_datetimelike):
bint is_datetimelike) -> None:
"""See group_cummin_max.__doc__"""
group_cummin_max(out, values, labels, ngroups, is_datetimelike, compute_max=False)

Expand All @@ -1322,6 +1321,6 @@ def group_cummax(groupby_t[:, ::1] out,
ndarray[groupby_t, ndim=2] values,
const intp_t[:] labels,
int ngroups,
bint is_datetimelike):
bint is_datetimelike) -> None:
"""See group_cummin_max.__doc__"""
group_cummin_max(out, values, labels, ngroups, is_datetimelike, compute_max=True)
2 changes: 1 addition & 1 deletion pandas/_libs/hashtable.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,6 @@ cdef class Int64Vector:
cdef bint external_view_exists

cdef resize(self)
cpdef to_array(self)
cpdef ndarray to_array(self)
cdef inline void append(self, int64_t x)
cdef extend(self, int64_t[:] x)
6 changes: 3 additions & 3 deletions pandas/_libs/hashtable.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ cdef class Factorizer:
ObjectVector uniques
Py_ssize_t count

def __init__(self, size_hint):
def __init__(self, size_hint: int):
self.table = PyObjectHashTable(size_hint)
self.uniques = ObjectVector()
self.count = 0
Expand Down Expand Up @@ -116,12 +116,12 @@ cdef class Int64Factorizer:
Int64Vector uniques
Py_ssize_t count

def __init__(self, size_hint):
def __init__(self, size_hint: int):
self.table = Int64HashTable(size_hint)
self.uniques = Int64Vector()
self.count = 0

def get_count(self):
def get_count(self) -> int:
return self.count

def factorize(self, const int64_t[:] values, sort=False,
Expand Down
27 changes: 15 additions & 12 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ cdef class {{name}}Vector:
def __len__(self) -> int:
return self.data.n

cpdef to_array(self):
cpdef ndarray to_array(self):
if self.data.m != self.data.n:
if self.external_view_exists:
# should never happen
Expand Down Expand Up @@ -288,7 +288,7 @@ cdef class StringVector:
def __len__(self) -> int:
return self.data.n

def to_array(self):
cpdef ndarray[object, ndim=1] to_array(self):
cdef:
ndarray ao
Py_ssize_t n
Expand Down Expand Up @@ -345,7 +345,7 @@ cdef class ObjectVector:
self.data[self.n] = <PyObject*>obj
self.n += 1

def to_array(self):
cpdef ndarray[object, ndim=1] to_array(self):
if self.m != self.n:
if self.external_view_exists:
raise ValueError("should have raised on append()")
Expand Down Expand Up @@ -403,7 +403,7 @@ cdef class {{name}}HashTable(HashTable):
kh_destroy_{{dtype}}(self.table)
self.table = NULL

def __contains__(self, object key):
def __contains__(self, object key) -> bool:
cdef:
khiter_t k
{{c_type}} ckey
Expand Down Expand Up @@ -452,7 +452,7 @@ cdef class {{name}}HashTable(HashTable):
raise KeyError(key)

@cython.boundscheck(False)
def map(self, const {{dtype}}_t[:] keys, const int64_t[:] values):
def map(self, const {{dtype}}_t[:] keys, const int64_t[:] values) -> None:
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand All @@ -466,7 +466,7 @@ cdef class {{name}}HashTable(HashTable):
self.table.vals[k] = <Py_ssize_t>values[i]

@cython.boundscheck(False)
def map_locations(self, const {{dtype}}_t[:] values):
def map_locations(self, const {{dtype}}_t[:] values) -> None:
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand All @@ -480,7 +480,8 @@ cdef class {{name}}HashTable(HashTable):
self.table.vals[k] = i

@cython.boundscheck(False)
def lookup(self, const {{dtype}}_t[:] values):
def lookup(self, const {{dtype}}_t[:] values) -> ndarray:
# -> np.ndarray[np.intp]
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand Down Expand Up @@ -818,7 +819,8 @@ cdef class StringHashTable(HashTable):
return labels

@cython.boundscheck(False)
def lookup(self, ndarray[object] values):
def lookup(self, ndarray[object] values) -> ndarray:
# -> np.ndarray[np.intp]
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand Down Expand Up @@ -853,7 +855,7 @@ cdef class StringHashTable(HashTable):
return np.asarray(locs)

@cython.boundscheck(False)
def map_locations(self, ndarray[object] values):
def map_locations(self, ndarray[object] values) -> None:
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand Down Expand Up @@ -1071,7 +1073,7 @@ cdef class PyObjectHashTable(HashTable):
def __len__(self) -> int:
return self.table.size

def __contains__(self, object key):
def __contains__(self, object key) -> bool:
cdef:
khiter_t k
hash(key)
Expand Down Expand Up @@ -1123,7 +1125,7 @@ cdef class PyObjectHashTable(HashTable):
else:
raise KeyError(key)

def map_locations(self, ndarray[object] values):
def map_locations(self, ndarray[object] values) -> None:
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand All @@ -1137,7 +1139,8 @@ cdef class PyObjectHashTable(HashTable):
k = kh_put_pymap(self.table, <PyObject*>val, &ret)
self.table.vals[k] = i

def lookup(self, ndarray[object] values):
def lookup(self, ndarray[object] values) -> ndarray:
# -> np.ndarray[np.intp]
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ cdef class IndexEngine:
return self._maybe_get_bool_indexer(val)

cdef _maybe_get_bool_indexer(self, object val):
# Returns ndarray[bool] or int
cdef:
ndarray[uint8_t, ndim=1, cast=True] indexer

Expand Down Expand Up @@ -247,7 +248,7 @@ cdef class IndexEngine:

self.need_unique_check = 0

cdef void _call_map_locations(self, values):
cdef void _call_map_locations(self, ndarray values):
self.mapping.map_locations(values)

def clear_mapping(self):
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/index_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,11 @@ cdef class {{name}}Engine(IndexEngine):
raise KeyError(val)
{{endif}}

cdef void _call_map_locations(self, values):
cdef void _call_map_locations(self, ndarray values):
self.mapping.map_locations(algos.ensure_{{name.lower()}}(values))

cdef _maybe_get_bool_indexer(self, object val):
# Returns ndarray[bool] or int
cdef:
ndarray[uint8_t, ndim=1, cast=True] indexer
ndarray[intp_t, ndim=1] found
Expand Down