REF: cython cleanups and optimizations #23382

Closed
wants to merge 13 commits into from
64 changes: 42 additions & 22 deletions pandas/_libs/algos.pyx
@@ -77,7 +77,9 @@ class NegInfinity(object):
__ge__ = lambda self, other: isinstance(other, NegInfinity)


cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef ndarray[int64_t, ndim=1] unique_deltas(int64_t[:] arr):
"""
Efficiently find the unique first-differences of the given array.

@@ -98,6 +100,8 @@ cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):
int ret = 0
list uniques = []

util.require_not_none(arr)

table = kh_init_int64()
kh_resize_int64(table, 10)
for i in range(n - 1):
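The hunk above shows the pattern this PR applies throughout: the `ndarray[int64_t]` argument becomes an `int64_t[:]` typed memoryview, `boundscheck`/`wraparound` are disabled, and an explicit `util.require_not_none` guard is added (presumably because a memoryview argument can still be passed `None` and, with the checks disabled, would fail far less gracefully on first access). For orientation, here is a pure-NumPy sketch of what `unique_deltas` computes; the name and the `np.unique` shortcut are illustrative only, while the Cython kernel collects the deltas in a khash `int64` set (the `kh_init_int64`/`kh_resize_int64` calls) instead of sorting every difference:

```python
import numpy as np

def unique_deltas_ref(arr):
    """Illustrative reference: sorted unique first differences of `arr`."""
    if arr is None:  # mirrors util.require_not_none(arr)
        raise TypeError("arr must not be None")
    arr = np.asarray(arr, dtype=np.int64)
    return np.unique(np.diff(arr))

# unique_deltas_ref([0, 5, 10, 11, 16]) -> array([1, 5])
```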
@@ -151,7 +155,7 @@ def is_lexsorted(list_of_arrays: list) -> bint:

@cython.boundscheck(False)
@cython.wraparound(False)
def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups):
def groupsort_indexer(int64_t[:] index, Py_ssize_t ngroups):
"""
compute a 1-d indexer that is an ordering of the passed index,
ordered by the groups. This is a reverse of the label
@@ -171,6 +175,8 @@ def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups):
Py_ssize_t i, loc, label, n
ndarray[int64_t] counts, where, result

util.require_not_none(index)

counts = np.zeros(ngroups + 1, dtype=np.int64)
n = len(index)
result = np.zeros(n, dtype=np.int64)
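The truncated docstring above describes a counting sort keyed by group label. A slow pure-Python/NumPy sketch of that contract (illustrative only, not the pandas implementation; label `-1` is treated as the missing-value group and sorted to the front, and `counts[k + 1]` holds the size of group `k`):

```python
import numpy as np

def groupsort_indexer_ref(labels, ngroups):
    """Return (indexer, counts): positions reordered so equal labels are
    contiguous, with NA labels (-1) first."""
    counts = np.zeros(ngroups + 1, dtype=np.int64)
    for lab in labels:                     # histogram of group sizes
        counts[lab + 1] += 1
    where = np.zeros(ngroups + 1, dtype=np.int64)
    where[1:] = np.cumsum(counts)[:-1]     # start offset of each group
    result = np.zeros(len(labels), dtype=np.int64)
    for i, lab in enumerate(labels):       # stable placement pass
        result[where[lab + 1]] = i
        where[lab + 1] += 1
    return result, counts

# groupsort_indexer_ref([1, 0, 1, -1, 0], 2)
#   -> indexer [3, 1, 4, 0, 2], counts [1, 2, 2]
```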
@@ -236,7 +242,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None):
Py_ssize_t i, j, xi, yi, N, K
bint minpv
ndarray[float64_t, ndim=2] result
ndarray[uint8_t, ndim=2] mask
uint8_t[:, :] mask
int64_t nobs = 0
float64_t vx, vy, sumx, sumy, sumxx, sumyy, meanx, meany, divisor

@@ -301,7 +307,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):
ndarray[float64_t, ndim=2] result
ndarray[float64_t, ndim=1] maskedx
ndarray[float64_t, ndim=1] maskedy
ndarray[uint8_t, ndim=2] mask
uint8_t[:, :] mask
int64_t nobs = 0
float64_t vx, vy, sumx, sumxx, sumyy, mean, divisor

@@ -373,7 +379,7 @@ ctypedef fused algos_t:
# TODO: unused; needed?
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef map_indices(ndarray[algos_t] index):
cpdef map_indices(algos_t[:] index):
"""
Produce a dict mapping the values of the input array to their respective
locations.
@@ -387,6 +393,8 @@ cpdef map_indices(ndarray[algos_t] index):
Py_ssize_t i, length
dict result = {}

util.require_not_none(index)

length = len(index)

for i in range(length):
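As the docstring says, this helper (flagged above as possibly unused) just maps each value to its position; in plain Python it amounts to a dict comprehension. A minimal sketch, with the caveat that a later duplicate overwrites an earlier one:

```python
def map_indices_ref(index):
    # {value: position}; a duplicate value keeps the last position seen
    return {val: i for i, val in enumerate(index)}

# map_indices_ref([10, 20, 10]) -> {10: 2, 20: 1}
```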

@cython.boundscheck(False)
@cython.wraparound(False)
def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
def pad(algos_t[:] old, algos_t[:] new, limit=None):
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t, ndim=1] indexer
algos_t cur, next
int lim, fill_count = 0

util.require_not_none(old)
util.require_not_none(new)

nleft = len(old)
nright = len(new)
indexer = np.empty(nright, dtype=np.int64)
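`pad` builds the forward-fill indexer used for `method='pad'`-style reindexing: assuming both `old` and `new` are sorted, each element of `new` is mapped to the position of the last element of `old` that is less than or equal to it, or `-1` if none exists. A hedged NumPy sketch of that mapping (the `limit` bookkeeping of the real kernel is deliberately left out):

```python
import numpy as np

def pad_indexer_ref(old, new):
    """Forward-fill indexer sketch: last position i in `old` with old[i] <= x."""
    old = np.asarray(old)
    new = np.asarray(new)
    return (np.searchsorted(old, new, side="right") - 1).astype(np.int64)

# pad_indexer_ref([0, 5, 10], [-1, 0, 3, 5, 12]) -> [-1, 0, 0, 1, 2]
```

`backfill` further down is the mirror image: each element of `new` maps to the first element of `old` that is greater than or equal to it.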
@@ -475,14 +486,15 @@ pad_bool = pad["uint8_t"]

@cython.boundscheck(False)
@cython.wraparound(False)
def pad_inplace(ndarray[algos_t] values,
ndarray[uint8_t, cast=True] mask,
limit=None):
def pad_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
cdef:
Py_ssize_t i, N
algos_t val
int lim, fill_count = 0

util.require_not_none(values)
util.require_not_none(mask)

N = len(values)

# GH#2778
@@ -521,14 +533,15 @@ pad_inplace_bool = pad_inplace["uint8_t"]

@cython.boundscheck(False)
@cython.wraparound(False)
def pad_2d_inplace(ndarray[algos_t, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
limit=None):
def pad_2d_inplace(algos_t[:, :] values, uint8_t[:, :] mask, limit=None):
cdef:
Py_ssize_t i, j, N, K
algos_t val
int lim, fill_count = 0

util.require_not_none(values)
util.require_not_none(mask)

K, N = (<object> values).shape

# GH#2778
@@ -595,13 +608,16 @@ D

@cython.boundscheck(False)
@cython.wraparound(False)
def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
def backfill(algos_t[:] old, algos_t[:] new, limit=None):
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t, ndim=1] indexer
algos_t cur, prev
int lim, fill_count = 0

util.require_not_none(old)
util.require_not_none(new)

nleft = len(old)
nright = len(new)
indexer = np.empty(nright, dtype=np.int64)
@@ -674,14 +690,15 @@ backfill_bool = backfill["uint8_t"]

@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_inplace(ndarray[algos_t] values,
ndarray[uint8_t, cast=True] mask,
limit=None):
def backfill_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
cdef:
Py_ssize_t i, N
algos_t val
int lim, fill_count = 0

util.require_not_none(values)
util.require_not_none(mask)

N = len(values)

# GH#2778
@@ -720,14 +737,15 @@ backfill_inplace_bool = backfill_inplace["uint8_t"]

@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_2d_inplace(ndarray[algos_t, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
limit=None):
def backfill_2d_inplace(algos_t[:, :] values, uint8_t[:, :] mask, limit=None):
cdef:
Py_ssize_t i, j, N, K
algos_t val
int lim, fill_count = 0

util.require_not_none(values)
util.require_not_none(mask)

K, N = (<object> values).shape

# GH#2778
@@ -768,14 +786,16 @@ backfill_2d_inplace_bool = backfill_2d_inplace["uint8_t"]

@cython.wraparound(False)
@cython.boundscheck(False)
def arrmap(ndarray[algos_t] index, object func):
def arrmap(algos_t[:] index, object func):
cdef:
Py_ssize_t length = index.shape[0]
Py_ssize_t i = 0
ndarray[object] result = np.empty(length, dtype=np.object_)
object[:] result = np.empty(length, dtype=np.object_)

from pandas._libs.lib import maybe_convert_objects

util.require_not_none(index)

for i in range(length):
result[i] = func(index[i])


@cython.boundscheck(False)
@cython.wraparound(False)
def is_monotonic(ndarray[algos_t] arr, bint timelike):
def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
"""
Returns
-------
Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/algos_common_helper.pxi.in
@@ -29,7 +29,7 @@ def get_dispatch(dtypes):
@cython.boundscheck(False)
@cython.wraparound(False)
def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,
ndarray[{{dest_type}}, ndim=2] out,
{{dest_type}}[:, :] out,
Py_ssize_t periods, int axis):
cdef:
Py_ssize_t i, j, sx, sy
@@ -72,8 +72,8 @@ def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,


def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
ndarray[int64_t] indexer, Py_ssize_t loc,
ndarray[{{dest_type}}] out):
int64_t[:] indexer, Py_ssize_t loc,
{{dest_type}}[:, :] out):
cdef:
Py_ssize_t i, j, k

41 changes: 25 additions & 16 deletions pandas/_libs/groupby.pyx
@@ -14,7 +14,7 @@ from numpy cimport (ndarray,
cnp.import_array()


from util cimport numeric, get_nat
from util cimport numeric, get_nat, require_not_none

from algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, TIEBREAK_MIN,
TIEBREAK_MAX, TIEBREAK_FIRST, TIEBREAK_DENSE)
@@ -98,21 +98,24 @@ cdef inline float64_t kth_smallest_c(float64_t* a,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_median_float64(ndarray[float64_t, ndim=2] out,
ndarray[int64_t] counts,
def group_median_float64(float64_t[:, :] out,
int64_t[:] counts,
ndarray[float64_t, ndim=2] values,
ndarray[int64_t] labels,
int64_t[:] labels,
Py_ssize_t min_count=-1):
"""
Only aggregates on axis=0
"""
cdef:
Py_ssize_t i, j, N, K, ngroups, size
ndarray[int64_t] _counts
ndarray data
int64_t[:] _counts
ndarray[float64_t, ndim=2] data
float64_t* ptr

assert min_count == -1, "'min_count' only used in add and prod"
require_not_none(counts)
require_not_none(out)
require_not_none(labels)

ngroups = len(counts)
N, K = (<object> values).shape
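Per its docstring, `group_median_float64` aggregates along axis 0 only: rows of `values` are bucketed by `labels` (with `-1` meaning "drop") and one median per group and column is written into `out`, skipping missing values within a group as groupby.median does. A rough NumPy sketch of that contract only; the Cython kernel works group-by-group on pointers into a regrouped copy of the data, which this does not attempt to reproduce:

```python
import numpy as np

def group_median_ref(values, labels, ngroups):
    """values: (N, K) float64, labels: (N,) int64 -> (ngroups, K) medians."""
    values = np.asarray(values, dtype=np.float64)
    labels = np.asarray(labels, dtype=np.int64)
    out = np.full((ngroups, values.shape[1]), np.nan)
    for g in range(ngroups):
        grp = values[labels == g]
        if grp.size:
            out[g] = np.nanmedian(grp, axis=0)  # NaNs in a group are skipped
    return out
```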
@@ -217,7 +220,7 @@ def group_cumsum(numeric[:, :] out,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
def group_shift_indexer(int64_t[:] out, int64_t[:] labels,
int ngroups, int periods):
cdef:
Py_ssize_t N, i, j, ii
@@ -269,8 +272,8 @@ def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,

@cython.wraparound(False)
@cython.boundscheck(False)
def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
ndarray[uint8_t] mask, object direction,
def group_fillna_indexer(int64_t[:] out, ndarray[int64_t] labels,
uint8_t[:] mask, object direction,
int64_t limit):
"""Indexes how to fill values forwards or backwards within a group

@@ -291,7 +294,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
"""
cdef:
Py_ssize_t i, N
ndarray[int64_t] sorted_labels
int64_t[:] sorted_labels
int64_t idx, curr_fill_idx=-1, filled_vals=0

N = len(out)

sorted_labels = np.argsort(labels, kind='mergesort').astype(
np.int64, copy=False)

if direction == 'bfill':
sorted_labels = sorted_labels[::-1]
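The visible part of the body shows the trick: positions are ordered by a stable `argsort` of the labels so that each group is contiguous, and `'bfill'` is handled by simply walking that order in reverse. The docstring's contract, restated as a slow pure-Python sketch of the `'ffill'` direction (hypothetical helper, not the pandas code; `limit` caps the number of consecutive positions filled from the same value, and label `-1` is treated as unfillable in this sketch):

```python
import numpy as np

def group_ffill_indexer_ref(labels, mask, limit):
    """out[i] = index to copy from (itself if present), or -1 if unfillable."""
    out = np.full(len(labels), -1, dtype=np.int64)
    last = {}     # group label -> index of the most recent non-missing value
    streak = {}   # group label -> consecutive fills since that value
    for i, (lab, missing) in enumerate(zip(labels, mask)):
        if lab == -1:                 # no group: left as -1 in this sketch
            continue
        if not missing:               # value present: point at itself, reset
            out[i] = i
            last[lab] = i
            streak[lab] = 0
        elif lab in last and streak[lab] < limit:
            out[i] = last[lab]
            streak[lab] += 1
    return out
```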


@cython.boundscheck(False)
@cython.wraparound(False)
def group_any_all(ndarray[uint8_t] out,
ndarray[int64_t] labels,
ndarray[uint8_t] values,
ndarray[uint8_t] mask,
def group_any_all(uint8_t[:] out,
int64_t[:] labels,
uint8_t[:] values,
uint8_t[:] mask,
object val_test,
bint skipna):
"""Aggregated boolean values to show truthfulness of group elements
@@ -353,10 +357,15 @@ def group_any_all(ndarray[uint8_t] out,
The returned values will either be 0 or 1 (False or True, respectively).
"""
cdef:
Py_ssize_t i, N=len(labels)
Py_ssize_t i, N = len(labels)
int64_t lab
uint8_t flag_val

require_not_none(out)
require_not_none(labels)
require_not_none(values)
require_not_none(mask)

if val_test == 'all':
# Because the 'all' value of an empty iterable in Python is True we can
# start with an array full of ones and set to zero when a False value
else:
raise ValueError("'bool_func' must be either 'any' or 'all'!")

out.fill(1 - flag_val)
out[:] = 1 - flag_val

with nogil:
for i in range(N):
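Two things stand out in this final hunk: the initialization switches from `out.fill(1 - flag_val)` to the slice assignment `out[:] = 1 - flag_val`, since `fill` is an ndarray method that a `uint8_t[:]` memoryview does not provide, and the new `require_not_none` guards run before the `nogil` loop touches the buffers. The kernel's contract, restated as a NumPy sketch (illustrative only, with an explicit `ngroups` argument standing in for the preallocated `out`):

```python
import numpy as np

def group_any_all_ref(values, labels, mask, val_test, skipna, ngroups):
    """One 0/1 flag per group: 'any'/'all' over that group's truth values."""
    flag_val = 0 if val_test == "all" else 1
    out = np.full(ngroups, 1 - flag_val, dtype=np.uint8)  # identity element
    for lab, val, missing in zip(labels, values, mask):
        if lab < 0 or (skipna and missing):
            continue
        if val == flag_val:           # a deciding value flips the group flag
            out[lab] = flag_val
    return out

# group_any_all_ref([1, 0, 1], [0, 0, 1], [0, 0, 0], "all", True, 2) -> [0, 1]
```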