CLN: cython and docstring cleanups (#29089)

jbrockmendel · jreback · commit 693105169f51 · 2019-10-19T13:13:05.000-04:00
diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in
@@ -17,12 +17,11 @@ dtypes = [('float64', 'float64_t', 'float64_t'),
 def get_dispatch(dtypes):
 
     for name, c_type, dest_type, in dtypes:
-        dest_name = dest_type[:-2]  # i.e. strip "_t"
-        yield name, c_type, dest_type, dest_name
+        yield name, c_type, dest_type
 
 }}
 
-{{for name, c_type, dest_type, dest_name
+{{for name, c_type, dest_type
       in get_dispatch(dtypes)}}
 
 
diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in
@@ -10,28 +10,28 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 
 {{py:
 
-# name, dest, c_type_in, c_type_out, preval, postval, can_copy, nogil
+# c_type_in, c_type_out, preval, postval
 dtypes = [
-    ('bool', 'bool', 'uint8_t', 'uint8_t', '', '', True),
-    ('bool', 'object', 'uint8_t', 'object',
-     'True if ', ' > 0 else False', False),
-    ('int8', 'int8', 'int8_t', 'int8_t', '', '', True),
-    ('int8', 'int32', 'int8_t', 'int32_t', '', '', False),
-    ('int8', 'int64', 'int8_t', 'int64_t', '', '', False),
-    ('int8', 'float64', 'int8_t', 'float64_t', '', '', False),
-    ('int16', 'int16', 'int16_t', 'int16_t', '', '', True),
-    ('int16', 'int32', 'int16_t', 'int32_t', '', '', False),
-    ('int16', 'int64', 'int16_t', 'int64_t', '', '', False),
-    ('int16', 'float64', 'int16_t', 'float64_t', '', '', False),
-    ('int32', 'int32', 'int32_t', 'int32_t', '', '', True),
-    ('int32', 'int64', 'int32_t', 'int64_t', '', '', False),
-    ('int32', 'float64', 'int32_t', 'float64_t', '', '', False),
-    ('int64', 'int64', 'int64_t', 'int64_t', '', '', True),
-    ('int64', 'float64', 'int64_t', 'float64_t', '', '', False),
-    ('float32', 'float32', 'float32_t', 'float32_t', '', '', True),
-    ('float32', 'float64', 'float32_t', 'float64_t', '', '', False),
-    ('float64', 'float64', 'float64_t', 'float64_t', '', '', True),
-    ('object', 'object', 'object', 'object', '', '', False)]
+    ('uint8_t', 'uint8_t', '', ''),
+    ('uint8_t', 'object', 'True if ', ' > 0 else False'),
+    ('int8_t', 'int8_t', '', ''),
+    ('int8_t', 'int32_t', '', ''),
+    ('int8_t', 'int64_t', '', ''),
+    ('int8_t', 'float64_t', '', ''),
+    ('int16_t', 'int16_t', '', ''),
+    ('int16_t', 'int32_t', '', ''),
+    ('int16_t', 'int64_t', '', ''),
+    ('int16_t', 'float64_t', '', ''),
+    ('int32_t', 'int32_t', '', ''),
+    ('int32_t', 'int64_t', '', ''),
+    ('int32_t', 'float64_t', '', ''),
+    ('int64_t', 'int64_t', '', ''),
+    ('int64_t', 'float64_t', '', ''),
+    ('float32_t', 'float32_t', '', ''),
+    ('float32_t', 'float64_t', '', ''),
+    ('float64_t', 'float64_t', '', ''),
+    ('object', 'object', '', ''),
+]
 
 
 def get_dispatch(dtypes):
@@ -117,9 +117,9 @@ def get_dispatch(dtypes):
                 out[i, j] = %(preval)svalues[i, idx]%(postval)s
 """
 
-    for (name, dest, c_type_in, c_type_out, preval, postval,
-         can_copy) in dtypes:
+    for (c_type_in, c_type_out, preval, postval) in dtypes:
 
+        can_copy = c_type_in == c_type_out != "object"
         nogil = c_type_out != "object"
         if nogil:
             nogil_str = "with nogil:"
@@ -128,6 +128,16 @@ def get_dispatch(dtypes):
             nogil_str = ''
             tab = ''
 
+        def get_name(dtype_name):
+            if dtype_name == "object":
+                return "object"
+            if dtype_name == "uint8_t":
+                return "bool"
+            return dtype_name[:-2]
+
+        name = get_name(c_type_in)
+        dest = get_name(c_type_out)
+
         args = dict(name=name, dest=dest, c_type_in=c_type_in,
                     c_type_out=c_type_out, preval=preval, postval=postval,
                     can_copy=can_copy, nogil_str=nogil_str, tab=tab)
@@ -291,9 +301,3 @@ cdef _take_2d(ndarray[take_t, ndim=2] values, object idx):
         for j in range(K):
             result[i, j] = values[i, indexer[i, j]]
     return result
-
-
-_take_2d_object = _take_2d[object]
-_take_2d_float64 = _take_2d[float64_t]
-_take_2d_int64 = _take_2d[int64_t]
-_take_2d_uint64 = _take_2d[uint64_t]
diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -151,12 +151,14 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
 
     if keep == 'last':
         {{if dtype == 'object'}}
-        for i from n > i >= 0:
+        for i in range(n - 1, -1, -1):
+            # same as range(n)[::-1], which cython doesn't like in nogil
             kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
             out[i] = ret == 0
         {{else}}
         with nogil:
-            for i from n > i >= 0:
+            for i in range(n - 1, -1, -1):
+                # same as range(n)[::-1], which cython doesn't like in nogil
                 kh_put_{{ttype}}(table, values[i], &ret)
                 out[i] = ret == 0
         {{endif}}
diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx
@@ -13,13 +13,16 @@ from pandas._libs.algos import (
 )
 
 
+@cython.boundscheck(False)
 def inner_join(const int64_t[:] left, const int64_t[:] right,
                Py_ssize_t max_groups):
     cdef:
         Py_ssize_t i, j, k, count = 0
         ndarray[int64_t] left_count, right_count, left_sorter, right_sorter
         ndarray[int64_t] left_indexer, right_indexer
         int64_t lc, rc
+        Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
+        Py_ssize_t offset
 
     # NA group in location 0
 
@@ -34,11 +37,6 @@ def inner_join(const int64_t[:] left, const int64_t[:] right,
         if rc > 0 and lc > 0:
             count += lc * rc
 
-    # group 0 is the NA group
-    cdef:
-        Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
-        Py_ssize_t offset
-
     # exclude the NA group
     left_pos = left_count[0]
     right_pos = right_count[0]
@@ -64,6 +62,7 @@ def inner_join(const int64_t[:] left, const int64_t[:] right,
             _get_result_indexer(right_sorter, right_indexer))
 
 
+@cython.boundscheck(False)
 def left_outer_join(const int64_t[:] left, const int64_t[:] right,
                     Py_ssize_t max_groups, sort=True):
     cdef:
@@ -72,6 +71,8 @@ def left_outer_join(const int64_t[:] left, const int64_t[:] right,
         ndarray rev
         ndarray[int64_t] left_indexer, right_indexer
         int64_t lc, rc
+        Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
+        Py_ssize_t offset
 
     # NA group in location 0
 
@@ -85,11 +86,6 @@ def left_outer_join(const int64_t[:] left, const int64_t[:] right,
         else:
             count += left_count[i]
 
-    # group 0 is the NA group
-    cdef:
-        Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
-        Py_ssize_t offset
-
     # exclude the NA group
     left_pos = left_count[0]
     right_pos = right_count[0]
@@ -137,13 +133,16 @@ def left_outer_join(const int64_t[:] left, const int64_t[:] right,
     return left_indexer, right_indexer
 
 
+@cython.boundscheck(False)
 def full_outer_join(const int64_t[:] left, const int64_t[:] right,
                     Py_ssize_t max_groups):
     cdef:
         Py_ssize_t i, j, k, count = 0
         ndarray[int64_t] left_count, right_count, left_sorter, right_sorter
         ndarray[int64_t] left_indexer, right_indexer
         int64_t lc, rc
+        int64_t left_pos = 0, right_pos = 0
+        Py_ssize_t offset, position = 0
 
     # NA group in location 0
 
@@ -160,11 +159,6 @@ def full_outer_join(const int64_t[:] left, const int64_t[:] right,
         else:
             count += lc + rc
 
-    # group 0 is the NA group
-    cdef:
-        int64_t left_pos = 0, right_pos = 0
-        Py_ssize_t offset, position = 0
-
     # exclude the NA group
     left_pos = left_count[0]
     right_pos = right_count[0]
diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in
@@ -86,7 +86,7 @@ def get_op(tup):
                 'and': '{0} & {1}',     # logical op
                 'or': '{0} | {1}'}
 
-    return ops_dict[opname].format(lval, rval, dtype)
+    return ops_dict[opname].format(lval, rval)
 
 
 def get_dispatch(dtypes):
diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx
@@ -1296,7 +1296,7 @@ cdef _roll_min_max_variable(ndarray[numeric] values,
         # The original impl didn't deal with variable window sizes
         # So the code was optimized for that
 
-        for i from starti[0] <= i < endi[0]:
+        for i in range(starti[0], endi[0]):
             ai = init_mm(values[i], &nobs, is_max)
 
             # Discard previous entries if we find new min or max
@@ -1644,7 +1644,7 @@ def roll_generic(object obj,
     else:
 
         # truncated windows at the beginning, through first full-length window
-        for i from 0 <= i < (int_min(win, N) - offset):
+        for i in range((int_min(win, N) - offset)):
             if counts[i] >= minp:
                 output[i] = func(arr[0: (i + offset + 1)], *args, **kwargs)
             else:
@@ -1654,7 +1654,7 @@ def roll_generic(object obj,
         buf = <float64_t *>arr.data
         bufarr = np.empty(win, dtype=float)
         oldbuf = <float64_t *>bufarr.data
-        for i from (win - offset) <= i < (N - offset):
+        for i in range((win - offset), (N - offset)):
             buf = buf + 1
             bufarr.data = <char *>buf
             if counts[i] >= minp:
@@ -1664,7 +1664,7 @@ def roll_generic(object obj,
         bufarr.data = <char *>oldbuf
 
         # truncated windows at the end
-        for i from int_max(N - offset, 0) <= i < N:
+        for i in range(int_max(N - offset, 0), N):
             if counts[i] >= minp:
                 output[i] = func(arr[int_max(i + offset - win + 1, 0): N],
                                  *args,
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -1104,7 +1104,7 @@ def _create_method(cls, op, coerce_to_dtype=True):
         ----------
         op : function
             An operator that takes arguments op(a, b)
-        coerce_to_dtype :  bool, default True
+        coerce_to_dtype : bool, default True
             boolean indicating whether to attempt to convert
             the result to the underlying ExtensionArray dtype.
             If it's not possible to create a new ExtensionArray with the
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -2481,7 +2481,7 @@ def to_hdf(self, path_or_buf, key, **kwargs):
               like searching / selecting subsets of the data.
         append : bool, default False
             For Table formats, append the input data to the existing.
-        data_columns :  list of columns or True, optional
+        data_columns : list of columns or True, optional
             List of columns to create as indexed data columns for on-disk
             queries, or True to use all columns. By default only the axes
             of the object are indexed. See :ref:`io.hdf5-query-data-columns`.
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -2080,7 +2080,7 @@ def rank(
             * dense: like 'min', but rank always increases by 1 between groups
         ascending : bool, default True
             False for ranks by high (1) to low (N).
-        na_option :  {'keep', 'top', 'bottom'}, default 'keep'
+        na_option : {'keep', 'top', 'bottom'}, default 'keep'
             * keep: leave NA values where they are
             * top: smallest rank if ascending
             * bottom: smallest rank if descending
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -2031,7 +2031,7 @@ def fillna(self, value=None, downcast=None):
 
         Parameters
         ----------
-        how :  {'any', 'all'}, default 'any'
+        how : {'any', 'all'}, default 'any'
             If the Index is a MultiIndex, drop the value when any or all levels
             are NaN.
 
@@ -5016,12 +5016,11 @@ def _validate_indexer(self, form, key, kind):
 
         Returns
         -------
-        label :  object
+        label : object
 
         Notes
         -----
         Value of `side` parameter should be validated in caller.
-
         """
 
     @Appender(_index_shared_docs["_maybe_cast_slice_bound"])
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -1079,12 +1079,11 @@ def _maybe_cast_slice_bound(self, label, side, kind):
 
         Returns
         -------
-        label :  object
+        label : object
 
         Notes
         -----
         Value of `side` parameter should be validated in caller.
-
         """
         assert kind in ["ix", "loc", "getitem", None]
 
diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
@@ -550,7 +550,6 @@ def _maybe_cast_slice_bound(self, label, side, kind):
         """
         If label is a string, cast it to timedelta according to resolution.
 
-
         Parameters
         ----------
         label : object
@@ -559,8 +558,7 @@ def _maybe_cast_slice_bound(self, label, side, kind):
 
         Returns
         -------
-        label :  object
-
+        label : object
         """
         assert kind in ["ix", "loc", "getitem", None]
 
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
@@ -420,7 +420,7 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0):
     ----------
     xi : array_like
         A sorted list of x-coordinates, of length N.
-    yi :  array_like
+    yi : array_like
         A 1-D array of real values.  `yi`'s length along the interpolation
         axis must be equal to the length of `xi`. If N-D array, use axis
         parameter to select correct axis.
diff --git a/pandas/core/ops/docstrings.py b/pandas/core/ops/docstrings.py
@@ -387,7 +387,7 @@ def _make_flex_doc(op_name, typ):
 ----------
 other : scalar, sequence, Series, or DataFrame
     Any single or multiple element data structure, or list-like object.
-axis :  {{0 or 'index', 1 or 'columns'}}
+axis : {{0 or 'index', 1 or 'columns'}}
     Whether to compare by the index (0 or 'index') or columns
     (1 or 'columns'). For Series input, axis to match Series index on.
 level : int or label
@@ -541,7 +541,7 @@ def _make_flex_doc(op_name, typ):
 ----------
 other : scalar, sequence, Series, or DataFrame
     Any single or multiple element data structure, or list-like object.
-axis :  {{0 or 'index', 1 or 'columns'}}, default 'columns'
+axis : {{0 or 'index', 1 or 'columns'}}, default 'columns'
     Whether to compare by the index (0 or 'index') or columns
     (1 or 'columns').
 level : int or label
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
@@ -1343,7 +1343,7 @@ def str_pad(arr, width, side="left", fillchar=" "):
         character. Equivalent to ``Series.str.pad(side='right')``.
     Series.str.center : Fills boths sides of strings with an arbitrary
         character. Equivalent to ``Series.str.pad(side='both')``.
-    Series.str.zfill :  Pad strings in the Series/Index by prepending '0'
+    Series.str.zfill : Pad strings in the Series/Index by prepending '0'
         character. Equivalent to ``Series.str.pad(side='left', fillchar='0')``.
 
     Examples
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -1027,7 +1027,7 @@ def append(
                        / selecting subsets of the data
         append       : bool, default True
             Append the input data to the existing.
-        data_columns :  list of columns, or True, default None
+        data_columns : list of columns, or True, default None
             List of columns to create as indexed data columns for on-disk
             queries, or True to use all columns. By default only the axes
             of the object are indexed. See `here
diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py
@@ -311,7 +311,7 @@ def _check_ax_scales(self, axes, xaxis="linear", yaxis="linear"):
         axes : matplotlib Axes object, or its list-like
         xaxis : {'linear', 'log'}
             expected xaxis scale
-        yaxis :  {'linear', 'log'}
+        yaxis : {'linear', 'log'}
             expected yaxis scale
         """
         axes = self._flatten_visible(axes)
@@ -329,7 +329,7 @@ def _check_axes_shape(self, axes, axes_num=None, layout=None, figsize=None):
         axes_num : number
             expected number of axes. Unnecessary axes should be set to
             invisible.
-        layout :  tuple
+        layout : tuple
             expected layout, (expected number of rows , columns)
         figsize : tuple
             expected figsize. default is matplotlib default