CLN: Assorted cleanups (pandas-dev#29175)

jbrockmendel · proost · commit ef295e78df44 · 2019-12-20T01:09:07.000+09:00
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -13,7 +13,7 @@ from pandas._libs.tslibs.util cimport get_c_string
 
 {{py:
 
-# name, dtype, arg
+# name, dtype, c_type
 # the generated StringVector is not actually used
 # but is included for completeness (rather ObjectVector is used
 # for uniques in hashtables)
@@ -24,13 +24,13 @@ dtypes = [('Float64', 'float64', 'float64_t'),
           ('UInt64', 'uint64', 'uint64_t')]
 }}
 
-{{for name, dtype, arg in dtypes}}
+{{for name, dtype, c_type in dtypes}}
 
 
 {{if dtype != 'int64'}}
 
 ctypedef struct {{name}}VectorData:
-    {{arg}} *data
+    {{c_type}} *data
     Py_ssize_t n, m
 
 {{endif}}
@@ -39,7 +39,7 @@ ctypedef struct {{name}}VectorData:
 @cython.wraparound(False)
 @cython.boundscheck(False)
 cdef inline void append_data_{{dtype}}({{name}}VectorData *data,
-                                       {{arg}} x) nogil:
+                                       {{c_type}} x) nogil:
 
     data.data[data.n] = x
     data.n += 1
@@ -61,14 +61,14 @@ cdef inline bint needs_resize(vector_data *data) nogil:
 
 {{py:
 
-# name, dtype, arg, idtype
-dtypes = [('Float64', 'float64', 'float64_t', 'np.float64'),
-          ('UInt64', 'uint64', 'uint64_t', 'np.uint64'),
-          ('Int64', 'int64', 'int64_t', 'np.int64')]
+# name, dtype, c_type
+dtypes = [('Float64', 'float64', 'float64_t'),
+          ('UInt64', 'uint64', 'uint64_t'),
+          ('Int64', 'int64', 'int64_t')]
 
 }}
 
-{{for name, dtype, arg, idtype in dtypes}}
+{{for name, dtype, c_type in dtypes}}
 
 cdef class {{name}}Vector:
 
@@ -87,13 +87,13 @@ cdef class {{name}}Vector:
         self.external_view_exists = False
         self.data.n = 0
         self.data.m = _INIT_VEC_CAP
-        self.ao = np.empty(self.data.m, dtype={{idtype}})
-        self.data.data = <{{arg}}*>self.ao.data
+        self.ao = np.empty(self.data.m, dtype=np.{{dtype}})
+        self.data.data = <{{c_type}}*>self.ao.data
 
     cdef resize(self):
         self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
         self.ao.resize(self.data.m, refcheck=False)
-        self.data.data = <{{arg}}*>self.ao.data
+        self.data.data = <{{c_type}}*>self.ao.data
 
     def __dealloc__(self):
         if self.data is not NULL:
@@ -113,7 +113,7 @@ cdef class {{name}}Vector:
         self.external_view_exists = True
         return self.ao
 
-    cdef inline void append(self, {{arg}} x):
+    cdef inline void append(self, {{c_type}} x):
 
         if needs_resize(self.data):
             if self.external_view_exists:
@@ -123,7 +123,7 @@ cdef class {{name}}Vector:
 
         append_data_{{dtype}}(self.data, x)
 
-    cdef extend(self, const {{arg}}[:] x):
+    cdef extend(self, const {{c_type}}[:] x):
         for i in range(len(x)):
             self.append(x[i])
 
@@ -279,7 +279,8 @@ cdef class {{name}}HashTable(HashTable):
             self.table = NULL
 
     def __contains__(self, object key):
-        cdef khiter_t k
+        cdef:
+            khiter_t k
         k = kh_get_{{dtype}}(self.table, key)
         return k != self.table.n_buckets
 
@@ -290,7 +291,8 @@ cdef class {{name}}HashTable(HashTable):
                                        sizeof(uint32_t))  # flags
 
     cpdef get_item(self, {{dtype}}_t val):
-        cdef khiter_t k
+        cdef:
+            khiter_t k
         k = kh_get_{{dtype}}(self.table, val)
         if k != self.table.n_buckets:
             return self.table.vals[k]
@@ -899,7 +901,8 @@ cdef class PyObjectHashTable(HashTable):
         return self.table.size
 
     def __contains__(self, object key):
-        cdef khiter_t k
+        cdef:
+            khiter_t k
         hash(key)
 
         k = kh_get_pymap(self.table, <PyObject*>key)
@@ -912,7 +915,8 @@ cdef class PyObjectHashTable(HashTable):
                                        sizeof(uint32_t))  # flags
 
     cpdef get_item(self, object val):
-        cdef khiter_t k
+        cdef:
+            khiter_t k
 
         k = kh_get_pymap(self.table, <PyObject*>val)
         if k != self.table.n_buckets:
diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -4,21 +4,17 @@ Template for each `dtype` helper function for hashtable
 WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 """
 
-# ----------------------------------------------------------------------
-# VectorData
-# ----------------------------------------------------------------------
-
 {{py:
 
-# dtype, ttype
+# dtype, ttype, c_type
 dtypes = [('float64', 'float64', 'float64_t'),
           ('uint64', 'uint64', 'uint64_t'),
           ('object', 'pymap', 'object'),
           ('int64', 'int64', 'int64_t')]
 
 }}
 
-{{for dtype, ttype, scalar in dtypes}}
+{{for dtype, ttype, c_type in dtypes}}
 
 
 @cython.wraparound(False)
@@ -34,7 +30,7 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values,
         khiter_t k
         Py_ssize_t i, n = len(values)
 
-        {{scalar}} val
+        {{c_type}} val
 
         int ret = 0
 
@@ -77,7 +73,7 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values,
 {{if dtype == 'object'}}
 cpdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
 {{else}}
-cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna):
+cpdef value_count_{{dtype}}({{c_type}}[:] values, bint dropna):
 {{endif}}
     cdef:
         Py_ssize_t i = 0
@@ -127,13 +123,9 @@ cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna):
 @cython.wraparound(False)
 @cython.boundscheck(False)
 {{if dtype == 'object'}}
-
-
 def duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'):
 {{else}}
-
-
-def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
+def duplicated_{{dtype}}({{c_type}}[:] values, object keep='first'):
 {{endif}}
     cdef:
         int ret = 0
@@ -212,15 +204,10 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
 @cython.wraparound(False)
 @cython.boundscheck(False)
 {{if dtype == 'object'}}
-
-
-def ismember_{{dtype}}(ndarray[{{scalar}}] arr, ndarray[{{scalar}}] values):
+def ismember_{{dtype}}(ndarray[{{c_type}}] arr, ndarray[{{c_type}}] values):
 {{else}}
-
-
-def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
+def ismember_{{dtype}}({{c_type}}[:] arr, {{c_type}}[:] values):
 {{endif}}
-
     """
     Return boolean of values in arr on an
     element by-element basis
@@ -238,7 +225,7 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
         Py_ssize_t i, n, k
         int ret = 0
         ndarray[uint8_t] result
-        {{scalar}} val
+        {{c_type}} val
         kh_{{ttype}}_t *table = kh_init_{{ttype}}()
 
     # construct the table
diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
@@ -284,7 +284,7 @@ cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX):
     return start, stop, step, length
 
 
-def slice_getitem(slice slc not None, ind):
+cdef slice_getitem(slice slc, ind):
     cdef:
         Py_ssize_t s_start, s_stop, s_step, s_len
         Py_ssize_t ind_start, ind_stop, ind_step, ind_len
diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx
@@ -18,7 +18,7 @@ cnp.import_array()
 
 cimport pandas._libs.util as util
 
-from pandas._libs.hashtable cimport Int64Vector, Int64VectorData
+from pandas._libs.hashtable cimport Int64Vector
 from pandas._libs.tslibs.util cimport is_integer_object, is_float_object
 
 from pandas._libs.tslibs import Timestamp
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -9,12 +9,9 @@ import warnings
 import cython
 from cython import Py_ssize_t
 
-from cpython.list cimport PyList_New
-from cpython.object cimport (PyObject_Str, PyObject_RichCompareBool, Py_EQ,
-                             Py_SIZE)
+from cpython.object cimport PyObject_RichCompareBool, Py_EQ
 from cpython.ref cimport Py_INCREF
 from cpython.tuple cimport PyTuple_SET_ITEM, PyTuple_New
-from cpython.unicode cimport PyUnicode_Join
 
 from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
                                PyTime_Check, PyDelta_Check,
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -10852,7 +10852,7 @@ def transform(self, func, *args, **kwargs):
         Also returns None for empty %(klass)s.
         """
 
-    def _find_valid_index(self, how):
+    def _find_valid_index(self, how: str):
         """
         Retrieves the index of the first valid value.
 
diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py
@@ -5,8 +5,8 @@
 
 import numpy as np
 
+from pandas._libs import Timestamp
 import pandas._libs.hashing as hashing
-import pandas._libs.tslibs as tslibs
 
 from pandas.core.dtypes.cast import infer_dtype_from_scalar
 from pandas.core.dtypes.common import (
@@ -337,8 +337,8 @@ def _hash_scalar(val, encoding: str = "utf8", hash_key=None):
         # for tz-aware datetimes, we need the underlying naive UTC value and
         # not the tz aware object or pd extension type (as
         # infer_dtype_from_scalar would do)
-        if not isinstance(val, tslibs.Timestamp):
-            val = tslibs.Timestamp(val)
+        if not isinstance(val, Timestamp):
+            val = Timestamp(val)
         val = val.tz_convert(None)
 
     dtype, val = infer_dtype_from_scalar(val)