diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 1cbdb0df6233c..c39d6d60d4ea5 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -13,7 +13,7 @@ from pandas._libs.tslibs.util cimport get_c_string {{py: -# name, dtype, arg +# name, dtype, c_type # the generated StringVector is not actually used # but is included for completeness (rather ObjectVector is used # for uniques in hashtables) @@ -24,13 +24,13 @@ dtypes = [('Float64', 'float64', 'float64_t'), ('UInt64', 'uint64', 'uint64_t')] }} -{{for name, dtype, arg in dtypes}} +{{for name, dtype, c_type in dtypes}} {{if dtype != 'int64'}} ctypedef struct {{name}}VectorData: - {{arg}} *data + {{c_type}} *data Py_ssize_t n, m {{endif}} @@ -39,7 +39,7 @@ ctypedef struct {{name}}VectorData: @cython.wraparound(False) @cython.boundscheck(False) cdef inline void append_data_{{dtype}}({{name}}VectorData *data, - {{arg}} x) nogil: + {{c_type}} x) nogil: data.data[data.n] = x data.n += 1 @@ -61,14 +61,14 @@ cdef inline bint needs_resize(vector_data *data) nogil: {{py: -# name, dtype, arg, idtype -dtypes = [('Float64', 'float64', 'float64_t', 'np.float64'), - ('UInt64', 'uint64', 'uint64_t', 'np.uint64'), - ('Int64', 'int64', 'int64_t', 'np.int64')] +# name, dtype, c_type +dtypes = [('Float64', 'float64', 'float64_t'), + ('UInt64', 'uint64', 'uint64_t'), + ('Int64', 'int64', 'int64_t')] }} -{{for name, dtype, arg, idtype in dtypes}} +{{for name, dtype, c_type in dtypes}} cdef class {{name}}Vector: @@ -87,13 +87,13 @@ cdef class {{name}}Vector: self.external_view_exists = False self.data.n = 0 self.data.m = _INIT_VEC_CAP - self.ao = np.empty(self.data.m, dtype={{idtype}}) - self.data.data = <{{arg}}*>self.ao.data + self.ao = np.empty(self.data.m, dtype=np.{{dtype}}) + self.data.data = <{{c_type}}*>self.ao.data cdef resize(self): self.data.m = max(self.data.m * 4, _INIT_VEC_CAP) self.ao.resize(self.data.m, refcheck=False) - self.data.data = <{{arg}}*>self.ao.data + self.data.data = <{{c_type}}*>self.ao.data def __dealloc__(self): if self.data is not NULL: @@ -113,7 +113,7 @@ cdef class {{name}}Vector: self.external_view_exists = True return self.ao - cdef inline void append(self, {{arg}} x): + cdef inline void append(self, {{c_type}} x): if needs_resize(self.data): if self.external_view_exists: @@ -123,7 +123,7 @@ cdef class {{name}}Vector: append_data_{{dtype}}(self.data, x) - cdef extend(self, const {{arg}}[:] x): + cdef extend(self, const {{c_type}}[:] x): for i in range(len(x)): self.append(x[i]) @@ -279,7 +279,8 @@ cdef class {{name}}HashTable(HashTable): self.table = NULL def __contains__(self, object key): - cdef khiter_t k + cdef: + khiter_t k k = kh_get_{{dtype}}(self.table, key) return k != self.table.n_buckets @@ -290,7 +291,8 @@ cdef class {{name}}HashTable(HashTable): sizeof(uint32_t)) # flags cpdef get_item(self, {{dtype}}_t val): - cdef khiter_t k + cdef: + khiter_t k k = kh_get_{{dtype}}(self.table, val) if k != self.table.n_buckets: return self.table.vals[k] @@ -899,7 +901,8 @@ cdef class PyObjectHashTable(HashTable): return self.table.size def __contains__(self, object key): - cdef khiter_t k + cdef: + khiter_t k hash(key) k = kh_get_pymap(self.table, key) @@ -912,7 +915,8 @@ cdef class PyObjectHashTable(HashTable): sizeof(uint32_t)) # flags cpdef get_item(self, object val): - cdef khiter_t k + cdef: + khiter_t k k = kh_get_pymap(self.table, val) if k != self.table.n_buckets: diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index f6af93f85bd5a..c4284ae403e5c 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -4,13 +4,9 @@ Template for each `dtype` helper function for hashtable WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -# ---------------------------------------------------------------------- -# VectorData -# ---------------------------------------------------------------------- - {{py: -# dtype, ttype +# dtype, ttype, c_type dtypes = [('float64', 'float64', 'float64_t'), ('uint64', 'uint64', 'uint64_t'), ('object', 'pymap', 'object'), @@ -18,7 +14,7 @@ dtypes = [('float64', 'float64', 'float64_t'), }} -{{for dtype, ttype, scalar in dtypes}} +{{for dtype, ttype, c_type in dtypes}} @cython.wraparound(False) @@ -34,7 +30,7 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values, khiter_t k Py_ssize_t i, n = len(values) - {{scalar}} val + {{c_type}} val int ret = 0 @@ -77,7 +73,7 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values, {{if dtype == 'object'}} cpdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna): {{else}} -cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna): +cpdef value_count_{{dtype}}({{c_type}}[:] values, bint dropna): {{endif}} cdef: Py_ssize_t i = 0 @@ -127,13 +123,9 @@ cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna): @cython.wraparound(False) @cython.boundscheck(False) {{if dtype == 'object'}} - - def duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'): {{else}} - - -def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'): +def duplicated_{{dtype}}({{c_type}}[:] values, object keep='first'): {{endif}} cdef: int ret = 0 @@ -212,15 +204,10 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'): @cython.wraparound(False) @cython.boundscheck(False) {{if dtype == 'object'}} - - -def ismember_{{dtype}}(ndarray[{{scalar}}] arr, ndarray[{{scalar}}] values): +def ismember_{{dtype}}(ndarray[{{c_type}}] arr, ndarray[{{c_type}}] values): {{else}} - - -def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values): +def ismember_{{dtype}}({{c_type}}[:] arr, {{c_type}}[:] values): {{endif}} - """ Return boolean of values in arr on an element by-element basis @@ -238,7 +225,7 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values): Py_ssize_t i, n, k int ret = 0 ndarray[uint8_t] result - {{scalar}} val + {{c_type}} val kh_{{ttype}}_t *table = kh_init_{{ttype}}() # construct the table diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 5f697f282fee5..48190d123f4a9 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -284,7 +284,7 @@ cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): return start, stop, step, length -def slice_getitem(slice slc not None, ind): +cdef slice_getitem(slice slc, ind): cdef: Py_ssize_t s_start, s_stop, s_step, s_len Py_ssize_t ind_start, ind_stop, ind_step, ind_len diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 6a3f20928f64b..1a712d0c4efa8 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -18,7 +18,7 @@ cnp.import_array() cimport pandas._libs.util as util -from pandas._libs.hashtable cimport Int64Vector, Int64VectorData +from pandas._libs.hashtable cimport Int64Vector from pandas._libs.tslibs.util cimport is_integer_object, is_float_object from pandas._libs.tslibs import Timestamp diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index b13246a4a969c..7fc4fede1996b 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -9,12 +9,9 @@ import warnings import cython from cython import Py_ssize_t -from cpython.list cimport PyList_New -from cpython.object cimport (PyObject_Str, PyObject_RichCompareBool, Py_EQ, - Py_SIZE) +from cpython.object cimport PyObject_RichCompareBool, Py_EQ from cpython.ref cimport Py_INCREF from cpython.tuple cimport PyTuple_SET_ITEM, PyTuple_New -from cpython.unicode cimport PyUnicode_Join from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, PyTime_Check, PyDelta_Check, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d3c32511b40c3..d59ce8db9ba8e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10852,7 +10852,7 @@ def transform(self, func, *args, **kwargs): Also returns None for empty %(klass)s. """ - def _find_valid_index(self, how): + def _find_valid_index(self, how: str): """ Retrieves the index of the first valid value. diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index e3617d53b000a..fddbea8ed0d7a 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -5,8 +5,8 @@ import numpy as np +from pandas._libs import Timestamp import pandas._libs.hashing as hashing -import pandas._libs.tslibs as tslibs from pandas.core.dtypes.cast import infer_dtype_from_scalar from pandas.core.dtypes.common import ( @@ -337,8 +337,8 @@ def _hash_scalar(val, encoding: str = "utf8", hash_key=None): # for tz-aware datetimes, we need the underlying naive UTC value and # not the tz aware object or pd extension type (as # infer_dtype_from_scalar would do) - if not isinstance(val, tslibs.Timestamp): - val = tslibs.Timestamp(val) + if not isinstance(val, Timestamp): + val = Timestamp(val) val = val.tz_convert(None) dtype, val = infer_dtype_from_scalar(val)