diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 1efef480f3a29..ed4c0e4c59609 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -45,7 +45,7 @@ def get_dispatch(dtypes): @cython.wraparound(False) @cython.boundscheck(False) -cpdef map_indices_{{name}}(ndarray[{{c_type}}] index): +def map_indices_{{name}}(ndarray[{{c_type}}] index): """ Produce a dict mapping the values of the input array to their respective locations. @@ -542,7 +542,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values, cdef int PLATFORM_INT = ( np.arange(0, dtype=np.intp)).descr.type_num -cpdef ensure_platform_int(object arr): +def ensure_platform_int(object arr): # GH3033, GH1392 # platform int is the size of the int pointer, e.g. np.intp if util.is_array(arr): @@ -554,7 +554,7 @@ cpdef ensure_platform_int(object arr): return np.array(arr, dtype=np.intp) -cpdef ensure_object(object arr): +def ensure_object(object arr): if util.is_array(arr): if ( arr).descr.type_num == NPY_OBJECT: return arr @@ -587,7 +587,7 @@ def get_dispatch(dtypes): {{for name, c_type, dtype in get_dispatch(dtypes)}} -cpdef ensure_{{name}}(object arr, copy=True): +def ensure_{{name}}(object arr, copy=True): if util.is_array(arr): if ( arr).descr.type_num == NPY_{{c_type}}: return arr diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index d8feda9ef27ef..d683c93c9b32e 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -67,6 +67,7 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil: return result +# TODO: Is this redundant with algos.kth_smallest? cdef inline float64_t kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n) nogil: diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 557e3e34aee25..88b4d97de492c 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -132,6 +132,7 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1, v2[0] = _rotl(v2[0], 32) +# TODO: This appears unused; remove? cpdef uint64_t siphash(bytes data, bytes key) except? 0: if len(key) != 16: raise ValueError("key should be a 16-byte bytestring, " diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 9c906a00bd4fe..d5846f2b42378 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -49,7 +49,7 @@ cpdef get_value_at(ndarray arr, object loc, object tz=None): return util.get_value_at(arr, loc) -cpdef object get_value_box(ndarray arr, object loc): +def get_value_box(arr: ndarray, loc: object) -> object: return get_value_at(arr, loc, tz=None) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 97cc7f96cb24f..996570dae3302 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -184,7 +184,7 @@ cdef class BlockPlacement: return self._as_slice -cpdef slice_canonize(slice s): +cdef slice_canonize(slice s): """ Convert slice to canonical bounded form. """ @@ -255,7 +255,7 @@ cpdef Py_ssize_t slice_len( return length -cpdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): +cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): """ Get (start, stop, step, length) tuple for a slice. diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 5ae20a27c2381..d8e2e8eb4b4ea 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -362,8 +362,8 @@ cdef class Interval(IntervalMixin): @cython.wraparound(False) @cython.boundscheck(False) -cpdef intervals_to_interval_bounds(ndarray intervals, - bint validate_closed=True): +def intervals_to_interval_bounds(ndarray intervals, + bint validate_closed=True): """ Parameters ---------- @@ -415,4 +415,5 @@ cpdef intervals_to_interval_bounds(ndarray intervals, return left, right, closed + include "intervaltree.pxi" diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a6078da28a3ba..6b425d7022ecd 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -107,7 +107,7 @@ def memory_usage_of_objects(object[:] arr): # ---------------------------------------------------------------------- -cpdef bint is_scalar(object val): +def is_scalar(val: object) -> bint: """ Return True if given value is scalar. @@ -137,7 +137,7 @@ cpdef bint is_scalar(object val): or util.is_period_object(val) or is_decimal(val) or is_interval(val) - or is_offset(val)) + or util.is_offset_object(val)) def item_from_zerodim(object val): @@ -457,7 +457,7 @@ def maybe_booleans_to_slice(ndarray[uint8_t] mask): @cython.wraparound(False) @cython.boundscheck(False) -cpdef bint array_equivalent_object(object[:] left, object[:] right): +def array_equivalent_object(left: object[:], right: object[:]) -> bint: """ perform an element by element comparion on 1-d object arrays taking into account nan positions """ cdef: @@ -497,7 +497,7 @@ def astype_intsafe(ndarray[object] arr, new_dtype): return result -cpdef ndarray[object] astype_unicode(ndarray arr): +def astype_unicode(arr: ndarray) -> ndarray[object]: cdef: Py_ssize_t i, n = arr.size ndarray[object] result = np.empty(n, dtype=object) @@ -508,7 +508,7 @@ cpdef ndarray[object] astype_unicode(ndarray arr): return result -cpdef ndarray[object] astype_str(ndarray arr): +def astype_str(arr: ndarray) -> ndarray[object]: cdef: Py_ssize_t i, n = arr.size ndarray[object] result = np.empty(n, dtype=object) @@ -791,19 +791,19 @@ def indices_fast(object index, ndarray[int64_t] labels, list keys, # core.common import for fast inference checks -cpdef bint is_float(object obj): +def is_float(obj: object) -> bint: return util.is_float_object(obj) -cpdef bint is_integer(object obj): +def is_integer(obj: object) -> bint: return util.is_integer_object(obj) -cpdef bint is_bool(object obj): +def is_bool(obj: object) -> bint: return util.is_bool_object(obj) -cpdef bint is_complex(object obj): +def is_complex(obj: object) -> bint: return util.is_complex_object(obj) @@ -815,15 +815,11 @@ cpdef bint is_interval(object obj): return getattr(obj, '_typ', '_typ') == 'interval' -cpdef bint is_period(object val): +def is_period(val: object) -> bint: """ Return a boolean if this is a Period object """ return util.is_period_object(val) -cdef inline bint is_offset(object val): - return getattr(val, '_typ', '_typ') == 'dateoffset' - - _TYPE_MAP = { 'categorical': 'categorical', 'category': 'categorical', @@ -1225,7 +1221,7 @@ def infer_dtype(object value, bint skipna=False): if is_bytes_array(values, skipna=skipna): return 'bytes' - elif is_period(val): + elif util.is_period_object(val): if is_period_array(values): return 'period' @@ -1243,7 +1239,7 @@ def infer_dtype(object value, bint skipna=False): return 'mixed' -cpdef object infer_datetimelike_array(object arr): +def infer_datetimelike_array(arr: object) -> object: """ infer if we have a datetime or timedelta array - date: we have *only* date and maybe strings, nulls @@ -1580,7 +1576,7 @@ cpdef bint is_datetime64_array(ndarray values): return validator.validate(values) -cpdef bint is_datetime_with_singletz_array(ndarray values): +def is_datetime_with_singletz_array(values: ndarray) -> bint: """ Check values have the same tzinfo attribute. Doesn't check values are datetime-like types. @@ -1616,7 +1612,8 @@ cdef class TimedeltaValidator(TemporalValidator): return is_null_timedelta64(value) -cpdef bint is_timedelta_array(ndarray values): +# TODO: Not used outside of tests; remove? +def is_timedelta_array(values: ndarray) -> bint: cdef: TimedeltaValidator validator = TimedeltaValidator(len(values), skipna=True) @@ -1628,7 +1625,8 @@ cdef class Timedelta64Validator(TimedeltaValidator): return util.is_timedelta64_object(value) -cpdef bint is_timedelta64_array(ndarray values): +# TODO: Not used outside of tests; remove? +def is_timedelta64_array(values: ndarray) -> bint: cdef: Timedelta64Validator validator = Timedelta64Validator(len(values), skipna=True) @@ -1672,7 +1670,7 @@ cpdef bint is_time_array(ndarray values, bint skipna=False): cdef class PeriodValidator(TemporalValidator): cdef inline bint is_value_typed(self, object value) except -1: - return is_period(value) + return util.is_period_object(value) cdef inline bint is_valid_null(self, object value) except -1: return is_null_period(value) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index bdd279b19208b..93fae695d51fd 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -300,7 +300,7 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None, return result -cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): +def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): """ convert the ndarray according to the unit if errors: diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index 796f4b754857e..8e55ffad8d231 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -3,8 +3,7 @@ cimport cython from cython cimport Py_ssize_t -from cpython cimport (PyString_Check, PyBytes_Check, PyUnicode_Check, - PyBytes_GET_SIZE, PyUnicode_GET_SIZE) +from cpython cimport PyBytes_GET_SIZE, PyUnicode_GET_SIZE try: from cpython cimport PyString_GET_SIZE @@ -124,7 +123,7 @@ def convert_json_to_lines(object arr): # stata, pytables @cython.boundscheck(False) @cython.wraparound(False) -cpdef Py_ssize_t max_len_string_array(pandas_string[:] arr): +def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t: """ return the maximum size of elements in a 1-dim string array """ cdef: Py_ssize_t i, m = 0, l = 0, length = arr.shape[0] @@ -132,11 +131,11 @@ cpdef Py_ssize_t max_len_string_array(pandas_string[:] arr): for i in range(length): v = arr[i] - if PyString_Check(v): + if isinstance(v, str): l = PyString_GET_SIZE(v) - elif PyBytes_Check(v): + elif isinstance(v, bytes): l = PyBytes_GET_SIZE(v) - elif PyUnicode_Check(v): + elif isinstance(v, unicode): l = PyUnicode_GET_SIZE(v) if l > m: