diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 87130906ef28b..4242a76dcc3b7 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -8,18 +8,16 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in # ensure_dtype # ---------------------------------------------------------------------- -cdef int PLATFORM_INT = (np.arange(0, dtype=np.intp)).descr.type_num - def ensure_platform_int(object arr): # GH3033, GH1392 # platform int is the size of the int pointer, e.g. np.intp if util.is_array(arr): - if (arr).descr.type_num == PLATFORM_INT: + if (arr).descr.type_num == cnp.NPY_INTP: return arr else: # equiv: arr.astype(np.intp) - return cnp.PyArray_Cast(arr, PLATFORM_INT) + return cnp.PyArray_Cast(arr, cnp.NPY_INTP) else: return np.array(arr, dtype=np.intp) diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in index ec041c03b05e1..2a3858674af9e 100644 --- a/pandas/_libs/algos_take_helper.pxi.in +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -103,7 +103,7 @@ def take_2d_axis0_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values, {{else}} def take_2d_axis0_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, {{endif}} - ndarray[intp_t] indexer, + ndarray[intp_t, ndim=1] indexer, {{c_type_out}}[:, :] out, fill_value=np.nan): cdef: @@ -158,7 +158,7 @@ def take_2d_axis1_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values, {{else}} def take_2d_axis1_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, {{endif}} - ndarray[intp_t] indexer, + ndarray[intp_t, ndim=1] indexer, {{c_type_out}}[:, :] out, fill_value=np.nan): @@ -195,8 +195,8 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[intp_t] idx0 = indexer[0] - ndarray[intp_t] idx1 = indexer[1] + ndarray[intp_t, ndim=1] idx0 = indexer[0] + ndarray[intp_t, ndim=1] idx1 = indexer[1] {{c_type_out}} fv n = len(idx0) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 87709ac6c33bf..2f0bcefefaaa1 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -227,7 +227,7 @@ cdef class BlockPlacement: cdef: slice nv, s = self._ensure_has_slice() Py_ssize_t other_int, start, stop, step, l - ndarray newarr + ndarray[intp_t, ndim=1] newarr if s is not None: # see if we are either all-above or all-below, each of which @@ -260,7 +260,7 @@ cdef class BlockPlacement: cdef: slice slc = self._ensure_has_slice() slice new_slice - ndarray new_placement + ndarray[intp_t, ndim=1] new_placement if slc is not None and slc.step == 1: new_slc = slice(slc.start * factor, slc.stop * factor, 1) @@ -345,7 +345,9 @@ cpdef Py_ssize_t slice_len(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except - return length -cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): +cdef (Py_ssize_t, Py_ssize_t, Py_ssize_t, Py_ssize_t) slice_get_indices_ex( + slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX +): """ Get (start, stop, step, length) tuple for a slice. @@ -460,9 +462,11 @@ def get_blkno_indexers( # blockno handling. cdef: int64_t cur_blkno - Py_ssize_t i, start, stop, n, diff, tot_len + Py_ssize_t i, start, stop, n, diff + cnp.npy_intp tot_len int64_t blkno object group_dict = defaultdict(list) + ndarray[int64_t, ndim=1] arr n = blknos.shape[0] result = list() @@ -495,7 +499,8 @@ def get_blkno_indexers( result.append((blkno, slice(slices[0][0], slices[0][1]))) else: tot_len = sum(stop - start for start, stop in slices) - arr = np.empty(tot_len, dtype=np.int64) + # equiv np.empty(tot_len, dtype=np.int64) + arr = cnp.PyArray_EMPTY(1, &tot_len, cnp.NPY_INT64, 0) i = 0 for start, stop in slices: @@ -526,8 +531,13 @@ def get_blkno_placements(blknos, group: bool = True): yield blkno, BlockPlacement(indexer) +@cython.boundscheck(False) +@cython.wraparound(False) cpdef update_blklocs_and_blknos( - ndarray[intp_t] blklocs, ndarray[intp_t] blknos, Py_ssize_t loc, intp_t nblocks + ndarray[intp_t, ndim=1] blklocs, + ndarray[intp_t, ndim=1] blknos, + Py_ssize_t loc, + intp_t nblocks, ): """ Update blklocs and blknos when a new column is inserted at 'loc'. @@ -535,7 +545,7 @@ cpdef update_blklocs_and_blknos( cdef: Py_ssize_t i cnp.npy_intp length = len(blklocs) + 1 - ndarray[intp_t] new_blklocs, new_blknos + ndarray[intp_t, ndim=1] new_blklocs, new_blknos # equiv: new_blklocs = np.empty(length, dtype=np.intp) new_blklocs = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0) @@ -693,7 +703,7 @@ cdef class BlockManager: cnp.npy_intp length = self.shape[0] SharedBlock blk BlockPlacement bp - ndarray[intp_t] new_blknos, new_blklocs + ndarray[intp_t, ndim=1] new_blknos, new_blklocs # equiv: np.empty(length, dtype=np.intp) new_blknos = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0) @@ -711,7 +721,11 @@ cdef class BlockManager: new_blknos[j] = blkno new_blklocs[j] = i - for blkno in new_blknos: + for i in range(length): + # faster than `for blkno in new_blknos` + # https://github.com/cython/cython/issues/4393 + blkno = new_blknos[i] + # If there are any -1s remaining, this indicates that our mgr_locs # are invalid. if blkno == -1: diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 7e49c7f1952c4..2c7b052917463 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -448,7 +448,7 @@ def fast_zip(list ndarrays) -> ndarray[object]: """ cdef: Py_ssize_t i, j, k, n - ndarray[object] result + ndarray[object, ndim=1] result flatiter it object val, tup @@ -507,7 +507,7 @@ def get_reverse_indexer(const intp_t[:] indexer, Py_ssize_t length) -> ndarray: """ cdef: Py_ssize_t i, n = len(indexer) - ndarray[intp_t] rev_indexer + ndarray[intp_t, ndim=1] rev_indexer intp_t idx rev_indexer = np.empty(length, dtype=np.intp) @@ -540,7 +540,7 @@ def has_infs(floating[:] arr) -> bool: return ret -def maybe_indices_to_slice(ndarray[intp_t] indices, int max_len): +def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, int max_len): cdef: Py_ssize_t i, n = len(indices) int k, vstart, vlast, v @@ -579,7 +579,7 @@ def maybe_indices_to_slice(ndarray[intp_t] indices, int max_len): @cython.wraparound(False) @cython.boundscheck(False) -def maybe_booleans_to_slice(ndarray[uint8_t] mask): +def maybe_booleans_to_slice(ndarray[uint8_t, ndim=1] mask): cdef: Py_ssize_t i, n = len(mask) Py_ssize_t start = 0, end = 0 @@ -775,14 +775,14 @@ def is_all_arraylike(obj: list) -> bool: # is a general, O(max(len(values), len(binner))) method. @cython.boundscheck(False) @cython.wraparound(False) -def generate_bins_dt64(ndarray[int64_t] values, const int64_t[:] binner, +def generate_bins_dt64(ndarray[int64_t, ndim=1] values, const int64_t[:] binner, object closed='left', bint hasnans=False): """ Int64 (datetime64) version of generic python version in ``groupby.py``. """ cdef: Py_ssize_t lenidx, lenbin, i, j, bc, vc - ndarray[int64_t] bins + ndarray[int64_t, ndim=1] bins int64_t l_bin, r_bin, nat_count bint right_closed = closed == 'right' @@ -931,7 +931,7 @@ def generate_slices(const intp_t[:] labels, Py_ssize_t ngroups): return np.asarray(starts), np.asarray(ends) -def indices_fast(ndarray[intp_t] index, const int64_t[:] labels, list keys, +def indices_fast(ndarray[intp_t, ndim=1] index, const int64_t[:] labels, list keys, list sorted_labels) -> dict: """ Parameters @@ -2067,7 +2067,9 @@ cdef bint is_period_array(ndarray[object] values): if len(values) == 0: return False - for val in values: + for i in range(n): + val = values[i] + if is_period_object(val): if dtype_code == -10000: dtype_code = val._dtype._dtype_code @@ -2102,7 +2104,9 @@ cpdef bint is_interval_array(ndarray values): if len(values) == 0: return False - for val in values: + for i in range(n): + val = values[i] + if is_interval(val): if closed is None: closed = val.closed @@ -2144,7 +2148,7 @@ cpdef bint is_interval_array(ndarray values): @cython.boundscheck(False) @cython.wraparound(False) def maybe_convert_numeric( - ndarray[object] values, + ndarray[object, ndim=1] values, set na_values, bint convert_empty=True, bint coerce_numeric=False, @@ -2205,12 +2209,12 @@ def maybe_convert_numeric( int status, maybe_int Py_ssize_t i, n = values.size Seen seen = Seen(coerce_numeric) - ndarray[float64_t] floats = np.empty(n, dtype='f8') - ndarray[complex128_t] complexes = np.empty(n, dtype='c16') - ndarray[int64_t] ints = np.empty(n, dtype='i8') - ndarray[uint64_t] uints = np.empty(n, dtype='u8') - ndarray[uint8_t] bools = np.empty(n, dtype='u1') - ndarray[uint8_t] mask = np.zeros(n, dtype="u1") + ndarray[float64_t, ndim=1] floats = np.empty(n, dtype='f8') + ndarray[complex128_t, ndim=1] complexes = np.empty(n, dtype='c16') + ndarray[int64_t, ndim=1] ints = np.empty(n, dtype='i8') + ndarray[uint64_t, ndim=1] uints = np.empty(n, dtype='u8') + ndarray[uint8_t, ndim=1] bools = np.empty(n, dtype='u1') + ndarray[uint8_t, ndim=1] mask = np.zeros(n, dtype="u1") float64_t fval bint allow_null_in_int = convert_to_masked_nullable diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index ff15a2c720c2c..cfe9f40f12452 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -7,10 +7,9 @@ from numpy cimport import_array import_array() -from pandas._libs.lib import is_complex - from pandas._libs.util cimport ( is_array, + is_complex_object, is_real_number_object, ) @@ -196,7 +195,7 @@ cpdef assert_almost_equal(a, b, f"with rtol={rtol}, atol={atol}") return True - if is_complex(a) and is_complex(b): + if is_complex_object(a) and is_complex_object(b): if array_equivalent(a, b, strict_nan=True): # inf comparison return True