diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 42dda15ea2cbb..1fcecba0821c8 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -19,33 +19,44 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in # 1-d template #---------------------------------------------------------------------- -{{py: +ctypedef fused algos_t: + float64_t + float32_t + object + int32_t + int64_t + uint64_t + uint8_t -# name, c_type, dtype, can_hold_na, nogil -dtypes = [('float64', 'float64_t', 'np.float64', True, True), - ('float32', 'float32_t', 'np.float32', True, True), - ('object', 'object', 'object', True, False), - ('int32', 'int32_t', 'np.int32', False, True), - ('int64', 'int64_t', 'np.int64', False, True), - ('uint64', 'uint64_t', 'np.uint64', False, True), - ('bool', 'uint8_t', 'np.bool', False, True)] -def get_dispatch(dtypes): +@cython.wraparound(False) +@cython.boundscheck(False) +def arrmap(ndarray[algos_t] index, object func): + cdef: + Py_ssize_t length = index.shape[0] + Py_ssize_t i = 0 + ndarray[object] result = np.empty(length, dtype=np.object_) - for name, c_type, dtype, can_hold_na, nogil in dtypes: + from pandas._libs.lib import maybe_convert_objects - nogil_str = 'with nogil:' if nogil else '' - tab = ' ' if nogil else '' - yield name, c_type, dtype, can_hold_na, nogil_str, tab -}} + for i in range(length): + result[i] = func(index[i]) + + return maybe_convert_objects(result) -{{for name, c_type, dtype, can_hold_na, nogil_str, tab - in get_dispatch(dtypes)}} + +arrmap_float64 = arrmap["float64_t"] +arrmap_float32 = arrmap["float32_t"] +arrmap_object = arrmap["object"] +arrmap_int32 = arrmap["int32_t"] +arrmap_int64 = arrmap["int64_t"] +arrmap_uint64 = arrmap["uint64_t"] +arrmap_bool = arrmap["uint8_t"] @cython.wraparound(False) @cython.boundscheck(False) -cpdef map_indices_{{name}}(ndarray[{{c_type}}] index): +cpdef map_indices(ndarray[algos_t] 
index): """ Produce a dict mapping the values of the input array to their respective locations. @@ -55,8 +66,9 @@ cpdef map_indices_{{name}}(ndarray[{{c_type}}] index): """ Better to do this with Cython because of the enormous speed boost. """ - cdef Py_ssize_t i, length - cdef dict result = {} + cdef: + Py_ssize_t i, length + dict result = {} length = len(index) @@ -66,13 +78,22 @@ cpdef map_indices_{{name}}(ndarray[{{c_type}}] index): return result +map_indices_float64 = map_indices["float64_t"] +map_indices_float32 = map_indices["float32_t"] +map_indices_object = map_indices["object"] +map_indices_int32 = map_indices["int32_t"] +map_indices_int64 = map_indices["int64_t"] +map_indices_uint64 = map_indices["uint64_t"] +map_indices_bool = map_indices["uint8_t"] + + @cython.boundscheck(False) @cython.wraparound(False) -def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, limit=None): +def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None): cdef: Py_ssize_t i, j, nleft, nright ndarray[int64_t, ndim=1] indexer - {{c_type}} cur, next + algos_t cur, next int lim, fill_count = 0 nleft = len(old) @@ -129,20 +150,28 @@ def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, limit=None): return indexer +pad_float64 = pad["float64_t"] +pad_float32 = pad["float32_t"] +pad_object = pad["object"] +pad_int32 = pad["int32_t"] +pad_int64 = pad["int64_t"] +pad_uint64 = pad["uint64_t"] +pad_bool = pad["uint8_t"] + @cython.boundscheck(False) @cython.wraparound(False) -def pad_inplace_{{name}}(ndarray[{{c_type}}] values, - ndarray[uint8_t, cast=True] mask, - limit=None): +def pad_inplace(ndarray[algos_t] values, + ndarray[uint8_t, cast=True] mask, + limit=None): cdef: - Py_ssize_t i, N - {{c_type}} val - int lim, fill_count = 0 + Py_ssize_t i, N + algos_t val + int lim, fill_count = 0 N = len(values) - # GH 2778 + # GH#2778 if N == 0: return @@ -167,19 +196,28 @@ def pad_inplace_{{name}}(ndarray[{{c_type}}] values, val = values[i]
+pad_inplace_float64 = pad_inplace["float64_t"] +pad_inplace_float32 = pad_inplace["float32_t"] +pad_inplace_object = pad_inplace["object"] +pad_inplace_int32 = pad_inplace["int32_t"] +pad_inplace_int64 = pad_inplace["int64_t"] +pad_inplace_uint64 = pad_inplace["uint64_t"] +pad_inplace_bool = pad_inplace["uint8_t"] + + @cython.boundscheck(False) @cython.wraparound(False) -def pad_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values, - ndarray[uint8_t, ndim=2] mask, - limit=None): +def pad_2d_inplace(ndarray[algos_t, ndim=2] values, + ndarray[uint8_t, ndim=2] mask, + limit=None): cdef: Py_ssize_t i, j, N, K - {{c_type}} val + algos_t val int lim, fill_count = 0 K, N = ( values).shape - # GH 2778 + # GH#2778 if N == 0: return @@ -205,6 +243,16 @@ def pad_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values, fill_count = 0 val = values[j, i] + +pad_2d_inplace_float64 = pad_2d_inplace["float64_t"] +pad_2d_inplace_float32 = pad_2d_inplace["float32_t"] +pad_2d_inplace_object = pad_2d_inplace["object"] +pad_2d_inplace_int32 = pad_2d_inplace["int32_t"] +pad_2d_inplace_int64 = pad_2d_inplace["int64_t"] +pad_2d_inplace_uint64 = pad_2d_inplace["uint64_t"] +pad_2d_inplace_bool = pad_2d_inplace["uint8_t"] + + """ Backfilling logic for generating fill vector @@ -233,13 +281,12 @@ D @cython.boundscheck(False) @cython.wraparound(False) -def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, - limit=None): +def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None): cdef: - Py_ssize_t i, j, nleft, nright - ndarray[int64_t, ndim=1] indexer - {{c_type}} cur, prev - int lim, fill_count = 0 + Py_ssize_t i, j, nleft, nright + ndarray[int64_t, ndim=1] indexer + algos_t cur, prev + int lim, fill_count = 0 nleft = len(old) nright = len(new) @@ -297,19 +344,28 @@ def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, return indexer +backfill_float64 = backfill["float64_t"] +backfill_float32 = backfill["float32_t"]
+backfill_object = backfill["object"] +backfill_int32 = backfill["int32_t"] +backfill_int64 = backfill["int64_t"] +backfill_uint64 = backfill["uint64_t"] +backfill_bool = backfill["uint8_t"] + + @cython.boundscheck(False) @cython.wraparound(False) -def backfill_inplace_{{name}}(ndarray[{{c_type}}] values, - ndarray[uint8_t, cast=True] mask, - limit=None): +def backfill_inplace(ndarray[algos_t] values, + ndarray[uint8_t, cast=True] mask, + limit=None): cdef: - Py_ssize_t i, N - {{c_type}} val - int lim, fill_count = 0 + Py_ssize_t i, N + algos_t val + int lim, fill_count = 0 N = len(values) - # GH 2778 + # GH#2778 if N == 0: return @@ -334,19 +390,28 @@ def backfill_inplace_{{name}}(ndarray[{{c_type}}] values, val = values[i] +backfill_inplace_float64 = backfill_inplace["float64_t"] +backfill_inplace_float32 = backfill_inplace["float32_t"] +backfill_inplace_object = backfill_inplace["object"] +backfill_inplace_int32 = backfill_inplace["int32_t"] +backfill_inplace_int64 = backfill_inplace["int64_t"] +backfill_inplace_uint64 = backfill_inplace["uint64_t"] +backfill_inplace_bool = backfill_inplace["uint8_t"] + + @cython.boundscheck(False) @cython.wraparound(False) -def backfill_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values, - ndarray[uint8_t, ndim=2] mask, - limit=None): +def backfill_2d_inplace(ndarray[algos_t, ndim=2] values, + ndarray[uint8_t, ndim=2] mask, + limit=None): cdef: Py_ssize_t i, j, N, K - {{c_type}} val + algos_t val int lim, fill_count = 0 K, N = ( values).shape - # GH 2778 + # GH#2778 if N == 0: return @@ -373,6 +438,39 @@ def backfill_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values, val = values[j, i] +backfill_2d_inplace_float64 = backfill_2d_inplace["float64_t"] +backfill_2d_inplace_float32 = backfill_2d_inplace["float32_t"] +backfill_2d_inplace_object = backfill_2d_inplace["object"] +backfill_2d_inplace_int32 = backfill_2d_inplace["int32_t"] +backfill_2d_inplace_int64 = backfill_2d_inplace["int64_t"]
+backfill_2d_inplace_uint64 = backfill_2d_inplace["uint64_t"] +backfill_2d_inplace_bool = backfill_2d_inplace["uint8_t"] + + +{{py: + +# name, c_type, dtype, can_hold_na, nogil +dtypes = [('float64', 'float64_t', 'np.float64', True, True), + ('float32', 'float32_t', 'np.float32', True, True), + ('object', 'object', 'object', True, False), + ('int32', 'int32_t', 'np.int32', False, True), + ('int64', 'int64_t', 'np.int64', False, True), + ('uint64', 'uint64_t', 'np.uint64', False, True), + ('bool', 'uint8_t', 'np.bool', False, True)] + +def get_dispatch(dtypes): + + for name, c_type, dtype, can_hold_na, nogil in dtypes: + + nogil_str = 'with nogil:' if nogil else '' + tab = ' ' if nogil else '' + yield name, c_type, dtype, can_hold_na, nogil_str, tab +}} + +{{for name, c_type, dtype, can_hold_na, nogil_str, tab + in get_dispatch(dtypes)}} + + @cython.boundscheck(False) @cython.wraparound(False) def is_monotonic_{{name}}(ndarray[{{c_type}}] arr, bint timelike): @@ -429,22 +527,6 @@ def is_monotonic_{{name}}(ndarray[{{c_type}}] arr, bint timelike): return is_monotonic_inc, is_monotonic_dec, \ is_unique and (is_monotonic_inc or is_monotonic_dec) - -@cython.wraparound(False) -@cython.boundscheck(False) -def arrmap_{{name}}(ndarray[{{c_type}}] index, object func): - cdef: - Py_ssize_t length = index.shape[0] - Py_ssize_t i = 0 - ndarray[object] result = np.empty(length, dtype=np.object_) - - from pandas._libs.lib import maybe_convert_objects - - for i in range(length): - result[i] = func(index[i]) - - return maybe_convert_objects(result) - {{endfor}} #---------------------------------------------------------------------- diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in index 0e69324acd341..4883e067ea8c4 100644 --- a/pandas/_libs/algos_take_helper.pxi.in +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -264,29 +264,34 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, # take_2d internal function 
#---------------------------------------------------------------------- -{{py: - -# dtype, ctype, init_result -dtypes = [('float64', 'float64_t', 'np.empty_like(values)'), - ('uint64', 'uint64_t', 'np.empty_like(values)'), - ('object', 'object', 'values.copy()'), - ('int64', 'int64_t', 'np.empty_like(values)')] -}} +ctypedef fused take_t: + float64_t + uint64_t + object + int64_t -{{for dtype, ctype, init_result in dtypes}} -cdef _take_2d_{{dtype}}(ndarray[{{ctype}}, ndim=2] values, object idx): +cdef _take_2d(ndarray[take_t, ndim=2] values, object idx): cdef: Py_ssize_t i, j, N, K ndarray[Py_ssize_t, ndim=2, cast=True] indexer = idx - ndarray[{{ctype}}, ndim=2] result + ndarray[take_t, ndim=2] result object val N, K = ( values).shape - result = {{init_result}} + if take_t is object: + result = values.copy() + else: + result = np.empty_like(values) + for i in range(N): for j in range(K): result[i, j] = values[i, indexer[i, j]] return result -{{endfor}} + +# TODO: Are these treated as cdefs? 
+_take_2d_float64 = _take_2d[float64_t] +_take_2d_uint64 = _take_2d[uint64_t] +_take_2d_object = _take_2d[object] +_take_2d_int64 = _take_2d[int64_t] diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 0062a6c8d31ab..7b1dc8f41575c 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -593,37 +593,26 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, # group_min, group_max #---------------------------------------------------------------------- -{{py: - -# name, c_type, dest_type2, nan_val -dtypes = [('float64', 'float64_t', 'NAN', 'np.inf'), - ('float32', 'float32_t', 'NAN', 'np.inf'), - ('int64', 'int64_t', 'iNaT', '_int64_max')] - -def get_dispatch(dtypes): - - for name, dest_type2, nan_val, inf_val in dtypes: - yield name, dest_type2, nan_val, inf_val -}} - - -{{for name, dest_type2, nan_val, inf_val in get_dispatch(dtypes)}} +ctypedef fused group_t: + float64_t + float32_t + int64_t @cython.wraparound(False) @cython.boundscheck(False) -def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, - ndarray[int64_t] counts, - ndarray[{{dest_type2}}, ndim=2] values, - ndarray[int64_t] labels, - Py_ssize_t min_count=-1): +def group_max(ndarray[group_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[group_t, ndim=2] values, + ndarray[int64_t] labels, + Py_ssize_t min_count=-1): """ Only aggregates on axis=0 """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{dest_type2}} val, count - ndarray[{{dest_type2}}, ndim=2] maxx, nobs + group_t val, count + ndarray[group_t, ndim=2] maxx, nobs assert min_count == -1, "'min_count' only used in add and prod" @@ -633,7 +622,12 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, nobs = np.zeros_like(out) maxx = np.empty_like(out) - maxx.fill(-{{inf_val}}) + + if group_t is int64_t: + # evaluated at compile-time + maxx.fill(-_int64_max) + else: + maxx.fill(-np.inf) N, K = ( values).shape @@ -648,11 +642,9 @@ def 
group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, val = values[i, j] # not nan - {{if name == 'int64'}} - if val != {{nan_val}}: - {{else}} - if val == val and val != {{nan_val}}: - {{endif}} + if ((group_t is int64_t and val != iNaT) or + (group_t is not int64_t and + val == val and val != NAN)): nobs[lab, j] += 1 if val > maxx[lab, j]: maxx[lab, j] = val @@ -660,25 +652,33 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, for i in range(ncounts): for j in range(K): if nobs[i, j] == 0: - out[i, j] = {{nan_val}} + if group_t is int64_t: + out[i, j] = iNaT + else: + out[i, j] = NAN else: out[i, j] = maxx[i, j] +group_max_float64 = group_max["float64_t"] +group_max_float32 = group_max["float32_t"] +group_max_int64 = group_max["int64_t"] + + @cython.wraparound(False) @cython.boundscheck(False) -def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, - ndarray[int64_t] counts, - ndarray[{{dest_type2}}, ndim=2] values, - ndarray[int64_t] labels, - Py_ssize_t min_count=-1): +def group_min(ndarray[group_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[group_t, ndim=2] values, + ndarray[int64_t] labels, + Py_ssize_t min_count=-1): """ Only aggregates on axis=0 """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{dest_type2}} val, count - ndarray[{{dest_type2}}, ndim=2] minx, nobs + group_t val, count + ndarray[group_t, ndim=2] minx, nobs assert min_count == -1, "'min_count' only used in add and prod" @@ -688,7 +688,12 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, nobs = np.zeros_like(out) minx = np.empty_like(out) - minx.fill({{inf_val}}) + + if group_t is int64_t: + # evaluated at compile-time + minx.fill(_int64_max) + else: + minx.fill(np.inf) N, K = ( values).shape @@ -703,11 +708,9 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, val = values[i, j] # not nan - {{if name == 'int64'}} - if val != {{nan_val}}: - {{else}} - if val == val and val != {{nan_val}}: - {{endif}} + if ((group_t is int64_t and 
val != iNaT) or + (group_t is not int64_t and + val == val and val != NAN)): nobs[lab, j] += 1 if val < minx[lab, j]: minx[lab, j] = val @@ -715,29 +718,42 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, for i in range(ncounts): for j in range(K): if nobs[i, j] == 0: - out[i, j] = {{nan_val}} + if group_t is int64_t: + out[i, j] = iNaT + else: + out[i, j] = NAN else: out[i, j] = minx[i, j] +group_min_float64 = group_min["float64_t"] +group_min_float32 = group_min["float32_t"] +group_min_int64 = group_min["int64_t"] + + @cython.boundscheck(False) @cython.wraparound(False) -def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, - ndarray[{{dest_type2}}, ndim=2] values, - ndarray[int64_t] labels, - bint is_datetimelike): +def group_cummin(ndarray[group_t, ndim=2] out, + ndarray[group_t, ndim=2] values, + ndarray[int64_t] labels, + bint is_datetimelike): """ Only transforms on axis=0 """ cdef: Py_ssize_t i, j, N, K, size - {{dest_type2}} val, mval - ndarray[{{dest_type2}}, ndim=2] accum + group_t val, mval + ndarray[group_t, ndim=2] accum int64_t lab N, K = ( values).shape accum = np.empty_like(values) - accum.fill({{inf_val}}) + + if group_t is int64_t: + # evaluated at compile-time + accum.fill(_int64_max) + else: + accum.fill(np.inf) with nogil: for i in range(N): @@ -749,37 +765,50 @@ def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, val = values[i, j] # val = nan - {{if name == 'int64'}} - if is_datetimelike and val == {{nan_val}}: - out[i, j] = {{nan_val}} + if group_t is int64_t: + # evaluated at compile-time + if is_datetimelike and val == iNaT: + out[i, j] = iNaT + continue + else: - {{else}} - if val == val: - {{endif}} - mval = accum[lab, j] - if val < mval: - accum[lab, j] = mval = val - out[i, j] = mval + if val != val: + continue + + mval = accum[lab, j] + if val < mval: + accum[lab, j] = mval = val + out[i, j] = mval + + +group_cummin_float64 = group_cummin["float64_t"] +group_cummin_float32 = group_cummin["float32_t"] 
+group_cummin_int64 = group_cummin["int64_t"] @cython.boundscheck(False) @cython.wraparound(False) -def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, - ndarray[{{dest_type2}}, ndim=2] values, - ndarray[int64_t] labels, - bint is_datetimelike): +def group_cummax(ndarray[group_t, ndim=2] out, + ndarray[group_t, ndim=2] values, + ndarray[int64_t] labels, + bint is_datetimelike): """ Only transforms on axis=0 """ cdef: Py_ssize_t i, j, N, K, size - {{dest_type2}} val, mval - ndarray[{{dest_type2}}, ndim=2] accum + group_t val, mval + ndarray[group_t, ndim=2] accum int64_t lab N, K = ( values).shape accum = np.empty_like(values) - accum.fill(-{{inf_val}}) + + if group_t is int64_t: + # evaluated at compile-time + accum.fill(-_int64_max) + else: + accum.fill(-np.inf) with nogil: for i in range(N): @@ -790,16 +819,21 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, for j in range(K): val = values[i, j] - {{if name == 'int64'}} - if is_datetimelike and val == {{nan_val}}: - out[i, j] = {{nan_val}} + if group_t is int64_t: + # evaluated at compile-time + if is_datetimelike and val == iNaT: + out[i, j] = iNaT + continue else: - {{else}} - if val == val: - {{endif}} - mval = accum[lab, j] - if val > mval: - accum[lab, j] = mval = val - out[i, j] = mval + if val != val: + continue -{{endfor}} + mval = accum[lab, j] + if val > mval: + accum[lab, j] = mval = val + out[i, j] = mval + + +group_cummax_float64 = group_cummax["float64_t"] +group_cummax_float32 = group_cummax["float32_t"] +group_cummax_int64 = group_cummax["int64_t"] diff --git a/pandas/_libs/join_helper.pxi.in b/pandas/_libs/join_helper.pxi.in index feb8cfb76a7f0..3b84edc1c60d3 100644 --- a/pandas/_libs/join_helper.pxi.in +++ b/pandas/_libs/join_helper.pxi.in @@ -8,24 +8,13 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in # left_join_indexer, inner_join_indexer, outer_join_indexer #---------------------------------------------------------------------- -{{py: - 
-# name, c_type, dtype -dtypes = [('float64', 'float64_t', 'np.float64'), - ('float32', 'float32_t', 'np.float32'), - ('object', 'object', 'object'), - ('int32', 'int32_t', 'np.int32'), - ('int64', 'int64_t', 'np.int64'), - ('uint64', 'uint64_t', 'np.uint64')] - -def get_dispatch(dtypes): - - for name, c_type, dtype in dtypes: - yield name, c_type, dtype - -}} - -{{for name, c_type, dtype in get_dispatch(dtypes)}} +ctypedef fused join_t: + float64_t + float32_t + object + int32_t + int64_t + uint64_t # Joins on ordered, unique indices @@ -34,12 +23,11 @@ def get_dispatch(dtypes): @cython.wraparound(False) @cython.boundscheck(False) -def left_join_indexer_unique_{{name}}(ndarray[{{c_type}}] left, - ndarray[{{c_type}}] right): +def left_join_indexer_unique(ndarray[join_t] left, ndarray[join_t] right): cdef: Py_ssize_t i, j, nleft, nright ndarray[int64_t] indexer - {{c_type}} lval, rval + join_t lval, rval i = 0 j = 0 @@ -78,6 +66,37 @@ def left_join_indexer_unique_{{name}}(ndarray[{{c_type}}] left, return indexer +left_join_indexer_unique_float64 = left_join_indexer_unique["float64_t"] +left_join_indexer_unique_float32 = left_join_indexer_unique["float32_t"] +left_join_indexer_unique_object = left_join_indexer_unique["object"] +left_join_indexer_unique_int32 = left_join_indexer_unique["int32_t"] +left_join_indexer_unique_int64 = left_join_indexer_unique["int64_t"] +left_join_indexer_unique_uint64 = left_join_indexer_unique["uint64_t"] + +{{py: + +# name, c_type, dtype +dtypes = [('float64', 'float64_t', 'np.float64'), + ('float32', 'float32_t', 'np.float32'), + ('object', 'object', 'object'), + ('int32', 'int32_t', 'np.int32'), + ('int64', 'int64_t', 'np.int64'), + ('uint64', 'uint64_t', 'np.uint64')] + +def get_dispatch(dtypes): + + for name, c_type, dtype in dtypes: + yield name, c_type, dtype + +}} + +{{for name, c_type, dtype in get_dispatch(dtypes)}} + +# Joins on ordered, unique indices + +# right might contain non-unique values + + # @cython.wraparound(False) 
# @cython.boundscheck(False) def left_join_indexer_{{name}}(ndarray[{{c_type}}] left, diff --git a/pandas/_libs/reshape_helper.pxi.in b/pandas/_libs/reshape_helper.pxi.in index bb9a5977f8b45..0eab84c71ee71 100644 --- a/pandas/_libs/reshape_helper.pxi.in +++ b/pandas/_libs/reshape_helper.pxi.in @@ -8,34 +8,28 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in # reshape # ---------------------------------------------------------------------- -{{py: - -# name, c_type -dtypes = [('uint8', 'uint8_t'), - ('uint16', 'uint16_t'), - ('uint32', 'uint32_t'), - ('uint64', 'uint64_t'), - ('int8', 'int8_t'), - ('int16', 'int16_t'), - ('int32', 'int32_t'), - ('int64', 'int64_t'), - ('float32', 'float32_t'), - ('float64', 'float64_t'), - ('object', 'object')] -}} - -{{for dtype, c_type in dtypes}} - +ctypedef fused reshape_t: + uint8_t + uint16_t + uint32_t + uint64_t + int8_t + int16_t + int32_t + int64_t + float32_t + float64_t + object @cython.wraparound(False) @cython.boundscheck(False) -def unstack_{{dtype}}(ndarray[{{c_type}}, ndim=2] values, - ndarray[uint8_t, ndim=1] mask, - Py_ssize_t stride, - Py_ssize_t length, - Py_ssize_t width, - ndarray[{{c_type}}, ndim=2] new_values, - ndarray[uint8_t, ndim=2] new_mask): +def unstack(ndarray[reshape_t, ndim=2] values, + ndarray[uint8_t, ndim=1] mask, + Py_ssize_t stride, + Py_ssize_t length, + Py_ssize_t width, + ndarray[reshape_t, ndim=2] new_values, + ndarray[uint8_t, ndim=2] new_mask): """ transform long sorted_values to wide new_values @@ -50,23 +44,33 @@ def unstack_{{dtype}}(ndarray[{{c_type}}, ndim=2] values, result array new_mask : boolean ndarray result mask - """ - cdef: Py_ssize_t i, j, w, nulls, s, offset - {{if dtype == 'object'}} - if True: - {{else}} - with nogil: - {{endif}} + if reshape_t is not object: + with nogil: + for i in range(stride): + nulls = 0 - for i in range(stride): + for j in range(length): + for w in range(width): + offset = j * width + w + + if mask[offset]: + s = i * width 
+ w + new_values[j, s] = values[offset - nulls, i] + new_mask[j, s] = 1 + else: + nulls += 1 + + else: + # identical to above version, but "with nogil" is not available + for i in range(stride): nulls = 0 - for j in range(length): + for j in range(length): for w in range(width): offset = j * width + w @@ -78,4 +82,15 @@ def unstack_{{dtype}}(ndarray[{{c_type}}, ndim=2] values, else: nulls += 1 -{{endfor}} + +unstack_uint8 = unstack["uint8_t"] +unstack_uint16 = unstack["uint16_t"] +unstack_uint32 = unstack["uint32_t"] +unstack_uint64 = unstack["uint64_t"] +unstack_int8 = unstack["int8_t"] +unstack_int16 = unstack["int16_t"] +unstack_int32 = unstack["int32_t"] +unstack_int64 = unstack["int64_t"] +unstack_float32 = unstack["float32_t"] +unstack_float64 = unstack["float64_t"] +unstack_object = unstack["object"]