From 894a07b87d66a1b3d573b3f20638043cdb239e3a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 30 Sep 2018 11:07:30 -0700 Subject: [PATCH 1/4] cython cleanup --- pandas/_libs/algos_rank_helper.pxi.in | 9 ------ pandas/_libs/join_func_helper.pxi.in | 44 +++++++++++++-------------- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index b2551f3733904..130276ae0e73c 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -24,17 +24,8 @@ dtypes = [('object', 'object', 'Infinity()', 'NegInfinity()'), @cython.wraparound(False) @cython.boundscheck(False) -{{if dtype == 'object'}} - - def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True, na_option='keep', pct=False): -{{else}} - - -def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True, - na_option='keep', pct=False): -{{endif}} """ Fast NaN-friendly version of scipy.stats.rankdata """ diff --git a/pandas/_libs/join_func_helper.pxi.in b/pandas/_libs/join_func_helper.pxi.in index 73d231b8588dc..a72b113a6fdb6 100644 --- a/pandas/_libs/join_func_helper.pxi.in +++ b/pandas/_libs/join_func_helper.pxi.in @@ -68,21 +68,21 @@ def asof_join_backward_{{on_dtype}}_by_{{by_dtype}}( # find last position in right whose value is less than left's if allow_exact_matches: - while right_pos < right_size and\ - right_values[right_pos] <= left_values[left_pos]: + while (right_pos < right_size and + right_values[right_pos] <= left_values[left_pos]): hash_table.set_item(right_by_values[right_pos], right_pos) right_pos += 1 else: - while right_pos < right_size and\ - right_values[right_pos] < left_values[left_pos]: + while (right_pos < right_size and + right_values[right_pos] < left_values[left_pos]): hash_table.set_item(right_by_values[right_pos], right_pos) right_pos += 1 right_pos -= 1 # save positions as the desired index by_value = left_by_values[left_pos] - 
found_right_pos = hash_table.get_item(by_value)\ - if by_value in hash_table else -1 + found_right_pos = (hash_table.get_item(by_value) + if by_value in hash_table else -1) left_indexer[left_pos] = left_pos right_indexer[left_pos] = found_right_pos @@ -133,21 +133,21 @@ def asof_join_forward_{{on_dtype}}_by_{{by_dtype}}( # find first position in right whose value is greater than left's if allow_exact_matches: - while right_pos >= 0 and\ - right_values[right_pos] >= left_values[left_pos]: + while (right_pos >= 0 and + right_values[right_pos] >= left_values[left_pos]): hash_table.set_item(right_by_values[right_pos], right_pos) right_pos -= 1 else: - while right_pos >= 0 and\ - right_values[right_pos] > left_values[left_pos]: + while (right_pos >= 0 and + right_values[right_pos] > left_values[left_pos]): hash_table.set_item(right_by_values[right_pos], right_pos) right_pos -= 1 right_pos += 1 # save positions as the desired index by_value = left_by_values[left_pos] - found_right_pos = hash_table.get_item(by_value)\ - if by_value in hash_table else -1 + found_right_pos = (hash_table.get_item(by_value) + if by_value in hash_table else -1) left_indexer[left_pos] = left_pos right_indexer[left_pos] = found_right_pos @@ -259,12 +259,12 @@ def asof_join_backward_{{on_dtype}}( # find last position in right whose value is less than left's if allow_exact_matches: - while right_pos < right_size and\ - right_values[right_pos] <= left_values[left_pos]: + while (right_pos < right_size and + right_values[right_pos] <= left_values[left_pos]): right_pos += 1 else: - while right_pos < right_size and\ - right_values[right_pos] < left_values[left_pos]: + while (right_pos < right_size and + right_values[right_pos] < left_values[left_pos]): right_pos += 1 right_pos -= 1 @@ -313,19 +313,19 @@ def asof_join_forward_{{on_dtype}}( # find first position in right whose value is greater than left's if allow_exact_matches: - while right_pos >= 0 and\ - right_values[right_pos] >= 
left_values[left_pos]: + while (right_pos >= 0 and + right_values[right_pos] >= left_values[left_pos]): right_pos -= 1 else: - while right_pos >= 0 and\ - right_values[right_pos] > left_values[left_pos]: + while (right_pos >= 0 and + right_values[right_pos] > left_values[left_pos]): right_pos -= 1 right_pos += 1 # save positions as the desired index left_indexer[left_pos] = left_pos - right_indexer[left_pos] = right_pos\ - if right_pos != right_size else -1 + right_indexer[left_pos] = (right_pos + if right_pos != right_size else -1) # if needed, verify that tolerance is met if has_tolerance and right_pos != right_size: From 381a259242b8f170f1dc848c6fbd03c8a19cc172 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 30 Sep 2018 11:08:47 -0700 Subject: [PATCH 2/4] use fused type for _take_2d --- pandas/_libs/algos_take_helper.pxi.in | 36 ++++++++++++++++----------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in index 0e69324acd341..358479c837d05 100644 --- a/pandas/_libs/algos_take_helper.pxi.in +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -260,33 +260,39 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, {{endfor}} -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # take_2d internal function -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- -{{py: - -# dtype, ctype, init_result -dtypes = [('float64', 'float64_t', 'np.empty_like(values)'), - ('uint64', 'uint64_t', 'np.empty_like(values)'), - ('object', 'object', 'values.copy()'), - ('int64', 'int64_t', 'np.empty_like(values)')] -}} +ctypedef fused take_t: + float64_t + uint64_t + int64_t + object -{{for dtype, ctype, init_result in dtypes}} -cdef _take_2d_{{dtype}}(ndarray[{{ctype}}, ndim=2] values, 
object idx): +cdef _take_2d(ndarray[take_t, ndim=2] values, object idx): cdef: Py_ssize_t i, j, N, K ndarray[Py_ssize_t, ndim=2, cast=True] indexer = idx - ndarray[{{ctype}}, ndim=2] result + ndarray[take_t, ndim=2] result object val N, K = (<object> values).shape - result = {{init_result}} + + if take_t is object: + # evaluated at compile-time + result = values.copy() + else: + result = np.empty_like(values) + for i in range(N): for j in range(K): result[i, j] = values[i, indexer[i, j]] return result -{{endfor}} + +_take_2d_object = _take_2d[object] +_take_2d_float64 = _take_2d[float64_t] +_take_2d_int64 = _take_2d[int64_t] +_take_2d_uint64 = _take_2d[uint64_t] From 9791c91cb2f5af5cd96f97cc572453769161fecb Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 30 Sep 2018 17:56:05 -0700 Subject: [PATCH 3/4] Remove outdated piece of docstring --- pandas/_libs/algos_common_helper.pxi.in | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 40b1b1a282670..9f531f36d1a64 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -2,7 +2,6 @@ Template for each `dtype` helper function using 1-d template # 1-d template -- map_indices - pad - pad_1d - pad_2d From b6698474b20f7ad77376e3cef0fd4c11cc9b9084 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 30 Sep 2018 18:01:09 -0700 Subject: [PATCH 4/4] Use fused type for left_join_indexer_unique --- pandas/_libs/join_helper.pxi.in | 61 ++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/pandas/_libs/join_helper.pxi.in b/pandas/_libs/join_helper.pxi.in index feb8cfb76a7f0..6ba587a5b04ea 100644 --- a/pandas/_libs/join_helper.pxi.in +++ b/pandas/_libs/join_helper.pxi.in @@ -4,42 +4,30 @@ Template for each `dtype` helper function for join WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """
-#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # left_join_indexer, inner_join_indexer, outer_join_indexer -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- -{{py: - -# name, c_type, dtype -dtypes = [('float64', 'float64_t', 'np.float64'), - ('float32', 'float32_t', 'np.float32'), - ('object', 'object', 'object'), - ('int32', 'int32_t', 'np.int32'), - ('int64', 'int64_t', 'np.int64'), - ('uint64', 'uint64_t', 'np.uint64')] - -def get_dispatch(dtypes): - - for name, c_type, dtype in dtypes: - yield name, c_type, dtype - -}} +ctypedef fused join_t: + float64_t + float32_t + object + int32_t + int64_t + uint64_t -{{for name, c_type, dtype in get_dispatch(dtypes)}} # Joins on ordered, unique indices # right might contain non-unique values - @cython.wraparound(False) @cython.boundscheck(False) -def left_join_indexer_unique_{{name}}(ndarray[{{c_type}}] left, - ndarray[{{c_type}}] right): +def left_join_indexer_unique(ndarray[join_t] left, ndarray[join_t] right): cdef: Py_ssize_t i, j, nleft, nright ndarray[int64_t] indexer - {{c_type}} lval, rval + join_t lval, rval i = 0 j = 0 @@ -78,6 +66,33 @@ def left_join_indexer_unique_{{name}}(ndarray[{{c_type}}] left, return indexer +left_join_indexer_unique_float64 = left_join_indexer_unique["float64_t"] +left_join_indexer_unique_float32 = left_join_indexer_unique["float32_t"] +left_join_indexer_unique_object = left_join_indexer_unique["object"] +left_join_indexer_unique_int32 = left_join_indexer_unique["int32_t"] +left_join_indexer_unique_int64 = left_join_indexer_unique["int64_t"] +left_join_indexer_unique_uint64 = left_join_indexer_unique["uint64_t"] + + +{{py: + +# name, c_type, dtype +dtypes = [('float64', 'float64_t', 'np.float64'), + ('float32', 'float32_t', 'np.float32'), + ('object', 'object', 'object'), + ('int32', 'int32_t', 
'np.int32'), + ('int64', 'int64_t', 'np.int64'), + ('uint64', 'uint64_t', 'np.uint64')] + +def get_dispatch(dtypes): + + for name, c_type, dtype in dtypes: + yield name, c_type, dtype + +}} + +{{for name, c_type, dtype in get_dispatch(dtypes)}} + # @cython.wraparound(False) # @cython.boundscheck(False) def left_join_indexer_{{name}}(ndarray[{{c_type}}] left,