Skip to content

Use fused types for _take_2d #22917

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Oct 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Template for each `dtype` helper function using 1-d template

# 1-d template
- map_indices
- pad
- pad_1d
- pad_2d
Expand Down
9 changes: 0 additions & 9 deletions pandas/_libs/algos_rank_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,8 @@ dtypes = [('object', 'object', 'Infinity()', 'NegInfinity()'),

@cython.wraparound(False)
@cython.boundscheck(False)
{{if dtype == 'object'}}


def rank_1d_{{dtype}}(object in_arr, ties_method='average',
ascending=True, na_option='keep', pct=False):
{{else}}


def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
na_option='keep', pct=False):
{{endif}}
"""
Fast NaN-friendly version of scipy.stats.rankdata
"""
Expand Down
36 changes: 21 additions & 15 deletions pandas/_libs/algos_take_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -260,33 +260,39 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,

{{endfor}}

#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# take_2d internal function
#----------------------------------------------------------------------
# ----------------------------------------------------------------------

{{py:

# dtype, ctype, init_result
dtypes = [('float64', 'float64_t', 'np.empty_like(values)'),
('uint64', 'uint64_t', 'np.empty_like(values)'),
('object', 'object', 'values.copy()'),
('int64', 'int64_t', 'np.empty_like(values)')]
}}
ctypedef fused take_t:
float64_t
uint64_t
int64_t
object

{{for dtype, ctype, init_result in dtypes}}

cdef _take_2d_{{dtype}}(ndarray[{{ctype}}, ndim=2] values, object idx):
cdef _take_2d(ndarray[take_t, ndim=2] values, object idx):
cdef:
Py_ssize_t i, j, N, K
ndarray[Py_ssize_t, ndim=2, cast=True] indexer = idx
ndarray[{{ctype}}, ndim=2] result
ndarray[take_t, ndim=2] result
object val

N, K = (<object> values).shape
result = {{init_result}}

if take_t is object:
# evaluated at compile-time
result = values.copy()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't `np.empty_like(values)` work for the object case too, instead of `values.copy()`?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

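This just maintains the status quo: the previous template used `values.copy()` to initialize the result for object dtype and `np.empty_like(values)` for the others, so I didn't try `np.empty_like` for the object case.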

else:
result = np.empty_like(values)

for i in range(N):
for j in range(K):
result[i, j] = values[i, indexer[i, j]]
return result

{{endfor}}

_take_2d_object = _take_2d[object]
_take_2d_float64 = _take_2d[float64_t]
_take_2d_int64 = _take_2d[int64_t]
_take_2d_uint64 = _take_2d[uint64_t]
44 changes: 22 additions & 22 deletions pandas/_libs/join_func_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -68,21 +68,21 @@ def asof_join_backward_{{on_dtype}}_by_{{by_dtype}}(

# find last position in right whose value is less than left's
if allow_exact_matches:
while right_pos < right_size and\
right_values[right_pos] <= left_values[left_pos]:
while (right_pos < right_size and
right_values[right_pos] <= left_values[left_pos]):
hash_table.set_item(right_by_values[right_pos], right_pos)
right_pos += 1
else:
while right_pos < right_size and\
right_values[right_pos] < left_values[left_pos]:
while (right_pos < right_size and
right_values[right_pos] < left_values[left_pos]):
hash_table.set_item(right_by_values[right_pos], right_pos)
right_pos += 1
right_pos -= 1

# save positions as the desired index
by_value = left_by_values[left_pos]
found_right_pos = hash_table.get_item(by_value)\
if by_value in hash_table else -1
found_right_pos = (hash_table.get_item(by_value)
if by_value in hash_table else -1)
left_indexer[left_pos] = left_pos
right_indexer[left_pos] = found_right_pos

Expand Down Expand Up @@ -133,21 +133,21 @@ def asof_join_forward_{{on_dtype}}_by_{{by_dtype}}(

# find first position in right whose value is greater than left's
if allow_exact_matches:
while right_pos >= 0 and\
right_values[right_pos] >= left_values[left_pos]:
while (right_pos >= 0 and
right_values[right_pos] >= left_values[left_pos]):
hash_table.set_item(right_by_values[right_pos], right_pos)
right_pos -= 1
else:
while right_pos >= 0 and\
right_values[right_pos] > left_values[left_pos]:
while (right_pos >= 0 and
right_values[right_pos] > left_values[left_pos]):
hash_table.set_item(right_by_values[right_pos], right_pos)
right_pos -= 1
right_pos += 1

# save positions as the desired index
by_value = left_by_values[left_pos]
found_right_pos = hash_table.get_item(by_value)\
if by_value in hash_table else -1
found_right_pos = (hash_table.get_item(by_value)
if by_value in hash_table else -1)
left_indexer[left_pos] = left_pos
right_indexer[left_pos] = found_right_pos

Expand Down Expand Up @@ -259,12 +259,12 @@ def asof_join_backward_{{on_dtype}}(

# find last position in right whose value is less than left's
if allow_exact_matches:
while right_pos < right_size and\
right_values[right_pos] <= left_values[left_pos]:
while (right_pos < right_size and
right_values[right_pos] <= left_values[left_pos]):
right_pos += 1
else:
while right_pos < right_size and\
right_values[right_pos] < left_values[left_pos]:
while (right_pos < right_size and
right_values[right_pos] < left_values[left_pos]):
right_pos += 1
right_pos -= 1

Expand Down Expand Up @@ -313,19 +313,19 @@ def asof_join_forward_{{on_dtype}}(

# find first position in right whose value is greater than left's
if allow_exact_matches:
while right_pos >= 0 and\
right_values[right_pos] >= left_values[left_pos]:
while (right_pos >= 0 and
right_values[right_pos] >= left_values[left_pos]):
right_pos -= 1
else:
while right_pos >= 0 and\
right_values[right_pos] > left_values[left_pos]:
while (right_pos >= 0 and
right_values[right_pos] > left_values[left_pos]):
right_pos -= 1
right_pos += 1

# save positions as the desired index
left_indexer[left_pos] = left_pos
right_indexer[left_pos] = right_pos\
if right_pos != right_size else -1
right_indexer[left_pos] = (right_pos
if right_pos != right_size else -1)

# if needed, verify that tolerance is met
if has_tolerance and right_pos != right_size:
Expand Down
61 changes: 38 additions & 23 deletions pandas/_libs/join_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,42 +4,30 @@ Template for each `dtype` helper function for join
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""

#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# left_join_indexer, inner_join_indexer, outer_join_indexer
#----------------------------------------------------------------------
# ----------------------------------------------------------------------

{{py:

# name, c_type, dtype
dtypes = [('float64', 'float64_t', 'np.float64'),
('float32', 'float32_t', 'np.float32'),
('object', 'object', 'object'),
('int32', 'int32_t', 'np.int32'),
('int64', 'int64_t', 'np.int64'),
('uint64', 'uint64_t', 'np.uint64')]

def get_dispatch(dtypes):

for name, c_type, dtype in dtypes:
yield name, c_type, dtype

}}
ctypedef fused join_t:
float64_t
float32_t
object
int32_t
int64_t
uint64_t

{{for name, c_type, dtype in get_dispatch(dtypes)}}

# Joins on ordered, unique indices

# right might contain non-unique values


@cython.wraparound(False)
@cython.boundscheck(False)
def left_join_indexer_unique_{{name}}(ndarray[{{c_type}}] left,
ndarray[{{c_type}}] right):
def left_join_indexer_unique(ndarray[join_t] left, ndarray[join_t] right):
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t] indexer
{{c_type}} lval, rval
join_t lval, rval

i = 0
j = 0
Expand Down Expand Up @@ -78,6 +66,33 @@ def left_join_indexer_unique_{{name}}(ndarray[{{c_type}}] left,
return indexer


left_join_indexer_unique_float64 = left_join_indexer_unique["float64_t"]
left_join_indexer_unique_float32 = left_join_indexer_unique["float32_t"]
left_join_indexer_unique_object = left_join_indexer_unique["object"]
left_join_indexer_unique_int32 = left_join_indexer_unique["int32_t"]
left_join_indexer_unique_int64 = left_join_indexer_unique["int64_t"]
left_join_indexer_unique_uint64 = left_join_indexer_unique["uint64_t"]


{{py:

# name, c_type, dtype
dtypes = [('float64', 'float64_t', 'np.float64'),
('float32', 'float32_t', 'np.float32'),
('object', 'object', 'object'),
('int32', 'int32_t', 'np.int32'),
('int64', 'int64_t', 'np.int64'),
('uint64', 'uint64_t', 'np.uint64')]

def get_dispatch(dtypes):

for name, c_type, dtype in dtypes:
yield name, c_type, dtype

}}

{{for name, c_type, dtype in get_dispatch(dtypes)}}

# @cython.wraparound(False)
# @cython.boundscheck(False)
def left_join_indexer_{{name}}(ndarray[{{c_type}}] left,
Expand Down