Skip to content

PERF: contiguity, less gil in join algos #42057

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 0 additions & 25 deletions pandas/_libs/algos_take_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,6 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
# ----------------------------------------------------------------------


@cython.wraparound(False)
@cython.boundscheck(False)
def take_1d_intp_intp(
    const intp_t[:] values,
    const intp_t[:] indexer,
    intp_t[::1] out,
    intp_t fill_value=-1,
):
    """
    Gather entries of ``values`` into ``out`` at the positions given by ``indexer``.

    For every position ``k`` of ``indexer``, ``out[k]`` receives
    ``values[indexer[k]]``; an indexer entry of -1 is treated as missing and
    ``fill_value`` is written instead.  ``out`` is filled in place and nothing
    is returned.

    Bounds checking and negative-index wraparound are switched off by the
    decorators, so entries other than -1 are presumably expected to be valid
    positions in ``values`` and ``out`` at least as long as ``indexer`` --
    confirm against the callers.
    """
    cdef:
        Py_ssize_t pos, src
        Py_ssize_t length = indexer.shape[0]
        intp_t fill = fill_value

    # Only C-typed locals and memoryviews are touched, so the loop runs
    # with the GIL released.
    with nogil:
        for pos in range(length):
            src = indexer[pos]
            out[pos] = fill if src == -1 else values[src]


{{py:

# c_type_in, c_type_out
Expand Down
114 changes: 58 additions & 56 deletions pandas/_libs/join.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,22 @@ from numpy cimport (

cnp.import_array()

from pandas._libs.algos import (
groupsort_indexer,
take_1d_int64_int64,
take_1d_intp_intp,
)
from pandas._libs.algos import groupsort_indexer


@cython.wraparound(False)
@cython.boundscheck(False)
def inner_join(const intp_t[:] left, const intp_t[:] right,
Py_ssize_t max_groups):
cdef:
Py_ssize_t i, j, k, count = 0
ndarray[intp_t] left_sorter, right_sorter
ndarray[intp_t] left_count, right_count
ndarray[intp_t] left_indexer, right_indexer
intp_t[::1] left_sorter, right_sorter
intp_t[::1] left_count, right_count
intp_t[::1] left_indexer, right_indexer
intp_t lc, rc
Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
Py_ssize_t left_pos = 0, right_pos = 0, position = 0
Py_ssize_t offset

# NA group in location 0

left_sorter, left_count = groupsort_indexer(left, max_groups)
right_sorter, right_count = groupsort_indexer(right, max_groups)

Expand All @@ -53,14 +48,13 @@ def inner_join(const intp_t[:] left, const intp_t[:] right,
if rc > 0 and lc > 0:
count += lc * rc

# exclude the NA group
left_pos = left_count[0]
right_pos = right_count[0]

left_indexer = np.empty(count, dtype=np.intp)
right_indexer = np.empty(count, dtype=np.intp)

with nogil:
# exclude the NA group
left_pos = left_count[0]
right_pos = right_count[0]
for i in range(1, max_groups + 1):
lc = left_count[i]
rc = right_count[i]
Expand All @@ -75,24 +69,27 @@ def inner_join(const intp_t[:] left, const intp_t[:] right,
left_pos += lc
right_pos += rc

return (_get_result_indexer(left_sorter, left_indexer),
_get_result_indexer(right_sorter, right_indexer))
# Will overwrite left/right indexer with the result
_get_result_indexer(left_sorter, left_indexer)
_get_result_indexer(right_sorter, right_indexer)

return np.asarray(left_indexer), np.asarray(right_indexer)


@cython.wraparound(False)
@cython.boundscheck(False)
def left_outer_join(const intp_t[:] left, const intp_t[:] right,
Py_ssize_t max_groups, bint sort=True):
cdef:
Py_ssize_t i, j, k, count = 0
ndarray[intp_t] left_count, right_count
ndarray[intp_t] rev, left_sorter, right_sorter
ndarray[intp_t] left_indexer, right_indexer
ndarray[intp_t] rev
intp_t[::1] left_count, right_count
intp_t[::1] left_sorter, right_sorter
intp_t[::1] left_indexer, right_indexer
intp_t lc, rc
Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
Py_ssize_t left_pos = 0, right_pos = 0, position = 0
Py_ssize_t offset

# NA group in location 0

left_sorter, left_count = groupsort_indexer(left, max_groups)
right_sorter, right_count = groupsort_indexer(right, max_groups)

Expand All @@ -104,14 +101,13 @@ def left_outer_join(const intp_t[:] left, const intp_t[:] right,
else:
count += left_count[i]

# exclude the NA group
left_pos = left_count[0]
right_pos = right_count[0]

left_indexer = np.empty(count, dtype=np.intp)
right_indexer = np.empty(count, dtype=np.intp)

with nogil:
# exclude the NA group
left_pos = left_count[0]
right_pos = right_count[0]
for i in range(1, max_groups + 1):
lc = left_count[i]
rc = right_count[i]
Expand All @@ -131,40 +127,38 @@ def left_outer_join(const intp_t[:] left, const intp_t[:] right,
left_pos += lc
right_pos += rc

left_indexer = _get_result_indexer(left_sorter, left_indexer)
right_indexer = _get_result_indexer(right_sorter, right_indexer)
# Will overwrite left/right indexer with the result
_get_result_indexer(left_sorter, left_indexer)
_get_result_indexer(right_sorter, right_indexer)

if not sort: # if not asked to sort, revert to original order
# cast to avoid build warning GH#26757
if <Py_ssize_t>len(left) == len(left_indexer):
if len(left) == len(left_indexer):
# no multiple matches for any row on the left
# this is a short-cut to avoid groupsort_indexer
# otherwise, the `else` path also works in this case
rev = np.empty(len(left), dtype=np.intp)
rev.put(left_sorter, np.arange(len(left)))
rev.put(np.asarray(left_sorter), np.arange(len(left)))
else:
rev, _ = groupsort_indexer(left_indexer, len(left))

right_indexer = right_indexer.take(rev)
left_indexer = left_indexer.take(rev)

return left_indexer, right_indexer
return np.asarray(left_indexer).take(rev), np.asarray(right_indexer).take(rev)
else:
return np.asarray(left_indexer), np.asarray(right_indexer)


@cython.wraparound(False)
@cython.boundscheck(False)
def full_outer_join(const intp_t[:] left, const intp_t[:] right,
Py_ssize_t max_groups):
cdef:
Py_ssize_t i, j, k, count = 0
ndarray[intp_t] left_sorter, right_sorter
ndarray[intp_t] left_count, right_count
ndarray[intp_t] left_indexer, right_indexer
intp_t[::1] left_sorter, right_sorter
intp_t[::1] left_count, right_count
intp_t[::1] left_indexer, right_indexer
intp_t lc, rc
intp_t left_pos = 0, right_pos = 0
Py_ssize_t offset, position = 0

# NA group in location 0

left_sorter, left_count = groupsort_indexer(left, max_groups)
right_sorter, right_count = groupsort_indexer(right, max_groups)

Expand All @@ -179,14 +173,13 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,
else:
count += lc + rc

# exclude the NA group
left_pos = left_count[0]
right_pos = right_count[0]

left_indexer = np.empty(count, dtype=np.intp)
right_indexer = np.empty(count, dtype=np.intp)

with nogil:
# exclude the NA group
left_pos = left_count[0]
right_pos = right_count[0]
for i in range(1, max_groups + 1):
lc = left_count[i]
rc = right_count[i]
Expand All @@ -211,24 +204,33 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,
left_pos += lc
right_pos += rc

return (_get_result_indexer(left_sorter, left_indexer),
_get_result_indexer(right_sorter, right_indexer))
# Will overwrite left/right indexer with the result
_get_result_indexer(left_sorter, left_indexer)
_get_result_indexer(right_sorter, right_indexer)

return np.asarray(left_indexer), np.asarray(right_indexer)


cdef ndarray[intp_t] _get_result_indexer(
ndarray[intp_t] sorter, ndarray[intp_t] indexer
):
@cython.wraparound(False)
@cython.boundscheck(False)
cdef void _get_result_indexer(intp_t[::1] sorter, intp_t[::1] indexer) nogil:
"""NOTE: overwrites indexer with the result to avoid allocating another array"""
cdef:
Py_ssize_t i, n, idx

if len(sorter) > 0:
# cython-only equivalent to
# `res = algos.take_nd(sorter, indexer, fill_value=-1)`
res = np.empty(len(indexer), dtype=np.intp)
take_1d_intp_intp(sorter, indexer, res, -1)
n = indexer.shape[0]
for i in range(n):
idx = indexer[i]
if idx == -1:
indexer[i] = -1
else:
indexer[i] = sorter[idx]
else:
# length-0 case
res = np.empty(len(indexer), dtype=np.intp)
res[:] = -1

return res
indexer[:] = -1


def ffill_indexer(const intp_t[:] indexer) -> np.ndarray:
Expand Down