Skip to content

Commit 4aa69fd

Browse files
Backport PR pandas-dev#42057: PERF: contiguity, less gil in join algos (pandas-dev#42075)
Co-authored-by: Matthew Zeitlin <[email protected]>
1 parent 571147f commit 4aa69fd

File tree

2 files changed

+58
-81
lines changed

2 files changed

+58
-81
lines changed

pandas/_libs/algos_take_helper.pxi.in

-25
Original file line numberDiff line numberDiff line change
@@ -9,31 +9,6 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
99
# ----------------------------------------------------------------------
1010

1111

12-
@cython.wraparound(False)
13-
@cython.boundscheck(False)
14-
def take_1d_intp_intp(
15-
const intp_t[:] values,
16-
const intp_t[:] indexer,
17-
intp_t[::1] out,
18-
intp_t fill_value=-1,
19-
):
20-
cdef:
21-
Py_ssize_t i, n, idx
22-
intp_t fv
23-
24-
n = indexer.shape[0]
25-
26-
fv = fill_value
27-
28-
with nogil:
29-
for i in range(n):
30-
idx = indexer[i]
31-
if idx == -1:
32-
out[i] = fv
33-
else:
34-
out[i] = values[idx]
35-
36-
3712
{{py:
3813

3914
# c_type_in, c_type_out

pandas/_libs/join.pyx

+58-56
Original file line numberDiff line numberDiff line change
@@ -20,27 +20,22 @@ from numpy cimport (
2020

2121
cnp.import_array()
2222

23-
from pandas._libs.algos import (
24-
groupsort_indexer,
25-
take_1d_int64_int64,
26-
take_1d_intp_intp,
27-
)
23+
from pandas._libs.algos import groupsort_indexer
2824

2925

26+
@cython.wraparound(False)
3027
@cython.boundscheck(False)
3128
def inner_join(const intp_t[:] left, const intp_t[:] right,
3229
Py_ssize_t max_groups):
3330
cdef:
3431
Py_ssize_t i, j, k, count = 0
35-
ndarray[intp_t] left_sorter, right_sorter
36-
ndarray[intp_t] left_count, right_count
37-
ndarray[intp_t] left_indexer, right_indexer
32+
intp_t[::1] left_sorter, right_sorter
33+
intp_t[::1] left_count, right_count
34+
intp_t[::1] left_indexer, right_indexer
3835
intp_t lc, rc
39-
Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
36+
Py_ssize_t left_pos = 0, right_pos = 0, position = 0
4037
Py_ssize_t offset
4138

42-
# NA group in location 0
43-
4439
left_sorter, left_count = groupsort_indexer(left, max_groups)
4540
right_sorter, right_count = groupsort_indexer(right, max_groups)
4641

@@ -53,14 +48,13 @@ def inner_join(const intp_t[:] left, const intp_t[:] right,
5348
if rc > 0 and lc > 0:
5449
count += lc * rc
5550

56-
# exclude the NA group
57-
left_pos = left_count[0]
58-
right_pos = right_count[0]
59-
6051
left_indexer = np.empty(count, dtype=np.intp)
6152
right_indexer = np.empty(count, dtype=np.intp)
6253

6354
with nogil:
55+
# exclude the NA group
56+
left_pos = left_count[0]
57+
right_pos = right_count[0]
6458
for i in range(1, max_groups + 1):
6559
lc = left_count[i]
6660
rc = right_count[i]
@@ -75,24 +69,27 @@ def inner_join(const intp_t[:] left, const intp_t[:] right,
7569
left_pos += lc
7670
right_pos += rc
7771

78-
return (_get_result_indexer(left_sorter, left_indexer),
79-
_get_result_indexer(right_sorter, right_indexer))
72+
# Will overwrite left/right indexer with the result
73+
_get_result_indexer(left_sorter, left_indexer)
74+
_get_result_indexer(right_sorter, right_indexer)
75+
76+
return np.asarray(left_indexer), np.asarray(right_indexer)
8077

8178

79+
@cython.wraparound(False)
8280
@cython.boundscheck(False)
8381
def left_outer_join(const intp_t[:] left, const intp_t[:] right,
8482
Py_ssize_t max_groups, bint sort=True):
8583
cdef:
8684
Py_ssize_t i, j, k, count = 0
87-
ndarray[intp_t] left_count, right_count
88-
ndarray[intp_t] rev, left_sorter, right_sorter
89-
ndarray[intp_t] left_indexer, right_indexer
85+
ndarray[intp_t] rev
86+
intp_t[::1] left_count, right_count
87+
intp_t[::1] left_sorter, right_sorter
88+
intp_t[::1] left_indexer, right_indexer
9089
intp_t lc, rc
91-
Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
90+
Py_ssize_t left_pos = 0, right_pos = 0, position = 0
9291
Py_ssize_t offset
9392

94-
# NA group in location 0
95-
9693
left_sorter, left_count = groupsort_indexer(left, max_groups)
9794
right_sorter, right_count = groupsort_indexer(right, max_groups)
9895

@@ -104,14 +101,13 @@ def left_outer_join(const intp_t[:] left, const intp_t[:] right,
104101
else:
105102
count += left_count[i]
106103

107-
# exclude the NA group
108-
left_pos = left_count[0]
109-
right_pos = right_count[0]
110-
111104
left_indexer = np.empty(count, dtype=np.intp)
112105
right_indexer = np.empty(count, dtype=np.intp)
113106

114107
with nogil:
108+
# exclude the NA group
109+
left_pos = left_count[0]
110+
right_pos = right_count[0]
115111
for i in range(1, max_groups + 1):
116112
lc = left_count[i]
117113
rc = right_count[i]
@@ -131,40 +127,38 @@ def left_outer_join(const intp_t[:] left, const intp_t[:] right,
131127
left_pos += lc
132128
right_pos += rc
133129

134-
left_indexer = _get_result_indexer(left_sorter, left_indexer)
135-
right_indexer = _get_result_indexer(right_sorter, right_indexer)
130+
# Will overwrite left/right indexer with the result
131+
_get_result_indexer(left_sorter, left_indexer)
132+
_get_result_indexer(right_sorter, right_indexer)
136133

137134
if not sort: # if not asked to sort, revert to original order
138-
# cast to avoid build warning GH#26757
139-
if <Py_ssize_t>len(left) == len(left_indexer):
135+
if len(left) == len(left_indexer):
140136
# no multiple matches for any row on the left
141137
# this is a short-cut to avoid groupsort_indexer
142138
# otherwise, the `else` path also works in this case
143139
rev = np.empty(len(left), dtype=np.intp)
144-
rev.put(left_sorter, np.arange(len(left)))
140+
rev.put(np.asarray(left_sorter), np.arange(len(left)))
145141
else:
146142
rev, _ = groupsort_indexer(left_indexer, len(left))
147143

148-
right_indexer = right_indexer.take(rev)
149-
left_indexer = left_indexer.take(rev)
150-
151-
return left_indexer, right_indexer
144+
return np.asarray(left_indexer).take(rev), np.asarray(right_indexer).take(rev)
145+
else:
146+
return np.asarray(left_indexer), np.asarray(right_indexer)
152147

153148

149+
@cython.wraparound(False)
154150
@cython.boundscheck(False)
155151
def full_outer_join(const intp_t[:] left, const intp_t[:] right,
156152
Py_ssize_t max_groups):
157153
cdef:
158154
Py_ssize_t i, j, k, count = 0
159-
ndarray[intp_t] left_sorter, right_sorter
160-
ndarray[intp_t] left_count, right_count
161-
ndarray[intp_t] left_indexer, right_indexer
155+
intp_t[::1] left_sorter, right_sorter
156+
intp_t[::1] left_count, right_count
157+
intp_t[::1] left_indexer, right_indexer
162158
intp_t lc, rc
163159
intp_t left_pos = 0, right_pos = 0
164160
Py_ssize_t offset, position = 0
165161

166-
# NA group in location 0
167-
168162
left_sorter, left_count = groupsort_indexer(left, max_groups)
169163
right_sorter, right_count = groupsort_indexer(right, max_groups)
170164

@@ -179,14 +173,13 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,
179173
else:
180174
count += lc + rc
181175

182-
# exclude the NA group
183-
left_pos = left_count[0]
184-
right_pos = right_count[0]
185-
186176
left_indexer = np.empty(count, dtype=np.intp)
187177
right_indexer = np.empty(count, dtype=np.intp)
188178

189179
with nogil:
180+
# exclude the NA group
181+
left_pos = left_count[0]
182+
right_pos = right_count[0]
190183
for i in range(1, max_groups + 1):
191184
lc = left_count[i]
192185
rc = right_count[i]
@@ -211,24 +204,33 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,
211204
left_pos += lc
212205
right_pos += rc
213206

214-
return (_get_result_indexer(left_sorter, left_indexer),
215-
_get_result_indexer(right_sorter, right_indexer))
207+
# Will overwrite left/right indexer with the result
208+
_get_result_indexer(left_sorter, left_indexer)
209+
_get_result_indexer(right_sorter, right_indexer)
210+
211+
return np.asarray(left_indexer), np.asarray(right_indexer)
216212

217213

218-
cdef ndarray[intp_t] _get_result_indexer(
219-
ndarray[intp_t] sorter, ndarray[intp_t] indexer
220-
):
214+
@cython.wraparound(False)
215+
@cython.boundscheck(False)
216+
cdef void _get_result_indexer(intp_t[::1] sorter, intp_t[::1] indexer) nogil:
217+
"""NOTE: overwrites indexer with the result to avoid allocating another array"""
218+
cdef:
219+
Py_ssize_t i, n, idx
220+
221221
if len(sorter) > 0:
222222
# cython-only equivalent to
223223
# `res = algos.take_nd(sorter, indexer, fill_value=-1)`
224-
res = np.empty(len(indexer), dtype=np.intp)
225-
take_1d_intp_intp(sorter, indexer, res, -1)
224+
n = indexer.shape[0]
225+
for i in range(n):
226+
idx = indexer[i]
227+
if idx == -1:
228+
indexer[i] = -1
229+
else:
230+
indexer[i] = sorter[idx]
226231
else:
227232
# length-0 case
228-
res = np.empty(len(indexer), dtype=np.intp)
229-
res[:] = -1
230-
231-
return res
233+
indexer[:] = -1
232234

233235

234236
def ffill_indexer(const intp_t[:] indexer) -> np.ndarray:

0 commit comments

Comments
 (0)