@@ -20,27 +20,22 @@ from numpy cimport (
20
20
21
21
cnp.import_array()
22
22
23
- from pandas._libs.algos import (
24
- groupsort_indexer,
25
- take_1d_int64_int64,
26
- take_1d_intp_intp,
27
- )
23
+ from pandas._libs.algos import groupsort_indexer
28
24
29
25
26
+ @ cython.wraparound (False )
30
27
@ cython.boundscheck (False )
31
28
def inner_join (const intp_t[:] left , const intp_t[:] right ,
32
29
Py_ssize_t max_groups ):
33
30
cdef:
34
31
Py_ssize_t i, j, k, count = 0
35
- ndarray[ intp_t] left_sorter, right_sorter
36
- ndarray[ intp_t] left_count, right_count
37
- ndarray[ intp_t] left_indexer, right_indexer
32
+ intp_t[:: 1 ] left_sorter, right_sorter
33
+ intp_t[:: 1 ] left_count, right_count
34
+ intp_t[:: 1 ] left_indexer, right_indexer
38
35
intp_t lc, rc
39
- Py_ssize_t loc, left_pos = 0 , right_pos = 0 , position = 0
36
+ Py_ssize_t left_pos = 0 , right_pos = 0 , position = 0
40
37
Py_ssize_t offset
41
38
42
- # NA group in location 0
43
-
44
39
left_sorter, left_count = groupsort_indexer(left, max_groups)
45
40
right_sorter, right_count = groupsort_indexer(right, max_groups)
46
41
@@ -53,14 +48,13 @@ def inner_join(const intp_t[:] left, const intp_t[:] right,
53
48
if rc > 0 and lc > 0 :
54
49
count += lc * rc
55
50
56
- # exclude the NA group
57
- left_pos = left_count[0 ]
58
- right_pos = right_count[0 ]
59
-
60
51
left_indexer = np.empty(count, dtype = np.intp)
61
52
right_indexer = np.empty(count, dtype = np.intp)
62
53
63
54
with nogil:
55
+ # exclude the NA group
56
+ left_pos = left_count[0 ]
57
+ right_pos = right_count[0 ]
64
58
for i in range (1 , max_groups + 1 ):
65
59
lc = left_count[i]
66
60
rc = right_count[i]
@@ -75,24 +69,27 @@ def inner_join(const intp_t[:] left, const intp_t[:] right,
75
69
left_pos += lc
76
70
right_pos += rc
77
71
78
- return (_get_result_indexer(left_sorter, left_indexer),
79
- _get_result_indexer(right_sorter, right_indexer))
72
+ # Will overwrite left/right indexer with the result
73
+ _get_result_indexer(left_sorter, left_indexer)
74
+ _get_result_indexer(right_sorter, right_indexer)
75
+
76
+ return np.asarray(left_indexer), np.asarray(right_indexer)
80
77
81
78
79
+ @ cython.wraparound (False )
82
80
@ cython.boundscheck (False )
83
81
def left_outer_join (const intp_t[:] left , const intp_t[:] right ,
84
82
Py_ssize_t max_groups , bint sort = True ):
85
83
cdef:
86
84
Py_ssize_t i, j, k, count = 0
87
- ndarray[intp_t] left_count, right_count
88
- ndarray[intp_t] rev, left_sorter, right_sorter
89
- ndarray[intp_t] left_indexer, right_indexer
85
+ ndarray[intp_t] rev
86
+ intp_t[::1 ] left_count, right_count
87
+ intp_t[::1 ] left_sorter, right_sorter
88
+ intp_t[::1 ] left_indexer, right_indexer
90
89
intp_t lc, rc
91
- Py_ssize_t loc, left_pos = 0 , right_pos = 0 , position = 0
90
+ Py_ssize_t left_pos = 0 , right_pos = 0 , position = 0
92
91
Py_ssize_t offset
93
92
94
- # NA group in location 0
95
-
96
93
left_sorter, left_count = groupsort_indexer(left, max_groups)
97
94
right_sorter, right_count = groupsort_indexer(right, max_groups)
98
95
@@ -104,14 +101,13 @@ def left_outer_join(const intp_t[:] left, const intp_t[:] right,
104
101
else :
105
102
count += left_count[i]
106
103
107
- # exclude the NA group
108
- left_pos = left_count[0 ]
109
- right_pos = right_count[0 ]
110
-
111
104
left_indexer = np.empty(count, dtype = np.intp)
112
105
right_indexer = np.empty(count, dtype = np.intp)
113
106
114
107
with nogil:
108
+ # exclude the NA group
109
+ left_pos = left_count[0 ]
110
+ right_pos = right_count[0 ]
115
111
for i in range (1 , max_groups + 1 ):
116
112
lc = left_count[i]
117
113
rc = right_count[i]
@@ -131,40 +127,38 @@ def left_outer_join(const intp_t[:] left, const intp_t[:] right,
131
127
left_pos += lc
132
128
right_pos += rc
133
129
134
- left_indexer = _get_result_indexer(left_sorter, left_indexer)
135
- right_indexer = _get_result_indexer(right_sorter, right_indexer)
130
+ # Will overwrite left/right indexer with the result
131
+ _get_result_indexer(left_sorter, left_indexer)
132
+ _get_result_indexer(right_sorter, right_indexer)
136
133
137
134
if not sort: # if not asked to sort, revert to original order
138
- # cast to avoid build warning GH#26757
139
- if < Py_ssize_t> len (left) == len (left_indexer):
135
+ if len (left) == len (left_indexer):
140
136
# no multiple matches for any row on the left
141
137
# this is a short-cut to avoid groupsort_indexer
142
138
# otherwise, the `else` path also works in this case
143
139
rev = np.empty(len (left), dtype = np.intp)
144
- rev.put(left_sorter, np.arange(len (left)))
140
+ rev.put(np.asarray( left_sorter) , np.arange(len (left)))
145
141
else :
146
142
rev, _ = groupsort_indexer(left_indexer, len (left))
147
143
148
- right_indexer = right_indexer.take(rev)
149
- left_indexer = left_indexer.take(rev)
150
-
151
- return left_indexer, right_indexer
144
+ return np.asarray(left_indexer).take(rev), np.asarray(right_indexer).take(rev)
145
+ else :
146
+ return np.asarray(left_indexer), np.asarray(right_indexer)
152
147
153
148
149
+ @ cython.wraparound (False )
154
150
@ cython.boundscheck (False )
155
151
def full_outer_join (const intp_t[:] left , const intp_t[:] right ,
156
152
Py_ssize_t max_groups ):
157
153
cdef:
158
154
Py_ssize_t i, j, k, count = 0
159
- ndarray[ intp_t] left_sorter, right_sorter
160
- ndarray[ intp_t] left_count, right_count
161
- ndarray[ intp_t] left_indexer, right_indexer
155
+ intp_t[:: 1 ] left_sorter, right_sorter
156
+ intp_t[:: 1 ] left_count, right_count
157
+ intp_t[:: 1 ] left_indexer, right_indexer
162
158
intp_t lc, rc
163
159
intp_t left_pos = 0 , right_pos = 0
164
160
Py_ssize_t offset, position = 0
165
161
166
- # NA group in location 0
167
-
168
162
left_sorter, left_count = groupsort_indexer(left, max_groups)
169
163
right_sorter, right_count = groupsort_indexer(right, max_groups)
170
164
@@ -179,14 +173,13 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,
179
173
else :
180
174
count += lc + rc
181
175
182
- # exclude the NA group
183
- left_pos = left_count[0 ]
184
- right_pos = right_count[0 ]
185
-
186
176
left_indexer = np.empty(count, dtype = np.intp)
187
177
right_indexer = np.empty(count, dtype = np.intp)
188
178
189
179
with nogil:
180
+ # exclude the NA group
181
+ left_pos = left_count[0 ]
182
+ right_pos = right_count[0 ]
190
183
for i in range (1 , max_groups + 1 ):
191
184
lc = left_count[i]
192
185
rc = right_count[i]
@@ -211,24 +204,33 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,
211
204
left_pos += lc
212
205
right_pos += rc
213
206
214
- return (_get_result_indexer(left_sorter, left_indexer),
215
- _get_result_indexer(right_sorter, right_indexer))
207
+ # Will overwrite left/right indexer with the result
208
+ _get_result_indexer(left_sorter, left_indexer)
209
+ _get_result_indexer(right_sorter, right_indexer)
210
+
211
+ return np.asarray(left_indexer), np.asarray(right_indexer)
216
212
217
213
218
@cython.wraparound(False)
@cython.boundscheck(False)
cdef void _get_result_indexer(intp_t[::1] sorter, intp_t[::1] indexer) nogil:
    """
    Map the positions held in ``indexer`` through ``sorter``, in place.

    Cython-only equivalent of
    ``algos.take_nd(sorter, indexer, fill_value=-1)``, except that the
    result is written back into ``indexer`` instead of a newly allocated
    array.

    Parameters
    ----------
    sorter : intp_t[::1]
        Lookup table; each non-missing entry ``idx`` of ``indexer`` is
        replaced by ``sorter[idx]``.
    indexer : intp_t[::1]
        Positions into ``sorter``, with -1 marking a missing entry.
        Overwritten with the result.

    Notes
    -----
    Bounds checking and negative-index wraparound are disabled, so no
    validation is performed on the values stored in ``indexer``.
    """
    cdef:
        Py_ssize_t pos, idx
        Py_ssize_t length = indexer.shape[0]

    if len(sorter) == 0:
        # length-0 case: there is nothing to look up, so every entry
        # is marked as missing
        indexer[:] = -1
        return

    for pos in range(length):
        idx = indexer[pos]
        # entries equal to -1 already hold their final value
        if idx != -1:
            indexer[pos] = sorter[idx]
232
234
233
235
234
236
def ffill_indexer (const intp_t[:] indexer ) -> np.ndarray:
0 commit comments