@@ -21,10 +21,9 @@ from numpy cimport (
21
21
cnp.import_array()
22
22
23
23
from pandas._libs.algos import (
24
- ensure_int64,
25
- ensure_platform_int,
26
24
groupsort_indexer,
27
25
take_1d_int64_int64,
26
+ take_1d_intp_intp,
28
27
)
29
28
30
29
@@ -34,16 +33,16 @@ def inner_join(const intp_t[:] left, const intp_t[:] right,
34
33
cdef:
35
34
Py_ssize_t i, j, k, count = 0
36
35
ndarray[intp_t] left_sorter, right_sorter
37
- ndarray[int64_t ] left_count, right_count
38
- ndarray[int64_t ] left_indexer, right_indexer
39
- int64_t lc, rc
36
+ ndarray[intp_t ] left_count, right_count
37
+ ndarray[intp_t ] left_indexer, right_indexer
38
+ intp_t lc, rc
40
39
Py_ssize_t loc, left_pos = 0 , right_pos = 0 , position = 0
41
40
Py_ssize_t offset
42
41
43
42
# NA group in location 0
44
43
45
- left_sorter, left_count = groupsort_indexer(ensure_int64( left) , max_groups)
46
- right_sorter, right_count = groupsort_indexer(ensure_int64( right) , max_groups)
44
+ left_sorter, left_count = groupsort_indexer(left, max_groups)
45
+ right_sorter, right_count = groupsort_indexer(right, max_groups)
47
46
48
47
with nogil:
49
48
# First pass, determine size of result set, do not use the NA group
@@ -58,8 +57,8 @@ def inner_join(const intp_t[:] left, const intp_t[:] right,
58
57
left_pos = left_count[0 ]
59
58
right_pos = right_count[0 ]
60
59
61
- left_indexer = np.empty(count, dtype = np.int64 )
62
- right_indexer = np.empty(count, dtype = np.int64 )
60
+ left_indexer = np.empty(count, dtype = np.intp )
61
+ right_indexer = np.empty(count, dtype = np.intp )
63
62
64
63
with nogil:
65
64
for i in range (1 , max_groups + 1 ):
@@ -85,17 +84,17 @@ def left_outer_join(const intp_t[:] left, const intp_t[:] right,
85
84
Py_ssize_t max_groups , bint sort = True ):
86
85
cdef:
87
86
Py_ssize_t i, j, k, count = 0
88
- ndarray[int64_t ] left_count, right_count
87
+ ndarray[intp_t ] left_count, right_count
89
88
ndarray[intp_t] rev, left_sorter, right_sorter
90
- ndarray[int64_t ] left_indexer, right_indexer
91
- int64_t lc, rc
89
+ ndarray[intp_t ] left_indexer, right_indexer
90
+ intp_t lc, rc
92
91
Py_ssize_t loc, left_pos = 0 , right_pos = 0 , position = 0
93
92
Py_ssize_t offset
94
93
95
94
# NA group in location 0
96
95
97
- left_sorter, left_count = groupsort_indexer(ensure_int64( left) , max_groups)
98
- right_sorter, right_count = groupsort_indexer(ensure_int64( right) , max_groups)
96
+ left_sorter, left_count = groupsort_indexer(left, max_groups)
97
+ right_sorter, right_count = groupsort_indexer(right, max_groups)
99
98
100
99
with nogil:
101
100
# First pass, determine size of result set, do not use the NA group
@@ -109,8 +108,8 @@ def left_outer_join(const intp_t[:] left, const intp_t[:] right,
109
108
left_pos = left_count[0 ]
110
109
right_pos = right_count[0 ]
111
110
112
- left_indexer = np.empty(count, dtype = np.int64 )
113
- right_indexer = np.empty(count, dtype = np.int64 )
111
+ left_indexer = np.empty(count, dtype = np.intp )
112
+ right_indexer = np.empty(count, dtype = np.intp )
114
113
115
114
with nogil:
116
115
for i in range (1 , max_groups + 1 ):
@@ -142,11 +141,10 @@ def left_outer_join(const intp_t[:] left, const intp_t[:] right,
142
141
# this is a short-cut to avoid groupsort_indexer
143
142
# otherwise, the `else` path also works in this case
144
143
rev = np.empty(len (left), dtype = np.intp)
145
- rev.put(ensure_platform_int( left_sorter) , np.arange(len (left)))
144
+ rev.put(left_sorter, np.arange(len (left)))
146
145
else :
147
146
rev, _ = groupsort_indexer(left_indexer, len (left))
148
147
149
- rev = ensure_platform_int(rev)
150
148
right_indexer = right_indexer.take(rev)
151
149
left_indexer = left_indexer.take(rev)
152
150
@@ -159,16 +157,16 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,
159
157
cdef:
160
158
Py_ssize_t i, j, k, count = 0
161
159
ndarray[intp_t] left_sorter, right_sorter
162
- ndarray[int64_t ] left_count, right_count
163
- ndarray[int64_t ] left_indexer, right_indexer
164
- int64_t lc, rc
165
- int64_t left_pos = 0 , right_pos = 0
160
+ ndarray[intp_t ] left_count, right_count
161
+ ndarray[intp_t ] left_indexer, right_indexer
162
+ intp_t lc, rc
163
+ intp_t left_pos = 0 , right_pos = 0
166
164
Py_ssize_t offset, position = 0
167
165
168
166
# NA group in location 0
169
167
170
- left_sorter, left_count = groupsort_indexer(ensure_int64( left) , max_groups)
171
- right_sorter, right_count = groupsort_indexer(ensure_int64( right) , max_groups)
168
+ left_sorter, left_count = groupsort_indexer(left, max_groups)
169
+ right_sorter, right_count = groupsort_indexer(right, max_groups)
172
170
173
171
with nogil:
174
172
# First pass, determine size of result set, do not use the NA group
@@ -185,8 +183,8 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,
185
183
left_pos = left_count[0 ]
186
184
right_pos = right_count[0 ]
187
185
188
- left_indexer = np.empty(count, dtype = np.int64 )
189
- right_indexer = np.empty(count, dtype = np.int64 )
186
+ left_indexer = np.empty(count, dtype = np.intp )
187
+ right_indexer = np.empty(count, dtype = np.intp )
190
188
191
189
with nogil:
192
190
for i in range (1 , max_groups + 1 ):
@@ -217,31 +215,29 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,
217
215
_get_result_indexer(right_sorter, right_indexer))
218
216
219
217
220
- cdef ndarray[int64_t ] _get_result_indexer(
221
- ndarray[intp_t] sorter, ndarray[int64_t ] indexer
218
+ cdef ndarray[intp_t ] _get_result_indexer(
219
+ ndarray[intp_t] sorter, ndarray[intp_t ] indexer
222
220
):
223
221
if len (sorter) > 0 :
224
222
# cython-only equivalent to
225
223
# `res = algos.take_nd(sorter, indexer, fill_value=-1)`
226
- res = np.empty(len (indexer), dtype = np.int64)
227
- take_1d_int64_int64(ensure_int64(sorter), ensure_platform_int(indexer), res, - 1 )
228
- # FIXME: sorter is intp_t, not int64_t, opposite for indexer;
229
- # will this break on 32bit builds?
224
+ res = np.empty(len (indexer), dtype = np.intp)
225
+ take_1d_intp_intp(sorter, indexer, res, - 1 )
230
226
else :
231
227
# length-0 case
232
- res = np.empty(len (indexer), dtype = np.int64 )
228
+ res = np.empty(len (indexer), dtype = np.intp )
233
229
res[:] = - 1
234
230
235
231
return res
236
232
237
233
238
- def ffill_indexer (const int64_t [:] indexer ):
234
+ def ffill_indexer (const intp_t [:] indexer ):
239
235
cdef:
240
236
Py_ssize_t i, n = len (indexer)
241
- ndarray[int64_t ] result
242
- int64_t val, last_obs
237
+ ndarray[intp_t ] result
238
+ intp_t val, last_obs
243
239
244
- result = np.empty(n, dtype = np.int64 )
240
+ result = np.empty(n, dtype = np.intp )
245
241
last_obs = - 1
246
242
247
243
for i in range (n):
0 commit comments