@@ -99,8 +99,7 @@ cdef class Int64Factorizer:
99
99
def factorize (self , int64_t[:] values , sort = False ,
100
100
na_sentinel = - 1 , check_null = True ):
101
101
labels = self .table.get_labels(values, self .uniques,
102
- self .count, na_sentinel,
103
- check_null)
102
+ self .count, na_sentinel, check_null)
104
103
105
104
# sort on
106
105
if sort:
@@ -286,25 +285,27 @@ def unique_label_indices(ndarray[int64_t, ndim=1] labels):
286
285
"""
287
286
cdef:
288
287
int ret = 0
289
- Py_ssize_t i, n = len (labels)
288
+ Py_ssize_t i, count = 0 , n = len (labels)
290
289
kh_int64_t * table = kh_init_int64()
291
- Int64Vector idx = Int64Vector()
290
+ Int64Vector idx
292
291
ndarray[int64_t, ndim= 1 ] arr
293
- Int64VectorData * ud = idx.data
292
+ int64_t[:] uindexer
294
293
295
294
kh_resize_int64(table, min (n, _SIZE_HINT_LIMIT))
295
+ uindexer = np.empty(n, dtype = np.int64)
296
296
297
297
with nogil:
298
298
for i in range (n):
299
299
kh_put_int64(table, labels[i], & ret)
300
300
if ret != 0 :
301
- if needs_resize(ud):
302
- with gil:
303
- idx.resize()
304
- append_data_int64(ud, i)
301
+ uindexer[count] = i
302
+ count += 1
305
303
306
304
kh_destroy_int64(table)
307
305
306
+ idx = Int64Vector(count)
307
+ for i in range (count):
308
+ idx.append(uindexer[i])
308
309
arr = idx.to_array()
309
310
arr = arr[labels[arr].argsort()]
310
311
0 commit comments