Skip to content

Commit 90a8547

Browse files
committed
CLN/PERF: remove need for VectorData in hashtables
closes pandas-dev#14879
1 parent a68c402 commit 90a8547

File tree

4 files changed

+81
-181
lines changed

4 files changed

+81
-181
lines changed

pandas/hashtable.pyx

+10 -9
Original file line number | Diff line number | Diff line change
@@ -99,8 +99,7 @@ cdef class Int64Factorizer:
9999
def factorize(self, int64_t[:] values, sort=False,
100100
na_sentinel=-1, check_null=True):
101101
labels = self.table.get_labels(values, self.uniques,
102-
self.count, na_sentinel,
103-
check_null)
102+
self.count, na_sentinel, check_null)
104103

105104
# sort on
106105
if sort:
@@ -286,25 +285,27 @@ def unique_label_indices(ndarray[int64_t, ndim=1] labels):
286285
"""
287286
cdef:
288287
int ret = 0
289-
Py_ssize_t i, n = len(labels)
288+
Py_ssize_t i, count = 0, n = len(labels)
290289
kh_int64_t * table = kh_init_int64()
291-
Int64Vector idx = Int64Vector()
290+
Int64Vector idx
292291
ndarray[int64_t, ndim=1] arr
293-
Int64VectorData *ud = idx.data
292+
int64_t[:] uindexer
294293

295294
kh_resize_int64(table, min(n, _SIZE_HINT_LIMIT))
295+
uindexer = np.empty(n, dtype=np.int64)
296296

297297
with nogil:
298298
for i in range(n):
299299
kh_put_int64(table, labels[i], &ret)
300300
if ret != 0:
301-
if needs_resize(ud):
302-
with gil:
303-
idx.resize()
304-
append_data_int64(ud, i)
301+
uindexer[count] = i
302+
count += 1
305303

306304
kh_destroy_int64(table)
307305

306+
idx = Int64Vector(count)
307+
for i in range(count):
308+
idx.append(uindexer[i])
308309
arr = idx.to_array()
309310
arr = arr[labels[arr].argsort()]
310311

0 commit comments

Comments
 (0)