Skip to content

Commit e65d9f9

Browse files
committed
small progress
1 parent 832d777 commit e65d9f9

File tree

1 file changed

+31
-7
lines changed

1 file changed

+31
-7
lines changed

pandas/hashtable.pyx

+31 -7
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
# cython: profile=False
1+
# cython: profile=True
22

33
from cpython cimport PyObject, Py_INCREF, PyList_Check, PyTuple_Check
44

@@ -32,6 +32,9 @@ PyDateTime_IMPORT
3232
cdef extern from "Python.h":
3333
int PySlice_Check(object)
3434

35+
cdef size_t _INIT_VEC_CAP = 32
36+
cdef size_t _USE_GIL = 100000
37+
3538
def list_to_object_array(list obj):
3639
'''
3740
Convert list to object ndarray. Seriously can't believe I had to write this
@@ -50,8 +53,6 @@ def list_to_object_array(list obj):
5053
return arr
5154

5255

53-
cdef size_t _INIT_VEC_CAP = 32
54-
5556
cdef class Vector:
5657

5758
cdef:
@@ -109,7 +110,7 @@ cdef class Int64Vector(Vector):
109110
self.ao.resize(self.m)
110111
self.data = <int64_t*> self.ao.data
111112

112-
cdef inline void append(self, int64_t x) nogil:
113+
cdef inline void append_nogil(self, int64_t x) nogil:
113114

114115
if self.needs_resize():
115116
with gil:
@@ -118,6 +119,14 @@ cdef class Int64Vector(Vector):
118119
self.data[self.n] = x
119120
self.n += 1
120121

122+
cdef inline void append(self, int64_t x):
123+
124+
if self.needs_resize():
125+
self.resize()
126+
127+
self.data[self.n] = x
128+
self.n += 1
129+
121130
cdef class Float64Vector(Vector):
122131

123132
cdef:
@@ -364,7 +373,22 @@ cdef class Int64HashTable(HashTable):
364373

365374
labels = np.empty(n, dtype=np.int64)
366375

367-
with nogil:
376+
if n > _USE_GIL:
377+
with nogil:
378+
for i in range(n):
379+
val = values[i]
380+
k = kh_get_int64(self.table, val)
381+
if k != self.table.n_buckets:
382+
idx = self.table.vals[k]
383+
labels[i] = idx
384+
else:
385+
k = kh_put_int64(self.table, val, &ret)
386+
self.table.vals[k] = count
387+
uniques.append_nogil(val)
388+
labels[i] = count
389+
count += 1
390+
391+
else:
368392
for i in range(n):
369393
val = values[i]
370394
k = kh_get_int64(self.table, val)
@@ -409,7 +433,7 @@ cdef class Int64HashTable(HashTable):
409433
else:
410434
k = kh_put_int64(self.table, val, &ret)
411435
self.table.vals[k] = count
412-
uniques.append(val)
436+
uniques.append_nogil(val)
413437
labels[i] = count
414438
count += 1
415439

@@ -433,7 +457,7 @@ cdef class Int64HashTable(HashTable):
433457
k = kh_get_int64(self.table, val)
434458
if k == self.table.n_buckets:
435459
kh_put_int64(self.table, val, &ret)
436-
uniques.append(val)
460+
uniques.append_nogil(val)
437461

438462
result = uniques.to_array()
439463

0 commit comments

Comments
 (0)