@@ -85,9 +85,9 @@ cdef class {{name}}Vector:
85
85
self.ao = np.empty(self.data.m, dtype={{idtype}})
86
86
self.data.data = <{{arg}}*> self.ao.data
87
87
88
cdef resize(self, refcheck=True):
    # Grow the backing ndarray and re-point the raw C buffer at it.
    #
    # The capacity ``m`` is quadrupled, with ``_INIT_VEC_CAP`` as the floor.
    #
    # ``refcheck`` is forwarded to ``ndarray.resize``.  With the default
    # (True) NumPy refuses to resize an array that is referenced elsewhere;
    # passing False skips that check, so the caller must be sure nobody
    # else holds ``self.ao`` -- the buffer may move.
    self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
    self.ao.resize(self.data.m, refcheck=refcheck)

    # ndarray.resize may reallocate, so the cached pointer must be refreshed.
    self.data.data = <{{arg}}*> self.ao.data
92
92
93
93
def __dealloc__(self):
@@ -98,15 +98,15 @@ cdef class {{name}}Vector:
98
98
def __len__(self):
    # ``n`` is the fill count; ``m`` (not reported here) is the capacity
    # the backing array is resized to.
    return self.data.n
100
100
101
cpdef to_array(self, refcheck=True):
    """
    Shrink the backing array to the number of stored elements and return it.

    The returned object is the vector's own backing ndarray, not a copy.

    Parameters
    ----------
    refcheck : bool, default True
        Forwarded to ``ndarray.resize``.  Pass False only when no other
        reference to the backing array can exist.
    """
    # Trim first; only record the new capacity once the resize succeeded.
    self.ao.resize(self.data.n, refcheck=refcheck)
    self.data.m = self.data.n
    return self.ao
105
105
106
cdef inline void append(self, {{arg}} x, refcheck=True):
    # Append one value, growing the backing array first when it is full.
    # ``refcheck`` is only consulted if a resize is actually required.
    if needs_resize(self.data):
        self.resize(refcheck=refcheck)

    append_data_{{dtype}}(self.data, x)
112
112
@@ -130,11 +130,12 @@ cdef class StringVector:
130
130
self.data.m = _INIT_VEC_CAP
131
131
self.data.data = <char **> malloc(self.data.m * sizeof(char *))
132
132
133
- cdef resize(self):
133
+ cdef resize(self, refcheck=True ):
134
134
cdef:
135
135
char **orig_data
136
136
size_t i, m
137
137
138
+ # refcheck ignored, for compatibility only
138
139
m = self.data.m
139
140
self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
140
141
@@ -154,23 +155,24 @@ cdef class StringVector:
154
155
def __len__(self):
    # Fill count of the vector; the allocated slot count lives in ``m``.
    return self.data.n
156
157
157
def to_array(self, refcheck=True):
    """
    Copy the stored C strings into a new object-dtype ndarray.

    Parameters
    ----------
    refcheck : bool, default True
        Unused; accepted only so the signature matches the other vector
        classes' ``to_array``.

    Returns
    -------
    ndarray
        Freshly allocated array of length ``len(self)`` with dtype object.
    """
    cdef:
        ndarray ao
        object val

    # refcheck is unused but needed for API compatibility

    # Use the builtin ``object`` rather than the ``np.object`` alias, which
    # was deprecated in NumPy 1.20 and removed in 1.24.
    ao = np.empty(self.data.n, dtype=object)
    for i in range(self.data.n):
        # Assigning the ``char *`` to an ``object`` variable makes Cython
        # build a Python object from the C string before it is stored.
        val = self.data.data[i]
        ao[i] = val
    self.data.m = self.data.n
    return ao
169
171
170
cdef inline void append(self, char * x, refcheck=True):
    # Append one C string pointer, growing the pointer table when full.
    # ``refcheck`` is simply handed on to ``resize``.
    if needs_resize(self.data):
        self.resize(refcheck=refcheck)

    append_data_string(self.data, x)
176
178
@@ -191,18 +193,18 @@ cdef class ObjectVector:
191
193
def __len__(self):
    # Count of objects appended so far; capacity is tracked in ``m``.
    return self.n
193
195
194
cdef inline append(self, object o, refcheck=True):
    # Append ``o``, doubling the backing object array when it is full.
    #
    # ``refcheck`` is forwarded to ``ndarray.resize``; False skips NumPy's
    # check that no other reference to the array exists.
    if self.n == self.m:
        self.m = max(self.m * 2, _INIT_VEC_CAP)
        self.ao.resize(self.m, refcheck=refcheck)
        # The resize may have moved the buffer; refresh the raw pointer.
        self.data = <PyObject**> self.ao.data

    # The slot stores a raw pointer, so take a reference before storing it.
    Py_INCREF(o)
    self.data[self.n] = <PyObject*> o
    self.n += 1
203
205
204
def to_array(self, refcheck=True):
    """
    Trim the backing object array to the stored length and return it.

    The returned array is the vector's own backing array, not a copy.

    Parameters
    ----------
    refcheck : bool, default True
        Forwarded to ``ndarray.resize``.  Pass False only when no other
        reference to the backing array can exist.
    """
    self.ao.resize(self.n, refcheck=refcheck)
    # Capacity now equals the fill count.
    self.m = self.n
    return self.ao
208
210
@@ -324,13 +326,13 @@ cdef class {{name}}HashTable(HashTable):
324
326
325
327
def factorize(self, {{dtype}}_t values):
    """
    Compute labels for ``values`` together with the array of uniques.

    Returns
    -------
    tuple
        ``(uniques, labels)`` -- note the uniques array comes first.
    """
    # The vector is created here and never shared before ``to_array``
    # is called, so both calls skip NumPy's reference check.
    uniques = {{name}}Vector()

    # Positional zeros are ``count_prior`` and ``na_sentinel``.
    labels = self.get_labels(values, uniques, 0, 0, refcheck=False)
    unique_values = uniques.to_array(refcheck=False)
    return unique_values, labels
329
331
330
332
@cython.boundscheck(False)
331
333
def get_labels(self, {{dtype}}_t[:] values, {{name}}Vector uniques,
332
334
Py_ssize_t count_prior, Py_ssize_t na_sentinel,
333
- bint check_null=True):
335
+ bint check_null=True, bint refcheck=True ):
334
336
cdef:
335
337
Py_ssize_t i, n = len(values)
336
338
int64_t[:] labels
@@ -362,7 +364,7 @@ cdef class {{name}}HashTable(HashTable):
362
364
363
365
if needs_resize(ud):
364
366
with gil:
365
- uniques.resize()
367
+ uniques.resize(refcheck=refcheck )
366
368
append_data_{{dtype}}(ud, val)
367
369
labels[i] = count
368
370
count += 1
@@ -405,12 +407,12 @@ cdef class {{name}}HashTable(HashTable):
405
407
406
408
if needs_resize(ud):
407
409
with gil:
408
- uniques.resize()
410
+ uniques.resize(refcheck=False )
409
411
append_data_{{dtype}}(ud, val)
410
412
labels[i] = count
411
413
count += 1
412
414
413
- arr_uniques = uniques.to_array()
415
+ arr_uniques = uniques.to_array(refcheck=False )
414
416
415
417
return np.asarray(labels), arr_uniques
416
418
@@ -438,25 +440,25 @@ cdef class {{name}}HashTable(HashTable):
438
440
kh_put_{{dtype}}(self.table, val, &ret)
439
441
if needs_resize(ud):
440
442
with gil:
441
- uniques.resize()
443
+ uniques.resize(refcheck=False )
442
444
append_data_{{dtype}}(ud, val)
443
445
elif not seen_na:
444
446
seen_na = 1
445
447
if needs_resize(ud):
446
448
with gil:
447
- uniques.resize()
449
+ uniques.resize(refcheck=False )
448
450
append_data_{{dtype}}(ud, NAN)
449
451
{{else}}
450
452
k = kh_get_{{dtype}}(self.table, val)
451
453
if k == self.table.n_buckets:
452
454
kh_put_{{dtype}}(self.table, val, &ret)
453
455
if needs_resize(ud):
454
456
with gil:
455
- uniques.resize()
457
+ uniques.resize(refcheck=False )
456
458
append_data_{{dtype}}(ud, val)
457
459
{{endif}}
458
460
459
- return uniques.to_array()
461
+ return uniques.to_array(refcheck=False )
460
462
461
463
{{endfor}}
462
464
@@ -571,12 +573,12 @@ cdef class StringHashTable(HashTable):
571
573
uniques = ObjectVector()
572
574
for i in range(count):
573
575
uniques.append(values[uindexer[i]])
574
- return uniques.to_array()
576
+ return uniques.to_array(refcheck=False )
575
577
576
578
def factorize(self, ndarray[object] values):
    """
    Compute labels for ``values`` together with the array of uniques.

    Returns
    -------
    tuple
        ``(uniques, labels)`` -- note the uniques array comes first.
    """
    # The vector is local to this call, so the reference check that
    # ``refcheck`` controls is switched off for both calls.  ``False`` is
    # used in both places (instead of mixing ``0`` and ``False``).
    uniques = ObjectVector()

    # Positional zeros are ``count_prior`` and ``na_sentinel``.
    labels = self.get_labels(values, uniques, 0, 0, refcheck=False)
    return uniques.to_array(refcheck=False), labels
580
582
581
583
@cython.boundscheck(False)
582
584
def lookup(self, ndarray[object] values):
@@ -642,7 +644,7 @@ cdef class StringHashTable(HashTable):
642
644
@cython.boundscheck(False)
643
645
def get_labels(self, ndarray[object] values, ObjectVector uniques,
644
646
Py_ssize_t count_prior, int64_t na_sentinel,
645
- bint check_null=1):
647
+ bint check_null=1, bint refcheck=1 ):
646
648
cdef:
647
649
Py_ssize_t i, n = len(values)
648
650
int64_t[:] labels
@@ -654,6 +656,7 @@ cdef class StringHashTable(HashTable):
654
656
char **vecs
655
657
khiter_t k
656
658
659
+ # refcheck is for compatibility
657
660
# these by-definition *must* be strings
658
661
labels = np.zeros(n, dtype=np.int64)
659
662
uindexer = np.empty(n, dtype=np.int64)
@@ -811,11 +814,11 @@ cdef class PyObjectHashTable(HashTable):
811
814
seen_na = 1
812
815
uniques.append(nan)
813
816
814
- return uniques.to_array()
817
+ return uniques.to_array(refcheck=False )
815
818
816
819
def get_labels(self, ndarray[object] values, ObjectVector uniques,
817
820
Py_ssize_t count_prior, int64_t na_sentinel,
818
- bint check_null=True):
821
+ bint check_null=True, bint refcheck=True ):
819
822
cdef:
820
823
Py_ssize_t i, n = len(values)
821
824
int64_t[:] labels
@@ -968,5 +971,5 @@ cdef class MultiIndexHashTable(HashTable):
968
971
969
972
def get_labels(self, object mi, ObjectVector uniques,
               Py_ssize_t count_prior, int64_t na_sentinel,
               bint check_null=True, bint refcheck=True):
    """
    Not implemented for this hash table.

    Every argument, including ``refcheck``, is accepted but ignored.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError
0 commit comments