@@ -85,10 +85,11 @@ cdef class {{name}}Vector:
85
85
self.ao = np.empty(self.data.m, dtype={{idtype}})
86
86
self.data.data = <{{arg}}*> self.ao.data
87
87
88
cdef resize(self, refcheck=True):
    # Grow the backing ndarray to four times the current capacity (never
    # less than _INIT_VEC_CAP) and re-read the raw data pointer from it.
    # `refcheck` is forwarded unchanged to ndarray.resize.
    new_capacity = max(self.data.m * 4, _INIT_VEC_CAP)
    # ndarray.resize could raise, so the recorded capacity is only
    # updated once the resize call has returned.
    self.ao.resize(new_capacity, refcheck=refcheck)
    self.data.m = new_capacity
    self.data.data = <{{arg}}*> self.ao.data
92
93
93
94
def __dealloc__(self):
94
95
if self.data is not NULL:
@@ -98,21 +99,21 @@ cdef class {{name}}Vector:
98
99
def __len__(self):
99
100
return self.data.n
100
101
101
cpdef to_array(self, refcheck=True):
    """
    Resize the backing ndarray to ``self.data.n`` elements and return it.

    ``refcheck`` is forwarded unchanged to ``ndarray.resize``. The
    returned object is ``self.ao`` itself, not a copy.
    """
    n_used = self.data.n
    self.ao.resize(n_used, refcheck=refcheck)
    # re-read the raw pointer after the resize and record the new capacity
    self.data.data = <{{arg}}*> self.ao.data
    self.data.m = n_used
    return self.ao
105
107
106
cdef inline void append(self, {{arg}} x, refcheck=True):
    # Store x at the end of the vector, calling resize() first whenever
    # needs_resize() asks for it. `refcheck` is forwarded to resize().
    # NOTE(review): declared `void` with no `except` clause -- confirm how
    # an exception raised inside resize() is surfaced to the caller.
    if needs_resize(self.data):
        self.resize(refcheck=refcheck)
    append_data_{{dtype}}(self.data, x)
112
113
113
cdef extend(self, {{arg}}[:] x, refcheck=True):
    # Append every element of x in order; `refcheck` is handed to each
    # append() call.
    n_items = len(x)
    for i in range(n_items):
        self.append(x[i], refcheck=refcheck)
116
117
117
118
{{endfor}}
118
119
@@ -130,11 +131,12 @@ cdef class StringVector:
130
131
self.data.m = _INIT_VEC_CAP
131
132
self.data.data = <char **> malloc(self.data.m * sizeof(char *))
132
133
133
- cdef resize(self):
134
+ cdef resize(self, refcheck=True ):
134
135
cdef:
135
136
char **orig_data
136
137
size_t i, m
137
138
139
+ # refcheck ignored, for compatibility only
138
140
m = self.data.m
139
141
self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
140
142
@@ -154,23 +156,24 @@ cdef class StringVector:
154
156
def __len__(self):
155
157
return self.data.n
156
158
157
def to_array(self, refcheck=True):
    """
    Copy the stored entries into a newly allocated object ndarray.

    Parameters
    ----------
    refcheck : bool, default True
        Ignored; accepted only so the signature matches the other
        vector classes.

    Returns
    -------
    ndarray
        Object-dtype array holding ``self.data.n`` elements.
    """
    cdef:
        ndarray ao
        object val

    # `np.object` was a plain alias of the builtin `object` and has been
    # removed from NumPy; `np.object_` is the supported spelling.
    ao = np.empty(self.data.n, dtype=np.object_)
    for i in range(self.data.n):
        # go through a typed local so each char* entry is converted to a
        # Python object before it is stored in the array
        val = self.data.data[i]
        ao[i] = val
    self.data.m = self.data.n
    return ao
169
172
170
- cdef inline void append(self, char * x):
173
+ cdef inline void append(self, char * x, refcheck=True ):
171
174
172
175
if needs_resize(self.data):
173
- self.resize()
176
+ self.resize(refcheck=refcheck )
174
177
175
178
append_data_string(self.data, x)
176
179
@@ -191,18 +194,18 @@ cdef class ObjectVector:
191
194
def __len__(self):
192
195
return self.n
193
196
194
cdef inline append(self, object o, refcheck=True):
    # Append `o`, doubling the backing ndarray (never below
    # _INIT_VEC_CAP) when it is full. `refcheck` is forwarded to
    # ndarray.resize.
    if self.n == self.m:
        new_capacity = max(self.m * 2, _INIT_VEC_CAP)
        # ndarray.resize could raise; commit the new capacity only after
        # it succeeds so a failed resize cannot leave self.m larger than
        # the real buffer.
        self.ao.resize(new_capacity, refcheck=refcheck)
        self.data = <PyObject**> self.ao.data
        self.m = new_capacity

    # the vector stores a raw pointer, so take a reference explicitly
    Py_INCREF(o)
    self.data[self.n] = <PyObject*> o
    self.n += 1
203
206
204
def to_array(self, refcheck=True):
    """
    Resize the backing ndarray to the ``self.n`` stored objects and
    return it.

    ``refcheck`` is forwarded unchanged to ``ndarray.resize``. The
    returned object is ``self.ao`` itself, not a copy.
    """
    self.ao.resize(self.n, refcheck=refcheck)
    # re-read the raw pointer after the resize so it does not keep
    # referring to the pre-resize buffer
    self.data = <PyObject**> self.ao.data
    self.m = self.n
    return self.ao
208
211
@@ -324,13 +327,13 @@ cdef class {{name}}HashTable(HashTable):
324
327
325
328
def factorize(self, {{dtype}}_t values):
    """
    Run ``get_labels`` on ``values`` with a fresh vector and return
    ``(uniques, labels)``.
    """
    # NOTE(review): `values` is typed as a scalar here while get_labels
    # declares a `{{dtype}}_t[:]` memoryview -- confirm this is intended.
    uniques = {{name}}Vector()
    # `uniques` is created in this call; both calls below are made with
    # refcheck=False
    labels = self.get_labels(values, uniques, 0, 0, refcheck=False)
    unique_values = uniques.to_array(refcheck=False)
    return unique_values, labels
329
332
330
333
@cython.boundscheck(False)
331
334
def get_labels(self, {{dtype}}_t[:] values, {{name}}Vector uniques,
332
335
Py_ssize_t count_prior, Py_ssize_t na_sentinel,
333
- bint check_null=True):
336
+ bint check_null=True, bint refcheck=True ):
334
337
cdef:
335
338
Py_ssize_t i, n = len(values)
336
339
int64_t[:] labels
@@ -362,7 +365,7 @@ cdef class {{name}}HashTable(HashTable):
362
365
363
366
if needs_resize(ud):
364
367
with gil:
365
- uniques.resize()
368
+ uniques.resize(refcheck=refcheck )
366
369
append_data_{{dtype}}(ud, val)
367
370
labels[i] = count
368
371
count += 1
@@ -405,12 +408,12 @@ cdef class {{name}}HashTable(HashTable):
405
408
406
409
if needs_resize(ud):
407
410
with gil:
408
- uniques.resize()
411
+ uniques.resize(refcheck=False )
409
412
append_data_{{dtype}}(ud, val)
410
413
labels[i] = count
411
414
count += 1
412
415
413
- arr_uniques = uniques.to_array()
416
+ arr_uniques = uniques.to_array(refcheck=False )
414
417
415
418
return np.asarray(labels), arr_uniques
416
419
@@ -438,25 +441,25 @@ cdef class {{name}}HashTable(HashTable):
438
441
kh_put_{{dtype}}(self.table, val, &ret)
439
442
if needs_resize(ud):
440
443
with gil:
441
- uniques.resize()
444
+ uniques.resize(refcheck=False )
442
445
append_data_{{dtype}}(ud, val)
443
446
elif not seen_na:
444
447
seen_na = 1
445
448
if needs_resize(ud):
446
449
with gil:
447
- uniques.resize()
450
+ uniques.resize(refcheck=False )
448
451
append_data_{{dtype}}(ud, NAN)
449
452
{{else}}
450
453
k = kh_get_{{dtype}}(self.table, val)
451
454
if k == self.table.n_buckets:
452
455
kh_put_{{dtype}}(self.table, val, &ret)
453
456
if needs_resize(ud):
454
457
with gil:
455
- uniques.resize()
458
+ uniques.resize(refcheck=False )
456
459
append_data_{{dtype}}(ud, val)
457
460
{{endif}}
458
461
459
- return uniques.to_array()
462
+ return uniques.to_array(refcheck=False )
460
463
461
464
{{endfor}}
462
465
@@ -571,12 +574,12 @@ cdef class StringHashTable(HashTable):
571
574
uniques = ObjectVector()
572
575
for i in range(count):
573
576
uniques.append(values[uindexer[i]])
574
- return uniques.to_array()
577
+ return uniques.to_array(refcheck=False )
575
578
576
579
def factorize(self, ndarray[object] values):
    """
    Run ``get_labels`` on ``values`` with a fresh ``ObjectVector`` and
    return ``(uniques, labels)``.
    """
    uniques = ObjectVector()
    # `uniques` is created in this call; both calls below are made with
    # refcheck=False
    labels = self.get_labels(values, uniques, 0, 0, refcheck=False)
    unique_values = uniques.to_array(refcheck=False)
    return unique_values, labels
580
583
581
584
@cython.boundscheck(False)
582
585
def lookup(self, ndarray[object] values):
@@ -642,7 +645,7 @@ cdef class StringHashTable(HashTable):
642
645
@cython.boundscheck(False)
643
646
def get_labels(self, ndarray[object] values, ObjectVector uniques,
644
647
Py_ssize_t count_prior, int64_t na_sentinel,
645
- bint check_null=1):
648
+ bint check_null=1, bint refcheck=1 ):
646
649
cdef:
647
650
Py_ssize_t i, n = len(values)
648
651
int64_t[:] labels
@@ -654,6 +657,8 @@ cdef class StringHashTable(HashTable):
654
657
char **vecs
655
658
khiter_t k
656
659
660
+ # refcheck ignored, for compatibility only
661
+
657
662
# these by-definition *must* be strings
658
663
labels = np.zeros(n, dtype=np.int64)
659
664
uindexer = np.empty(n, dtype=np.int64)
@@ -811,11 +816,11 @@ cdef class PyObjectHashTable(HashTable):
811
816
seen_na = 1
812
817
uniques.append(nan)
813
818
814
- return uniques.to_array()
819
+ return uniques.to_array(refcheck=False )
815
820
816
821
def get_labels(self, ndarray[object] values, ObjectVector uniques,
817
822
Py_ssize_t count_prior, int64_t na_sentinel,
818
- bint check_null=True):
823
+ bint check_null=True, bint refcheck=True ):
819
824
cdef:
820
825
Py_ssize_t i, n = len(values)
821
826
int64_t[:] labels
@@ -824,6 +829,8 @@ cdef class PyObjectHashTable(HashTable):
824
829
object val
825
830
khiter_t k
826
831
832
+ # refcheck ignored, for compatibility only
833
+
827
834
labels = np.empty(n, dtype=np.int64)
828
835
829
836
for i in range(n):
0 commit comments