@@ -58,10 +58,10 @@ cdef inline bint needs_resize(vector_data *data) nogil:
58
58
59
59
{{py:
60
60
61
- # name, dtype, arg, idtype
62
- dtypes = [('Float64', 'float64', 'float64_t', 'np.float64 '),
63
- ('UInt64', 'uint64', 'uint64_t', 'np.uint64 '),
64
- ('Int64', 'int64', 'int64_t', 'np.int64 ')]
61
+ # name, dtype, arg, idtype (idtype is the NumPy type number passed to PyArray_SimpleNewFromData)
62
+ dtypes = [('Float64', 'float64', 'float64_t', 'cnp.NPY_FLOAT64 '),
63
+ ('UInt64', 'uint64', 'uint64_t', 'cnp.NPY_UINT64 '),
64
+ ('Int64', 'int64', 'int64_t', 'cnp.NPY_INT64 ')]
65
65
66
66
}}
67
67
@@ -71,6 +71,7 @@ cdef class {{name}}Vector:
71
71
72
72
{{if dtype != 'int64'}}
73
73
cdef:
74
+ bint external_view_exists
74
75
{{name}}VectorData *data
75
76
ndarray ao
76
77
{{endif}}
@@ -80,28 +81,41 @@ cdef class {{name}}Vector:
80
81
sizeof({{name}}VectorData))
81
82
if not self.data:
82
83
raise MemoryError()
84
+ self.external_view_exists = False
83
85
self.data.n = 0
84
86
self.data.m = _INIT_VEC_CAP
85
- self.ao = np.empty(self.data.m, dtype={{idtype}})
86
- self.data.data = <{{arg}}*> self.ao.data
87
+ self.data.data = <{{arg}}*> malloc(self.data.m * sizeof({{arg}}))
87
88
88
- cdef resize(self):
89
+ cdef void resize(self) nogil:
90
+ # TODO: handle realloc failure (a NULL return would overwrite, and so leak, the old buffer pointer)
89
91
self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
90
- self.ao.resize(self.data.m)
91
- self.data.data = <{{arg}}*> self.ao.data
92
+ self.data.data = <{{arg}}*> realloc(self.data.data, self.data.m * sizeof({{arg}}))
92
93
93
94
def __dealloc__(self):
94
95
if self.data is not NULL:
96
+ if not self.external_view_exists and self.data.data:
97
+ # buffer was never handed out as an ndarray by to_array(), so it is still ours to free
98
+ free(self.data.data)
95
99
PyMem_Free(self.data)
96
100
self.data = NULL
97
101
98
102
def __len__(self):
99
103
return self.data.n
100
104
101
105
cpdef to_array(self):
102
- self.ao.resize(self.data.n)
103
- self.data.m = self.data.n
104
- return self.ao
106
+ cdef:
107
+ ndarray ao
108
+ cnp.npy_intp shape[1]
109
+ if self.external_view_exists:
110
+ raise ValueError("Vector.to_array() can only be called once")
111
+
112
+ self.data.data = <{{arg}}*> realloc(self.data.data, self.data.n * sizeof({{arg}}))
113
+ self.external_view_exists = True
114
+ shape[0] = self.data.n
115
+ ao = cnp.PyArray_SimpleNewFromData(1, shape, {{idtype}}, <void*>self.data.data)
116
+ # transfer ownership of the buffer to ao so that numpy eventually frees it
117
+ set_array_owndata(ao)
118
+ return ao
105
119
106
120
cdef inline void append(self, {{arg}} x):
107
121
@@ -120,32 +134,28 @@ cdef class StringVector:
120
134
121
135
cdef:
122
136
StringVectorData *data
137
+ bint external_view_exists
123
138
124
139
def __cinit__(self):
125
140
self.data = <StringVectorData *>PyMem_Malloc(
126
141
sizeof(StringVectorData))
127
142
if not self.data:
128
143
raise MemoryError()
144
+ self.external_view_exists = False
129
145
self.data.n = 0
130
146
self.data.m = _INIT_VEC_CAP
131
147
self.data.data = <char **> malloc(self.data.m * sizeof(char *))
132
148
133
- cdef resize(self):
134
- cdef:
135
- char **orig_data
136
- size_t i, m
137
-
138
- m = self.data.m
149
+ cdef void resize(self) nogil:
139
150
self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
140
151
141
- # TODO: can resize?
142
- orig_data = self.data.data
143
- self.data.data = <char **> malloc(self.data.m * sizeof(char *))
144
- for i in range(m):
145
- self.data.data[i] = orig_data[i]
152
+ self.data.data = <char **> realloc(self.data.data,
153
+ self.data.m * sizeof(char *))
146
154
147
155
def __dealloc__(self):
148
156
if self.data is not NULL:
157
+ # unlike numeric vectors, strings were copied
158
+ # into python objects so always free
149
159
if self.data.data is not NULL:
150
160
free(self.data.data)
151
161
PyMem_Free(self.data)
@@ -159,12 +169,15 @@ cdef class StringVector:
159
169
ndarray ao
160
170
size_t n
161
171
object val
172
+ if self.external_view_exists:
173
+ raise ValueError("Vector.to_array() can only be called once")
162
174
163
175
ao = np.empty(self.data.n, dtype=np.object)
164
176
for i in range(self.data.n):
165
177
val = self.data.data[i]
166
178
ao[i] = val
167
179
self.data.m = self.data.n
180
+ self.external_view_exists = True
168
181
return ao
169
182
170
183
cdef inline void append(self, char * x):
@@ -181,8 +194,10 @@ cdef class ObjectVector:
181
194
PyObject **data
182
195
size_t n, m
183
196
ndarray ao
197
+ bint external_view_exists
184
198
185
199
def __cinit__(self):
200
+ self.external_view_exists = False
186
201
self.n = 0
187
202
self.m = _INIT_VEC_CAP
188
203
self.ao = np.empty(_INIT_VEC_CAP, dtype=object)
@@ -194,16 +209,19 @@ cdef class ObjectVector:
194
209
cdef inline append(self, object o):
195
210
if self.n == self.m:
196
211
self.m = max(self.m * 2, _INIT_VEC_CAP)
197
- self.ao.resize(self.m)
212
+ self.ao.resize(self.m, refcheck=False )
198
213
self.data = <PyObject**> self.ao.data
199
214
200
215
Py_INCREF(o)
201
216
self.data[self.n] = <PyObject*> o
202
217
self.n += 1
203
218
204
219
def to_array(self):
220
+ if self.external_view_exists:
221
+ raise ValueError("Vector.to_array() can only be called once")
205
222
self.ao.resize(self.n)
206
223
self.m = self.n
224
+ self.external_view_exists = True
207
225
return self.ao
208
226
209
227
@@ -361,8 +379,7 @@ cdef class {{name}}HashTable(HashTable):
361
379
self.table.vals[k] = count
362
380
363
381
if needs_resize(ud):
364
- with gil:
365
- uniques.resize()
382
+ uniques.resize()
366
383
append_data_{{dtype}}(ud, val)
367
384
labels[i] = count
368
385
count += 1
@@ -404,8 +421,7 @@ cdef class {{name}}HashTable(HashTable):
404
421
self.table.vals[k] = count
405
422
406
423
if needs_resize(ud):
407
- with gil:
408
- uniques.resize()
424
+ uniques.resize()
409
425
append_data_{{dtype}}(ud, val)
410
426
labels[i] = count
411
427
count += 1
@@ -437,22 +453,19 @@ cdef class {{name}}HashTable(HashTable):
437
453
if k == self.table.n_buckets:
438
454
kh_put_{{dtype}}(self.table, val, &ret)
439
455
if needs_resize(ud):
440
- with gil:
441
- uniques.resize()
456
+ uniques.resize()
442
457
append_data_{{dtype}}(ud, val)
443
458
elif not seen_na:
444
459
seen_na = 1
445
460
if needs_resize(ud):
446
- with gil:
447
- uniques.resize()
461
+ uniques.resize()
448
462
append_data_{{dtype}}(ud, NAN)
449
463
{{else}}
450
464
k = kh_get_{{dtype}}(self.table, val)
451
465
if k == self.table.n_buckets:
452
466
kh_put_{{dtype}}(self.table, val, &ret)
453
467
if needs_resize(ud):
454
- with gil:
455
- uniques.resize()
468
+ uniques.resize()
456
469
append_data_{{dtype}}(ud, val)
457
470
{{endif}}
458
471
0 commit comments