@@ -255,10 +255,56 @@ dtypes = [('Float64', 'float64', 'val != val', True),
255
255
('UInt64', 'uint64', 'False', False),
256
256
('Int64', 'int64', 'val == iNaT', False)]
257
257
258
+ def get_dispatch(dtypes):
# Generator consumed by the template's for-loop: for each
# (name, dtype, null_condition, float_group) entry in `dtypes` it renders
# the shared body of the `unique` / `unique_memview` methods once and
# yields it together with the original four fields.
259
+ for (name, dtype, null_condition, float_group) in dtypes:
# Cython source for the de-duplication loop. {name}, {dtype} and
# {float_group} are str.format placeholders filled in below.
# When float_group is true, values equal to themselves (i.e. not NaN) are
# looked up in self.table and appended to `uniques` if missing, while NaN
# is appended at most once (tracked by seen_na). Otherwise every value
# goes through the hash table.
# NOTE(review): the upper-case IF/ELSE is presumably Cython's compile-time
# conditional, evaluated on the literal True/False substituted for
# {float_group} -- confirm against the Cython version in use.
260
+ unique_template = """\
261
+ cdef:
262
+ Py_ssize_t i, n = len(values)
263
+ int ret = 0
264
+ {dtype}_t val
265
+ khiter_t k
266
+ bint seen_na = 0
267
+ {name}Vector uniques = {name}Vector()
268
+ {name}VectorData *ud
269
+
270
+ ud = uniques.data
271
+
272
+ with nogil:
273
+ for i in range(n):
274
+ val = values[i]
275
+ IF {float_group}:
276
+ if val == val:
277
+ k = kh_get_{dtype}(self.table, val)
278
+ if k == self.table.n_buckets:
279
+ kh_put_{dtype}(self.table, val, &ret)
280
+ if needs_resize(ud):
281
+ with gil:
282
+ uniques.resize()
283
+ append_data_{dtype}(ud, val)
284
+ elif not seen_na:
285
+ seen_na = 1
286
+ if needs_resize(ud):
287
+ with gil:
288
+ uniques.resize()
289
+ append_data_{dtype}(ud, NAN)
290
+ ELSE:
291
+ k = kh_get_{dtype}(self.table, val)
292
+ if k == self.table.n_buckets:
293
+ kh_put_{dtype}(self.table, val, &ret)
294
+ if needs_resize(ud):
295
+ with gil:
296
+ uniques.resize()
297
+ append_data_{dtype}(ud, val)
298
+ return uniques.to_array()
299
+ """
300
+
# Fill in the placeholders. null_condition is passed as well, but the
# template contains no {null_condition} field, so str.format ignores it.
301
+ unique_template = unique_template.format(name=name, dtype=dtype, null_condition=null_condition, float_group=float_group)
302
+
# Hand back the original fields plus the rendered body so the template
# loop can unpack five values per dtype.
303
+ yield (name, dtype, null_condition, float_group, unique_template)
258
304
}}
259
305
260
306
261
- {{for name, dtype, null_condition, float_group in dtypes}}
307
+ {{for name, dtype, null_condition, float_group, unique_template in get_dispatch( dtypes) }}
262
308
263
309
cdef class {{name}}HashTable(HashTable):
264
310
@@ -450,48 +496,20 @@ cdef class {{name}}HashTable(HashTable):
450
496
return np.asarray(labels), arr_uniques
451
497
452
498
@cython.boundscheck(False)
453
- def unique(self, {{dtype}}_t[:] values):
454
- cdef:
455
- Py_ssize_t i, n = len(values)
456
- int ret = 0
457
- {{dtype}}_t val
458
- khiter_t k
459
- bint seen_na = 0
460
- {{name}}Vector uniques = {{name}}Vector()
461
- {{name}}VectorData *ud
499
+ def unique(self, ndarray[{{dtype}}_t, ndim=1] values):
# Collect the entries of `values` that are not yet present in self.table
# (plus a single NaN for the float group) and return them as an array.
# Writeable input is forwarded to unique_memview(); any other input runs
# the same generated unique_template body against the ndarray-typed
# argument declared here.
500
+ if values.flags.writeable:
501
+ # Writeable arrays can be handed to the memoryview-typed variant.
502
+ return self.unique_memview(values)
462
503
463
- ud = uniques.data
464
-
465
- with nogil:
466
- for i in range(n):
467
- val = values[i]
468
-
469
- {{if float_group}}
470
- if val == val:
471
- k = kh_get_{{dtype}}(self.table, val)
472
- if k == self.table.n_buckets:
473
- kh_put_{{dtype}}(self.table, val, &ret)
474
- if needs_resize(ud):
475
- with gil:
476
- uniques.resize()
477
- append_data_{{dtype}}(ud, val)
478
- elif not seen_na:
479
- seen_na = 1
480
- if needs_resize(ud):
481
- with gil:
482
- uniques.resize()
483
- append_data_{{dtype}}(ud, NAN)
484
- {{else}}
485
- k = kh_get_{{dtype}}(self.table, val)
486
- if k == self.table.n_buckets:
487
- kh_put_{{dtype}}(self.table, val, &ret)
488
- if needs_resize(ud):
489
- with gil:
490
- uniques.resize()
491
- append_data_{{dtype}}(ud, val)
492
- {{endif}}
504
+ # The memoryview version cannot be used on read-only buffers because of
505
+ # a limitation of Cython's typed memoryviews. Instead we fall back to
506
+ # the slightly slower Cython ndarray type directly.
507
+ # See https://github.com/cython/cython/issues/1605
508
+ {{unique_template}}
493
509
494
- return uniques.to_array()
510
+ @cython.boundscheck(False)
511
+ def unique_memview(self, {{dtype}}_t[:] values):
# Typed-memoryview variant of unique(); unique() dispatches here when the
# input array is writeable. The body is the same generated unique_template
# text that unique() uses for its ndarray-typed fallback.
512
+ {{unique_template}}
495
513
496
514
{{endfor}}
497
515
0 commit comments