@@ -321,6 +321,113 @@ cdef class StringHashTable:
321
321
# return None
322
322
return reverse, labels, counts[:count].copy()
323
323
324
cdef class Int64HashTable:
    """
    Hash table from int64 keys to integer values, backed by a khash
    ``kh_int64_t`` table.

    The underlying table is allocated in ``__cinit__`` and released in
    ``__dealloc__``.

    Parameters
    ----------
    size_hint : int or None, default 1
        Passed to ``kh_resize_int64`` to pre-size the table. ``None``
        skips the resize.
    """

    cdef:
        kh_int64_t *table

    def __init__(self, size_hint=1):
        # __cinit__ has already created the table at this point; only the
        # optional pre-sizing happens here.
        if size_hint is not None:
            kh_resize_int64(self.table, size_hint)

    def __cinit__(self):
        self.table = kh_init_int64()

    def __dealloc__(self):
        kh_destroy_int64(self.table)

    cdef inline int check_type(self, object val):
        # NOTE(review): this tests for a *string* although the table holds
        # int64 keys -- it looks copied from StringHashTable. Nothing in
        # this class calls it; confirm the intended check before relying
        # on it.
        return PyString_Check(val)

    cpdef get_item(self, int64_t val):
        """
        Return the value stored under key ``val``.

        Raises
        ------
        KeyError
            If ``val`` is not present in the table.
        """
        cdef khiter_t k
        k = kh_get_int64(self.table, val)
        if k != self.table.n_buckets:
            return self.table.vals[k]
        else:
            raise KeyError(val)

    def get_iter_test(self, int64_t key, Py_ssize_t iterations):
        """
        Look up ``key`` in the table ``iterations`` times.

        Nothing is returned; the method only exercises the lookup path
        (presumably for timing -- confirm against callers).
        """
        cdef:
            Py_ssize_t i, val = 0
            khiter_t k

        for i in range(iterations):
            # Look up the requested key. The previous code passed the
            # uninitialised local ``val`` here and ignored ``key``.
            k = kh_get_int64(self.table, key)
            if k != self.table.n_buckets:
                val = self.table.vals[k]

    cpdef set_item(self, int64_t key, Py_ssize_t val):
        """
        Store ``val`` under ``key``, inserting the key if necessary.

        Raises
        ------
        KeyError
            If the slot returned by ``kh_put_int64`` is not marked as
            existing.
        """
        cdef:
            khiter_t k
            int ret

        k = kh_put_int64(self.table, key, &ret)
        self.table.keys[k] = key
        if kh_exist_int64(self.table, k):
            self.table.vals[k] = val
        else:
            raise KeyError(key)

    def map_locations(self, ndarray[int64_t] values):
        """
        Insert every element of ``values`` as a key whose stored value is
        the element's position in ``values``.

        When a key occurs more than once, the last position is the one
        that remains stored.
        """
        cdef:
            Py_ssize_t i, n = len(values)
            int ret
            int64_t val
            khiter_t k

        for i in range(n):
            val = values[i]
            k = kh_put_int64(self.table, val, &ret)
            self.table.vals[k] = i

    def lookup_locations(self, ndarray[int64_t] values):
        """
        Return an int32 array holding, for each element of ``values``, the
        value stored under that key, or -1 when the key is absent.
        """
        cdef:
            Py_ssize_t i, n = len(values)
            int64_t val
            khiter_t k
            # np.int32 matches the dtype used by factorize(); the previous
            # dtype string carried a stray leading space.
            ndarray[int32_t] locs = np.empty(n, dtype=np.int32)

        for i in range(n):
            val = values[i]
            k = kh_get_int64(self.table, val)
            if k != self.table.n_buckets:
                locs[i] = self.table.vals[k]
            else:
                locs[i] = -1

        return locs

    def factorize(self, ndarray[object] values):
        """
        Assign consecutive integer labels to the distinct keys in
        ``values``, in order of first appearance.

        Returns
        -------
        reverse : dict
            Maps each label assigned during this call to its key.
        labels : ndarray[int32]
            Label of each element of ``values``.
        counts : ndarray[int32]
            ``counts[label]`` is the number of occurrences of that label;
            trimmed to the number of labels assigned during this call.

        Notes
        -----
        Keys already in the table are treated as already labelled with
        their stored value, so the method presumably expects to start from
        an empty table -- TODO confirm with callers.

        NOTE(review): ``values`` is typed ``ndarray[object]`` although each
        element is coerced to int64; the signature is kept unchanged so
        existing callers are not broken.
        """
        cdef:
            Py_ssize_t i, n = len(values)
            ndarray[int32_t] labels = np.empty(n, dtype=np.int32)
            ndarray[int32_t] counts = np.empty(n, dtype=np.int32)
            dict reverse = {}
            Py_ssize_t idx, count = 0
            int ret
            int64_t val
            khiter_t k

        for i in range(n):
            val = values[i]
            k = kh_get_int64(self.table, val)
            if k != self.table.n_buckets:
                # Key seen before: reuse its label and bump the tally.
                idx = self.table.vals[k]
                labels[i] = idx
                counts[idx] = counts[idx] + 1
            else:
                # kh_get_int64 just reported the key as absent, so this put
                # always creates a new entry. The old "if not ret: kh_del"
                # branch could never run, and would have written into a
                # deleted slot if it had.
                k = kh_put_int64(self.table, val, &ret)
                self.table.vals[k] = count
                reverse[count] = val
                labels[i] = count
                counts[count] = 1
                count += 1

        return reverse, labels, counts[:count].copy()
430
+
324
431
from libc.stdlib cimport free
325
432
326
433
cdef class PyObjectHashTable:
0 commit comments