Skip to content

Commit a0e436c

Browse files
committed
khash.h: sync with 0.2.8 upstream release
1 parent 04bf4bf commit a0e436c

File tree

1 file changed

+120
-50
lines changed

1 file changed

+120
-50
lines changed

pandas/src/klib/khash.h

+120-50
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,23 @@ int main() {
4747
*/
4848

4949
/*
50+
2013-05-02 (0.2.8):
51+
52+
* Use quadratic probing. When the capacity is a power of 2, the stepping function
53+
i*(i+1)/2 is guaranteed to traverse each bucket. It is better than double
54+
hashing on cache performance and is more robust than linear probing.
55+
56+
In theory, double hashing should be more robust than quadratic probing.
57+
However, my implementation is probably not for large hash tables, because
58+
the second hash function is closely tied to the first hash function,
59+
which reduces the effectiveness of double hashing.
60+
61+
Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php
62+
63+
2011-12-29 (0.2.7):
64+
65+
* Minor code clean up; no actual effect.
66+
5067
2011-09-16 (0.2.6):
5168
5269
* The capacity is a power of 2. This seems to dramatically improve the
@@ -107,12 +124,13 @@ int main() {
107124
Generic hash table library.
108125
*/
109126

110-
#define AC_VERSION_KHASH_H "0.2.6"
127+
#define AC_VERSION_KHASH_H "0.2.8"
111128

112129
#include <stdlib.h>
113130
#include <string.h>
114131
#include <limits.h>
115132

133+
/* compiler specific configuration */
116134

117135
#if UINT_MAX == 0xffffffffu
118136
typedef unsigned int khint32_t;
@@ -154,50 +172,54 @@ typedef khint_t khiter_t;
154172
#define __ac_set_isboth_false(flag, i) __ac_set_isempty_false(flag, i)
155173
#define __ac_set_isdel_true(flag, i) (0)
156174

157-
#ifdef KHASH_LINEAR
158-
#define __ac_inc(k, m) 1
159-
#else
160-
#define __ac_inc(k, m) (((k)>>3 ^ (k)<<3) | 1) & (m)
161-
#endif
162175

163176
#define __ac_fsize(m) ((m) < 32? 1 : (m)>>5)
164177

165178
#ifndef kroundup32
166179
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
167180
#endif
168181

182+
#ifndef kcalloc
183+
#define kcalloc(N,Z) calloc(N,Z)
184+
#endif
185+
#ifndef kmalloc
186+
#define kmalloc(Z) malloc(Z)
187+
#endif
188+
#ifndef krealloc
189+
#define krealloc(P,Z) realloc(P,Z)
190+
#endif
191+
#ifndef kfree
192+
#define kfree(P) free(P)
193+
#endif
194+
169195
static const double __ac_HASH_UPPER = 0.77;
170196

171-
#define KHASH_DECLARE(name, khkey_t, khval_t) \
172-
typedef struct { \
173-
khint_t n_buckets, size, n_occupied, upper_bound; \
174-
khint32_t *flags; \
175-
khkey_t *keys; \
176-
khval_t *vals; \
177-
} kh_##name##_t; \
178-
extern kh_##name##_t *kh_init_##name(); \
197+
#define __KHASH_TYPE(name, khkey_t, khval_t) \
198+
typedef struct { \
199+
khint_t n_buckets, size, n_occupied, upper_bound; \
200+
khint32_t *flags; \
201+
khkey_t *keys; \
202+
khval_t *vals; \
203+
} kh_##name##_t;
204+
205+
#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \
206+
extern kh_##name##_t *kh_init_##name(void); \
179207
extern void kh_destroy_##name(kh_##name##_t *h); \
180208
extern void kh_clear_##name(kh_##name##_t *h); \
181209
extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
182-
extern void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
210+
extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
183211
extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret);
184212

185-
#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
186-
typedef struct { \
187-
khint_t n_buckets, size, n_occupied, upper_bound; \
188-
khint32_t *flags; \
189-
khkey_t *keys; \
190-
khval_t *vals; \
191-
} kh_##name##_t; \
213+
#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
192214
SCOPE kh_##name##_t *kh_init_##name(void) { \
193-
return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \
215+
return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \
194216
} \
195217
SCOPE void kh_destroy_##name(kh_##name##_t *h) \
196218
{ \
197219
if (h) { \
198-
free(h->keys); free(h->flags); \
199-
free(h->vals); \
200-
free(h); \
220+
kfree((void *)h->keys); kfree(h->flags); \
221+
kfree((void *)h->vals); \
222+
kfree(h); \
201223
} \
202224
} \
203225
SCOPE void kh_clear_##name(kh_##name##_t *h) \
@@ -210,31 +232,38 @@ static const double __ac_HASH_UPPER = 0.77;
210232
SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
211233
{ \
212234
if (h->n_buckets) { \
213-
khint_t inc, k, i, last, mask; \
235+
khint_t k, i, last, mask, step = 0; \
214236
mask = h->n_buckets - 1; \
215237
k = __hash_func(key); i = k & mask; \
216-
inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \
238+
last = i; \
217239
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
218-
i = (i + inc) & mask; \
240+
i = (i + (++step)) & mask; \
219241
if (i == last) return h->n_buckets; \
220242
} \
221243
return __ac_iseither(h->flags, i)? h->n_buckets : i; \
222244
} else return 0; \
223245
} \
224-
SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
225-
{ /* This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
246+
SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
247+
{ /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
226248
khint32_t *new_flags = 0; \
227249
khint_t j = 1; \
228250
{ \
229251
kroundup32(new_n_buckets); \
230252
if (new_n_buckets < 4) new_n_buckets = 4; \
231253
if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \
232254
else { /* hash table size to be changed (shrink or expand); rehash */ \
233-
new_flags = (khint32_t*)malloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
255+
new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
256+
if (!new_flags) return -1; \
234257
memset(new_flags, 0xff, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
235258
if (h->n_buckets < new_n_buckets) { /* expand */ \
236-
h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
237-
if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
259+
khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
260+
if (!new_keys) { kfree(new_flags); return -1; } \
261+
h->keys = new_keys; \
262+
if (kh_is_map) { \
263+
khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
264+
if (!new_vals) { kfree(new_flags); return -1; } \
265+
h->vals = new_vals; \
266+
} \
238267
} /* otherwise shrink */ \
239268
} \
240269
} \
@@ -248,11 +277,10 @@ static const double __ac_HASH_UPPER = 0.77;
248277
if (kh_is_map) val = h->vals[j]; \
249278
__ac_set_isempty_true(h->flags, j); \
250279
while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
251-
khint_t inc, k, i; \
280+
khint_t k, i, step = 0; \
252281
k = __hash_func(key); \
253282
i = k & new_mask; \
254-
inc = __ac_inc(k, new_mask); \
255-
while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \
283+
while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \
256284
__ac_set_isempty_false(new_flags, i); \
257285
if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
258286
{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
@@ -267,32 +295,38 @@ static const double __ac_HASH_UPPER = 0.77;
267295
} \
268296
} \
269297
if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
270-
h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
271-
if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
298+
h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
299+
if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
272300
} \
273-
free(h->flags); /* free the working space */ \
301+
kfree(h->flags); /* free the working space */ \
274302
h->flags = new_flags; \
275303
h->n_buckets = new_n_buckets; \
276304
h->n_occupied = h->size; \
277305
h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
278306
} \
307+
return 0; \
279308
} \
280309
SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
281310
{ \
282311
khint_t x; \
283312
if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
284-
if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); /* clear "deleted" elements */ \
285-
else kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \
313+
if (h->n_buckets > (h->size<<1)) { \
314+
if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \
315+
*ret = -1; return h->n_buckets; \
316+
} \
317+
} else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \
318+
*ret = -1; return h->n_buckets; \
319+
} \
286320
} /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
287321
{ \
288-
khint_t inc, k, i, site, last, mask = h->n_buckets - 1; \
322+
khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \
289323
x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
290324
if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
291325
else { \
292-
inc = __ac_inc(k, mask); last = i; \
326+
last = i; \
293327
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
294328
if (__ac_isdel(h->flags, i)) site = i; \
295-
i = (i + inc) & mask; \
329+
i = (i + (++step)) & mask; \
296330
if (i == last) { x = site; break; } \
297331
} \
298332
if (x == h->n_buckets) { \
@@ -315,6 +349,14 @@ static const double __ac_HASH_UPPER = 0.77;
315349
return x; \
316350
}
317351

352+
#define KHASH_DECLARE(name, khkey_t, khval_t) \
353+
__KHASH_TYPE(name, khkey_t, khval_t) \
354+
__KHASH_PROTOTYPES(name, khkey_t, khval_t)
355+
356+
#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
357+
__KHASH_TYPE(name, khkey_t, khval_t) \
358+
__KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
359+
318360
#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
319361
KHASH_INIT2(name, static PANDAS_INLINE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
320362

@@ -348,8 +390,8 @@ static const double __ac_HASH_UPPER = 0.77;
348390
*/
349391
static PANDAS_INLINE khint_t __ac_X31_hash_string(const char *s)
350392
{
351-
khint_t h = *s;
352-
if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s;
393+
khint_t h = (khint_t)*s;
394+
if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s;
353395
return h;
354396
}
355397
/*! @function
@@ -419,7 +461,8 @@ static PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key)
419461
@param name Name of the hash table [symbol]
420462
@param h Pointer to the hash table [khash_t(name)*]
421463
@param k Key [type of keys]
422-
@param r Extra return code: 0 if the key is present in the hash table;
464+
@param r Extra return code: -1 if the operation failed;
465+
0 if the key is present in the hash table;
423466
1 if the bucket is empty (never used); 2 if the element in
424467
the bucket has been deleted [int*]
425468
@return Iterator to the inserted element [khint_t]
@@ -431,7 +474,7 @@ static PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key)
431474
@param name Name of the hash table [symbol]
432475
@param h Pointer to the hash table [khash_t(name)*]
433476
@param k Key [type of keys]
434-
@return Iterator to the found element, or kh_end(h) is the element is absent [khint_t]
477+
@return Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
435478
*/
436479
#define kh_get(name, h, k) kh_get_##name(h, k)
437480

@@ -493,6 +536,34 @@ static PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key)
493536
*/
494537
#define kh_n_buckets(h) ((h)->n_buckets)
495538

539+
/*! @function
540+
@abstract Iterate over the entries in the hash table
541+
@param h Pointer to the hash table [khash_t(name)*]
542+
@param kvar Variable to which key will be assigned
543+
@param vvar Variable to which value will be assigned
544+
@param code Block of code to execute
545+
*/
546+
#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \
547+
for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
548+
if (!kh_exist(h,__i)) continue; \
549+
(kvar) = kh_key(h,__i); \
550+
(vvar) = kh_val(h,__i); \
551+
code; \
552+
} }
553+
554+
/*! @function
555+
@abstract Iterate over the values in the hash table
556+
@param h Pointer to the hash table [khash_t(name)*]
557+
@param vvar Variable to which value will be assigned
558+
@param code Block of code to execute
559+
*/
560+
#define kh_foreach_value(h, vvar, code) { khint_t __i; \
561+
for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
562+
if (!kh_exist(h,__i)) continue; \
563+
(vvar) = kh_val(h,__i); \
564+
code; \
565+
} }
566+
496567
/* More convenient interfaces */
497568

498569
/*! @function
@@ -531,7 +602,6 @@ static PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key)
531602
#define KHASH_MAP_INIT_INT64(name, khval_t) \
532603
KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
533604

534-
535605
typedef const char *kh_cstr_t;
536606
/*! @function
537607
@abstract Instantiate a hash map containing const char* keys

0 commit comments

Comments
 (0)