@@ -47,6 +47,23 @@ int main() {
47
47
*/
48
48
49
49
/*
50
+ 2013-05-02 (0.2.8):
51
+
52
+ * Use quadratic probing. When the capacity is a power of 2, the stepping function
53
+ i*(i+1)/2 is guaranteed to traverse each bucket. It is better than double
54
+ hashing on cache performance and is more robust than linear probing.
55
+
56
+ In theory, double hashing should be more robust than quadratic probing.
57
+ However, my implementation is probably not for large hash tables, because
58
+ the second hash function is closely tied to the first hash function,
59
+ which reduces the effectiveness of double hashing.
60
+
61
+ Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php
62
+
63
+ 2011-12-29 (0.2.7):
64
+
65
+ * Minor code clean up; no actual effect.
66
+
50
67
2011-09-16 (0.2.6):
51
68
52
69
* The capacity is a power of 2. This seems to dramatically improve the
@@ -107,12 +124,13 @@ int main() {
107
124
Generic hash table library.
108
125
*/
109
126
110
- #define AC_VERSION_KHASH_H "0.2.6 "
127
+ #define AC_VERSION_KHASH_H "0.2.8 "
111
128
112
129
#include <stdlib.h>
113
130
#include <string.h>
114
131
#include <limits.h>
115
132
133
+ /* compiler specific configuration */
116
134
117
135
#if UINT_MAX == 0xffffffffu
118
136
typedef unsigned int khint32_t ;
@@ -154,50 +172,54 @@ typedef khint_t khiter_t;
154
172
#define __ac_set_isboth_false (flag , i ) __ac_set_isempty_false(flag, i)
155
173
#define __ac_set_isdel_true (flag , i ) (0)
156
174
157
- #ifdef KHASH_LINEAR
158
- #define __ac_inc (k , m ) 1
159
- #else
160
- #define __ac_inc (k , m ) (((k)>>3 ^ (k)<<3) | 1) & (m)
161
- #endif
162
175
163
176
#define __ac_fsize (m ) ((m) < 32? 1 : (m)>>5)
164
177
165
178
#ifndef kroundup32
166
179
#define kroundup32 (x ) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
167
180
#endif
168
181
182
+ #ifndef kcalloc
183
+ #define kcalloc (N ,Z ) calloc(N,Z)
184
+ #endif
185
+ #ifndef kmalloc
186
+ #define kmalloc (Z ) malloc(Z)
187
+ #endif
188
+ #ifndef krealloc
189
+ #define krealloc (P ,Z ) realloc(P,Z)
190
+ #endif
191
+ #ifndef kfree
192
+ #define kfree (P ) free(P)
193
+ #endif
194
+
169
195
static const double __ac_HASH_UPPER = 0.77 ;
170
196
171
- #define KHASH_DECLARE (name , khkey_t , khval_t ) \
172
- typedef struct { \
173
- khint_t n_buckets, size, n_occupied, upper_bound; \
174
- khint32_t *flags; \
175
- khkey_t *keys; \
176
- khval_t *vals; \
177
- } kh_##name##_t; \
178
- extern kh_##name##_t *kh_init_##name(); \
197
+ #define __KHASH_TYPE (name , khkey_t , khval_t ) \
198
+ typedef struct { \
199
+ khint_t n_buckets, size, n_occupied, upper_bound; \
200
+ khint32_t *flags; \
201
+ khkey_t *keys; \
202
+ khval_t *vals; \
203
+ } kh_##name##_t;
204
+
205
+ #define __KHASH_PROTOTYPES (name , khkey_t , khval_t ) \
206
+ extern kh_##name##_t *kh_init_##name(void); \
179
207
extern void kh_destroy_##name(kh_##name##_t *h); \
180
208
extern void kh_clear_##name(kh_##name##_t *h); \
181
209
extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
182
- extern void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
210
+ extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
183
211
extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret);
184
212
185
- #define KHASH_INIT2 (name , SCOPE , khkey_t , khval_t , kh_is_map , __hash_func , __hash_equal ) \
186
- typedef struct { \
187
- khint_t n_buckets, size, n_occupied, upper_bound; \
188
- khint32_t *flags; \
189
- khkey_t *keys; \
190
- khval_t *vals; \
191
- } kh_##name##_t; \
213
+ #define __KHASH_IMPL (name , SCOPE , khkey_t , khval_t , kh_is_map , __hash_func , __hash_equal ) \
192
214
SCOPE kh_##name##_t *kh_init_##name(void) { \
193
- return (kh_##name##_t*)calloc (1, sizeof(kh_##name##_t)); \
215
+ return (kh_##name##_t*)kcalloc (1, sizeof(kh_##name##_t)); \
194
216
} \
195
217
SCOPE void kh_destroy_##name(kh_##name##_t *h) \
196
218
{ \
197
219
if (h) { \
198
- free( h->keys); free (h->flags); \
199
- free( h->vals); \
200
- free (h); \
220
+ kfree((void *) h->keys); kfree (h->flags); \
221
+ kfree((void *) h->vals); \
222
+ kfree (h); \
201
223
} \
202
224
} \
203
225
SCOPE void kh_clear_##name(kh_##name##_t *h) \
@@ -210,31 +232,38 @@ static const double __ac_HASH_UPPER = 0.77;
210
232
SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
211
233
{ \
212
234
if (h->n_buckets) { \
213
- khint_t inc, k, i, last, mask; \
235
+ khint_t k, i, last, mask, step = 0; \
214
236
mask = h->n_buckets - 1; \
215
237
k = __hash_func(key); i = k & mask; \
216
- inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \
238
+ last = i; \
217
239
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
218
- i = (i + inc ) & mask ; \
240
+ i = (i + (++step)) & mask; \
219
241
if (i == last) return h->n_buckets; \
220
242
} \
221
243
return __ac_iseither(h->flags, i)? h->n_buckets : i; \
222
244
} else return 0; \
223
245
} \
224
- SCOPE void kh_resize_ ##name (kh_##name##_t *h, khint_t new_n_buckets) \
225
- { /* This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
246
+ SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
247
+ { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
226
248
khint32_t * new_flags = 0 ; \
227
249
khint_t j = 1 ; \
228
250
{ \
229
251
kroundup32 (new_n_buckets ); \
230
252
if (new_n_buckets < 4 ) new_n_buckets = 4 ; \
231
253
if (h -> size >= (khint_t )(new_n_buckets * __ac_HASH_UPPER + 0.5 )) j = 0 ; /* requested size is too small */ \
232
254
else { /* hash table size to be changed (shrink or expand); rehash */ \
233
- new_flags = (khint32_t * )malloc (__ac_fsize (new_n_buckets ) * sizeof (khint32_t )); \
255
+ new_flags = (khint32_t * )kmalloc (__ac_fsize (new_n_buckets ) * sizeof (khint32_t )); \
256
+ if (!new_flags ) return -1 ; \
234
257
memset (new_flags , 0xff , __ac_fsize (new_n_buckets ) * sizeof (khint32_t )); \
235
258
if (h -> n_buckets < new_n_buckets ) { /* expand */ \
236
- h -> keys = (khkey_t * )realloc (h -> keys , new_n_buckets * sizeof (khkey_t )); \
237
- if (kh_is_map ) h -> vals = (khval_t * )realloc (h -> vals , new_n_buckets * sizeof (khval_t )); \
259
+ khkey_t * new_keys = (khkey_t * )krealloc ((void * )h -> keys , new_n_buckets * sizeof (khkey_t )); \
260
+ if (!new_keys ) { kfree (new_flags ); return -1 ; } \
261
+ h -> keys = new_keys ; \
262
+ if (kh_is_map ) { \
263
+ khval_t * new_vals = (khval_t * )krealloc ((void * )h -> vals , new_n_buckets * sizeof (khval_t )); \
264
+ if (!new_vals ) { kfree (new_flags ); return -1 ; } \
265
+ h -> vals = new_vals ; \
266
+ } \
238
267
} /* otherwise shrink */ \
239
268
} \
240
269
} \
@@ -248,11 +277,10 @@ static const double __ac_HASH_UPPER = 0.77;
248
277
if (kh_is_map ) val = h -> vals [j ]; \
249
278
__ac_set_isempty_true (h -> flags , j ); \
250
279
while (1 ) { /* kick-out process; sort of like in Cuckoo hashing */ \
251
- khint_t inc , k , i ; \
280
+ khint_t k , i , step = 0 ; \
252
281
k = __hash_func (key ); \
253
282
i = k & new_mask ; \
254
- inc = __ac_inc (k , new_mask ); \
255
- while (!__ac_isempty (new_flags , i )) i = (i + inc ) & new_mask ; \
283
+ while (!__ac_isempty (new_flags , i )) i = (i + (++ step )) & new_mask ; \
256
284
__ac_set_isempty_false (new_flags , i ); \
257
285
if (i < h -> n_buckets && __ac_iseither (h -> flags , i ) == 0 ) { /* kick out the existing element */ \
258
286
{ khkey_t tmp = h -> keys [i ]; h -> keys [i ] = key ; key = tmp ; } \
@@ -267,32 +295,38 @@ static const double __ac_HASH_UPPER = 0.77;
267
295
} \
268
296
} \
269
297
if (h -> n_buckets > new_n_buckets ) { /* shrink the hash table */ \
270
- h -> keys = (khkey_t * )realloc ( h -> keys , new_n_buckets * sizeof (khkey_t )); \
271
- if (kh_is_map ) h -> vals = (khval_t * )realloc ( h -> vals , new_n_buckets * sizeof (khval_t )); \
298
+ h -> keys = (khkey_t * )krealloc (( void * ) h -> keys , new_n_buckets * sizeof (khkey_t )); \
299
+ if (kh_is_map ) h -> vals = (khval_t * )krealloc (( void * ) h -> vals , new_n_buckets * sizeof (khval_t )); \
272
300
} \
273
- free (h -> flags ); /* free the working space */ \
301
+ kfree (h -> flags ); /* free the working space */ \
274
302
h -> flags = new_flags ; \
275
303
h -> n_buckets = new_n_buckets ; \
276
304
h -> n_occupied = h -> size ; \
277
305
h -> upper_bound = (khint_t )(h -> n_buckets * __ac_HASH_UPPER + 0.5 ); \
278
306
} \
307
+ return 0 ; \
279
308
} \
280
309
SCOPE khint_t kh_put_ ##name (kh_##name##_t *h, khkey_t key, int *ret) \
281
310
{ \
282
311
khint_t x; \
283
312
if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
284
- if (h -> n_buckets > (h -> size <<1 )) kh_resize_ ##name (h, h->n_buckets - 1); /* clear "deleted" elements */ \
285
- else kh_resize_ ##name (h, h->n_buckets + 1); /* expand the hash table */ \
313
+ if (h -> n_buckets > (h -> size <<1 )) { \
314
+ if (kh_resize_ ##name (h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \
315
+ * ret = -1 ; return h -> n_buckets ; \
316
+ } \
317
+ } else if (kh_resize_ ##name (h, h->n_buckets + 1) < 0) { /* expand the hash table */ \
318
+ * ret = -1 ; return h -> n_buckets ; \
319
+ } \
286
320
} /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
287
321
{ \
288
- khint_t inc , k , i , site , last , mask = h -> n_buckets - 1 ; \
322
+ khint_t k , i , site , last , mask = h -> n_buckets - 1 , step = 0 ; \
289
323
x = site = h -> n_buckets ; k = __hash_func (key ); i = k & mask ; \
290
324
if (__ac_isempty (h -> flags , i )) x = i ; /* for speed up */ \
291
325
else { \
292
- inc = __ac_inc ( k , mask ); last = i ; \
326
+ last = i ; \
293
327
while (!__ac_isempty (h -> flags , i ) && (__ac_isdel (h -> flags , i ) || !__hash_equal (h -> keys [i ], key ))) { \
294
328
if (__ac_isdel (h -> flags , i )) site = i ; \
295
- i = (i + inc ) & mask ; \
329
+ i = (i + ( ++ step )) & mask ; \
296
330
if (i == last ) { x = site ; break ; } \
297
331
} \
298
332
if (x == h -> n_buckets ) { \
@@ -315,6 +349,14 @@ static const double __ac_HASH_UPPER = 0.77;
315
349
return x ; \
316
350
}
317
351
352
+ #define KHASH_DECLARE (name , khkey_t , khval_t ) \
353
+ __KHASH_TYPE(name, khkey_t, khval_t) \
354
+ __KHASH_PROTOTYPES(name, khkey_t, khval_t)
355
+
356
+ #define KHASH_INIT2 (name , SCOPE , khkey_t , khval_t , kh_is_map , __hash_func , __hash_equal ) \
357
+ __KHASH_TYPE(name, khkey_t, khval_t) \
358
+ __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
359
+
318
360
#define KHASH_INIT (name , khkey_t , khval_t , kh_is_map , __hash_func , __hash_equal ) \
319
361
KHASH_INIT2(name, static PANDAS_INLINE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
320
362
@@ -348,8 +390,8 @@ static const double __ac_HASH_UPPER = 0.77;
348
390
*/
349
391
static PANDAS_INLINE khint_t __ac_X31_hash_string (const char * s )
350
392
{
351
- khint_t h = * s ;
352
- if (h ) for (++ s ; * s ; ++ s ) h = (h << 5 ) - h + * s ;
393
+ khint_t h = ( khint_t ) * s ;
394
+ if (h ) for (++ s ; * s ; ++ s ) h = (h << 5 ) - h + ( khint_t ) * s ;
353
395
return h ;
354
396
}
355
397
/*! @function
@@ -419,7 +461,8 @@ static PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key)
419
461
@param name Name of the hash table [symbol]
420
462
@param h Pointer to the hash table [khash_t(name)*]
421
463
@param k Key [type of keys]
422
- @param r Extra return code: 0 if the key is present in the hash table;
464
+ @param r Extra return code: -1 if the operation failed;
465
+ 0 if the key is present in the hash table;
423
466
1 if the bucket is empty (never used); 2 if the element in
424
467
the bucket has been deleted [int*]
425
468
@return Iterator to the inserted element [khint_t]
@@ -431,7 +474,7 @@ static PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key)
431
474
@param name Name of the hash table [symbol]
432
475
@param h Pointer to the hash table [khash_t(name)*]
433
476
@param k Key [type of keys]
434
- @return Iterator to the found element, or kh_end(h) is the element is absent [khint_t]
477
+ @return Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
435
478
*/
436
479
#define kh_get (name , h , k ) kh_get_##name(h, k)
437
480
@@ -493,6 +536,34 @@ static PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key)
493
536
*/
494
537
#define kh_n_buckets (h ) ((h)->n_buckets)
495
538
539
+ /*! @function
540
+ @abstract Iterate over the entries in the hash table
541
+ @param h Pointer to the hash table [khash_t(name)*]
542
+ @param kvar Variable to which key will be assigned
543
+ @param vvar Variable to which value will be assigned
544
+ @param code Block of code to execute
545
+ */
546
+ #define kh_foreach (h , kvar , vvar , code ) { khint_t __i; \
547
+ for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
548
+ if (!kh_exist(h,__i)) continue; \
549
+ (kvar) = kh_key(h,__i); \
550
+ (vvar) = kh_val(h,__i); \
551
+ code; \
552
+ } }
553
+
554
+ /*! @function
555
+ @abstract Iterate over the values in the hash table
556
+ @param h Pointer to the hash table [khash_t(name)*]
557
+ @param vvar Variable to which value will be assigned
558
+ @param code Block of code to execute
559
+ */
560
+ #define kh_foreach_value (h , vvar , code ) { khint_t __i; \
561
+ for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
562
+ if (!kh_exist(h,__i)) continue; \
563
+ (vvar) = kh_val(h,__i); \
564
+ code; \
565
+ } }
566
+
496
567
/* More convenient interfaces */
497
568
498
569
/*! @function
@@ -531,7 +602,6 @@ static PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key)
531
602
#define KHASH_MAP_INIT_INT64 (name , khval_t ) \
532
603
KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
533
604
534
-
535
605
typedef const char * kh_cstr_t ;
536
606
/*! @function
537
607
@abstract Instantiate a hash map containing const char* keys
0 commit comments