Skip to content

Commit 2317bf0

Browse files
authored
Vendored klib quadratic probing (#49197)
Vendored klib quadratic probing
1 parent 7fddb30 commit 2317bf0

File tree

1 file changed

+19
-18
lines changed

1 file changed

+19
-18
lines changed

pandas/_libs/src/klib/khash.h

+19-18
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,16 @@ int main() {
4747
*/
4848

4949
/*
50+
2013-05-02 (0.2.8):
51+
* Use quadratic probing. When the capacity is a power of 2, the stepping function
52+
i*(i+1)/2 guarantees to traverse each bucket. It is better than double
53+
hashing on cache performance and is more robust than linear probing.
54+
In theory, double hashing should be more robust than quadratic probing.
55+
However, my implementation is probably not for large hash tables, because
56+
the second hash function is closely tied to the first hash function,
57+
which reduces the effectiveness of double hashing.
58+
Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php
59+
5060
2011-09-16 (0.2.6):
5161
5262
* The capacity is a power of 2. This seems to dramatically improve the
@@ -107,7 +117,7 @@ int main() {
107117
Generic hash table library.
108118
*/
109119

110-
#define AC_VERSION_KHASH_H "0.2.6"
120+
#define AC_VERSION_KHASH_H "0.2.8"
111121

112122
#include <stdlib.h>
113123
#include <string.h>
@@ -177,7 +187,6 @@ typedef khuint_t khiter_t;
177187
#define __ac_set_isboth_false(flag, i) __ac_set_isempty_false(flag, i)
178188
#define __ac_set_isdel_true(flag, i) ((void)0)
179189

180-
181190
// specializations of https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
182191
khuint32_t PANDAS_INLINE murmur2_32to32(khuint32_t k){
183192
const khuint32_t SEED = 0xc70f6907UL;
@@ -252,13 +261,6 @@ khuint32_t PANDAS_INLINE murmur2_64to32(khuint64_t k){
252261
return murmur2_32_32to32(k1, k2);
253262
}
254263

255-
256-
#ifdef KHASH_LINEAR
257-
#define __ac_inc(k, m) 1
258-
#else
259-
#define __ac_inc(k, m) (murmur2_32to32(k) | 1) & (m)
260-
#endif
261-
262264
#define __ac_fsize(m) ((m) < 32? 1 : (m)>>5)
263265

264266
#ifndef kroundup32
@@ -310,12 +312,12 @@ static const double __ac_HASH_UPPER = 0.77;
310312
SCOPE khuint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
311313
{ \
312314
if (h->n_buckets) { \
313-
khuint_t inc, k, i, last, mask; \
315+
khuint_t k, i, last, mask, step=0;\
314316
mask = h->n_buckets - 1; \
315317
k = __hash_func(key); i = k & mask; \
316-
inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \
318+
last = i; \
317319
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
318-
i = (i + inc) & mask; \
320+
i = (i + ++step) & mask; \
319321
if (i == last) return h->n_buckets; \
320322
} \
321323
return __ac_iseither(h->flags, i)? h->n_buckets : i; \
@@ -348,11 +350,10 @@ static const double __ac_HASH_UPPER = 0.77;
348350
if (kh_is_map) val = h->vals[j]; \
349351
__ac_set_isempty_true(h->flags, j); \
350352
while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
351-
khuint_t inc, k, i; \
353+
khuint_t k, i, step=0;\
352354
k = __hash_func(key); \
353355
i = k & new_mask; \
354-
inc = __ac_inc(k, new_mask); \
355-
while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \
356+
while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \
356357
__ac_set_isempty_false(new_flags, i); \
357358
if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
358359
{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
@@ -385,14 +386,14 @@ static const double __ac_HASH_UPPER = 0.77;
385386
else kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \
386387
} /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
387388
{ \
388-
khuint_t inc, k, i, site, last, mask = h->n_buckets - 1; \
389+
khuint_t k, i, site, last, mask = h->n_buckets - 1, step=0;\
389390
x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
390391
if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
391392
else { \
392-
inc = __ac_inc(k, mask); last = i; \
393+
last = i ; \
393394
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
394395
if (__ac_isdel(h->flags, i)) site = i; \
395-
i = (i + inc) & mask; \
396+
i = (i + (++step)) & mask; \
396397
if (i == last) { x = site; break; } \
397398
} \
398399
if (x == h->n_buckets) { \

0 commit comments

Comments
 (0)