Skip to content

Commit 160fcf3

Browse files
committed
using probing strategy from Python's dict
1 parent e3204b2 commit 160fcf3

File tree

1 file changed

+21
-16
lines changed

1 file changed

+21
-16
lines changed

pandas/_libs/src/klib/khash.h

+21-16
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,9 @@ typedef khint_t khiter_t;
143143
#define __ac_set_isboth_false(flag, i) __ac_set_isempty_false(flag, i)
144144
#define __ac_set_isdel_true(flag, i) ((void)0)
145145

146-
#ifdef KHASH_LINEAR
147-
#define __ac_inc(k, m) 1
148-
#else
149-
#define __ac_inc(k, m) (((k)>>3 ^ (k)<<3) | 1) & (m)
150-
#endif
146+
// probing strategy follows implementation
147+
// of dictobject.c in Python 3.9:
148+
#define PERTURB_SHIFT 5
151149

152150
#define __ac_fsize(m) ((m) < 32? 1 : (m)>>5)
153151

@@ -200,13 +198,15 @@ static const double __ac_HASH_UPPER = 0.77;
200198
SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
201199
{ \
202200
if (h->n_buckets) { \
203-
khint_t inc, k, i, last, mask; \
201+
khint_t perturb, k, i, last, mask; \
204202
mask = h->n_buckets - 1; \
205203
k = __hash_func(key); i = k & mask; \
206-
inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \
204+
perturb = k; last = i; \
207205
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
208-
i = (i + inc) & mask; \
209-
if (i == last) return h->n_buckets; \
206+
if(perturb!=0 && perturb < (1<<PERTURB_SHIFT)){ last = i;} \
207+
perturb >>= PERTURB_SHIFT; \
208+
i = mask & (i*5 + perturb + 1); \
209+
if (perturb == 0 && i == last) return h->n_buckets; \
210210
} \
211211
return __ac_iseither(h->flags, i)? h->n_buckets : i; \
212212
} else return 0; \
@@ -238,11 +238,14 @@ static const double __ac_HASH_UPPER = 0.77;
238238
if (kh_is_map) val = h->vals[j]; \
239239
__ac_set_isempty_true(h->flags, j); \
240240
while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
241-
khint_t inc, k, i; \
241+
khint_t perturb, k, i; \
242242
k = __hash_func(key); \
243243
i = k & new_mask; \
244-
inc = __ac_inc(k, new_mask); \
245-
while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \
244+
perturb = k; \
245+
while (!__ac_isempty(new_flags, i)){ \
246+
perturb >>= PERTURB_SHIFT; \
247+
i = new_mask & (i*5 + perturb + 1); \
248+
} \
246249
__ac_set_isempty_false(new_flags, i); \
247250
if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
248251
{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
@@ -275,15 +278,17 @@ static const double __ac_HASH_UPPER = 0.77;
275278
else kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \
276279
} /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
277280
{ \
278-
khint_t inc, k, i, site, last, mask = h->n_buckets - 1; \
281+
khint_t perturb, k, i, site, last, mask = h->n_buckets - 1; \
279282
x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
280283
if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
281284
else { \
282-
inc = __ac_inc(k, mask); last = i; \
285+
perturb = k; last = i; \
283286
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
284287
if (__ac_isdel(h->flags, i)) site = i; \
285-
i = (i + inc) & mask; \
286-
if (i == last) { x = site; break; } \
288+
if(perturb!=0 && perturb < (1<<PERTURB_SHIFT)){ last = i;} \
289+
perturb >>= PERTURB_SHIFT; \
290+
i = mask & (i*5 + perturb + 1); \
291+
if (perturb == 0 && i == last) { x = site; break; } \
287292
} \
288293
if (x == h->n_buckets) { \
289294
if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \

0 commit comments

Comments
 (0)