Skip to content

Commit 253daaa

Browse files
authored
Revert "Vendored klib quadatric probing (#49197)" (#49855)
This reverts commit 2317bf0.
1 parent 9df44ea commit 253daaa

File tree

1 file changed

+18
-19
lines changed

1 file changed

+18
-19
lines changed

pandas/_libs/src/klib/khash.h

+18-19
Original file line numberDiff line numberDiff line change
@@ -47,16 +47,6 @@ int main() {
4747
*/
4848

4949
/*
50-
2013-05-02 (0.2.8):
51-
* Use quadratic probing. When the capacity is a power of 2, stepping function
52-
i*(i+1)/2 guarantees to traverse each bucket. It is better than double
53-
hashing on cache performance and is more robust than linear probing.
54-
In theory, double hashing should be more robust than quadratic probing.
55-
However, my implementation is probably not for large hash tables, because
56-
the second hash function is closely tied to the first hash function,
57-
which reduces the effectiveness of double hashing.
58-
Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php
59-
6050
2011-09-16 (0.2.6):
6151
6252
* The capacity is a power of 2. This seems to dramatically improve the
@@ -117,7 +107,7 @@ int main() {
117107
Generic hash table library.
118108
*/
119109

120-
#define AC_VERSION_KHASH_H "0.2.8"
110+
#define AC_VERSION_KHASH_H "0.2.6"
121111

122112
#include <stdlib.h>
123113
#include <string.h>
@@ -187,6 +177,7 @@ typedef khuint_t khiter_t;
187177
#define __ac_set_isboth_false(flag, i) __ac_set_isempty_false(flag, i)
188178
#define __ac_set_isdel_true(flag, i) ((void)0)
189179

180+
190181
// specializations of https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
191182
khuint32_t PANDAS_INLINE murmur2_32to32(khuint32_t k){
192183
const khuint32_t SEED = 0xc70f6907UL;
@@ -261,6 +252,13 @@ khuint32_t PANDAS_INLINE murmur2_64to32(khuint64_t k){
261252
return murmur2_32_32to32(k1, k2);
262253
}
263254

255+
256+
#ifdef KHASH_LINEAR
257+
#define __ac_inc(k, m) 1
258+
#else
259+
/* Probe step for key hash k under bucket mask m: a second hash of k, forced
   odd by `| 1`, then masked by m. The whole expansion is parenthesized so it
   behaves as a single operand inside any surrounding expression. */
#define __ac_inc(k, m) (((murmur2_32to32(k)) | 1) & (m))
260+
#endif
261+
264262
/* Yields m >> 5 once m reaches 32; any smaller m yields 1. */
#define __ac_fsize(m) (((m) >= 32) ? ((m) >> 5) : 1)
265263

266264
#ifndef kroundup32
@@ -312,12 +310,12 @@ static const double __ac_HASH_UPPER = 0.77;
312310
SCOPE khuint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
313311
{ \
314312
if (h->n_buckets) { \
315-
khuint_t k, i, last, mask, step=0;\
313+
khuint_t inc, k, i, last, mask; \
316314
mask = h->n_buckets - 1; \
317315
k = __hash_func(key); i = k & mask; \
318-
last = i; \
316+
inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \
319317
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
320-
i = (i + ++step) & mask; \
318+
i = (i + inc) & mask; \
321319
if (i == last) return h->n_buckets; \
322320
} \
323321
return __ac_iseither(h->flags, i)? h->n_buckets : i; \
@@ -350,10 +348,11 @@ static const double __ac_HASH_UPPER = 0.77;
350348
if (kh_is_map) val = h->vals[j]; \
351349
__ac_set_isempty_true(h->flags, j); \
352350
while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
353-
khuint_t k, i, step=0;\
351+
khuint_t inc, k, i; \
354352
k = __hash_func(key); \
355353
i = k & new_mask; \
356-
while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \
354+
inc = __ac_inc(k, new_mask); \
355+
while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \
357356
__ac_set_isempty_false(new_flags, i); \
358357
if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
359358
{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
@@ -386,14 +385,14 @@ static const double __ac_HASH_UPPER = 0.77;
386385
else kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \
387386
} /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
388387
{ \
389-
khuint_t k, i, site, last, mask = h->n_buckets - 1, step=0;\
388+
khuint_t inc, k, i, site, last, mask = h->n_buckets - 1; \
390389
x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
391390
if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
392391
else { \
393-
last = i ; \
392+
inc = __ac_inc(k, mask); last = i; \
394393
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
395394
if (__ac_isdel(h->flags, i)) site = i; \
396-
i = (i + (++step)) & mask; \
395+
i = (i + inc) & mask; \
397396
if (i == last) { x = site; break; } \
398397
} \
399398
if (x == h->n_buckets) { \

0 commit comments

Comments
 (0)