|
13 | 13 | // is 64 bits the truncation causes collision issues. Given all that, we use our own
|
14 | 14 | // simple hash, viewing the double bytes as an int64 and using khash's default
|
15 | 15 | // hash for 64 bit integers.
|
16 |
| -// GH 13436 |
| 16 | +// GH 13436 showed that _Py_HashDouble doesn't work well with khash |
| 17 | +// GH 28303 showed that the simple XOR-based version isn't good enough, |
| 18 | +// thus a murmur2 hash is used instead |
| 19 | + |
17 | 20 | khint64_t PANDAS_INLINE asint64(double key) {
|
18 |
| - khint64_t val; |
19 |
| - memcpy(&val, &key, sizeof(double)); |
20 |
| - return val; |
| 21 | + khint64_t val; |
| 22 | + memcpy(&val, &key, sizeof(double)); |
| 23 | + return val; |
21 | 24 | }
|
22 | 25 |
|
23 |
| -// correct for all inputs but not -0.0 and NaNs |
24 |
| -#define kh_float64_hash_func_0_NAN(key) (khint32_t)((asint64(key))>>33^(asint64(key))^(asint64(key))<<11) |
25 |
| - |
26 |
| -// correct for all inputs but not NaNs |
27 |
| -#define kh_float64_hash_func_NAN(key) ((key) == 0.0 ? \ |
28 |
| - kh_float64_hash_func_0_NAN(0.0) : \ |
29 |
| - kh_float64_hash_func_0_NAN(key)) |
| 26 | +#define ZERO_HASH 0 |
| 27 | +#define NAN_HASH 0 |
30 | 28 |
|
31 |
| -// correct for all |
32 |
| -#define kh_float64_hash_func(key) ((key) != (key) ? \ |
33 |
| - kh_float64_hash_func_NAN(Py_NAN) : \ |
34 |
| - kh_float64_hash_func_NAN(key)) |
| 29 | +khint32_t PANDAS_INLINE kh_float64_hash_func(double val){ |
| 30 | + // 0.0 and -0.0 should have the same hash: |
| 31 | + if (val == 0.0){ |
| 32 | + return ZERO_HASH; |
| 33 | + } |
| 34 | + // all nans should have the same hash: |
| 35 | + if ( val!=val ){ |
| 36 | + return NAN_HASH; |
| 37 | + } |
| 38 | + khint64_t as_int = asint64(val); |
| 39 | + return murmur2_64to32(as_int); |
| 40 | +} |
35 | 41 |
|
36 | 42 | #define kh_float64_hash_equal(a, b) ((a) == (b) || ((b) != (b) && (a) != (a)))
|
37 | 43 |
|
|
0 commit comments