@@ -251,7 +251,7 @@ int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) {
251
251
}
252
252
253
253
254
- Py_hash_t PANDAS_INLINE _Pandas_HashDouble (double val ){
254
+ Py_hash_t PANDAS_INLINE _Pandas_HashDouble (double val ) {
255
255
// Since Python 3.10, nan no longer has hash 0
256
256
if (Py_IS_NAN (val )) {
257
257
return 0 ;
@@ -264,13 +264,13 @@ Py_hash_t PANDAS_INLINE _Pandas_HashDouble(double val){
264
264
}
265
265
266
266
267
- Py_hash_t PANDAS_INLINE floatobject_hash (PyFloatObject * key ){
267
+ Py_hash_t PANDAS_INLINE floatobject_hash (PyFloatObject * key ) {
268
268
return _Pandas_HashDouble (PyFloat_AS_DOUBLE (key ));
269
269
}
270
270
271
271
272
272
// replaces _Py_HashDouble with _Pandas_HashDouble
273
- Py_hash_t PANDAS_INLINE complexobject_hash (PyComplexObject * key ){
273
+ Py_hash_t PANDAS_INLINE complexobject_hash (PyComplexObject * key ) {
274
274
Py_uhash_t realhash = (Py_uhash_t )_Pandas_HashDouble (key -> cval .real );
275
275
Py_uhash_t imaghash = (Py_uhash_t )_Pandas_HashDouble (key -> cval .imag );
276
276
if (realhash == (Py_uhash_t )- 1 || imaghash == (Py_uhash_t )- 1 ) {
@@ -284,11 +284,52 @@ Py_hash_t PANDAS_INLINE complexobject_hash(PyComplexObject* key){
284
284
}
285
285
286
286
287
- khint32_t PANDAS_INLINE kh_python_hash_func (PyObject * key ){
287
// Forward declaration: tupleobject_hash calls kh_python_hash_func on every
// tuple item, and kh_python_hash_func itself dispatches exact tuples to
// tupleobject_hash, so one of the two must be declared before it is defined.
+ khint32_t PANDAS_INLINE kh_python_hash_func (PyObject * key );
288
+
289
+ // we could use any hashing algorithm; this one is CPython's original algorithm for tuples
290
+
291
// Mixing constants and rotation helper used by tupleobject_hash.
// One set is chosen when Py_uhash_t is wider than 4 bytes, the other otherwise.
//
// _PandasHASH_XXROTATE(x) rotates x to the left. The argument is fully
// parenthesized so that compound expressions (e.g. `a | b`) rotate as a
// whole; note that x is still evaluated twice, so pass only expressions
// without side effects.
#if SIZEOF_PY_UHASH_T > 4
#define _PandasHASH_XXPRIME_1 ((Py_uhash_t)11400714785074694791ULL)
#define _PandasHASH_XXPRIME_2 ((Py_uhash_t)14029467366897019727ULL)
#define _PandasHASH_XXPRIME_5 ((Py_uhash_t)2870177450012600261ULL)
#define _PandasHASH_XXROTATE(x) (((x) << 31) | ((x) >> 33))  /* Rotate left 31 bits */
#else
#define _PandasHASH_XXPRIME_1 ((Py_uhash_t)2654435761UL)
#define _PandasHASH_XXPRIME_2 ((Py_uhash_t)2246822519UL)
#define _PandasHASH_XXPRIME_5 ((Py_uhash_t)374761393UL)
#define _PandasHASH_XXROTATE(x) (((x) << 13) | ((x) >> 19))  /* Rotate left 13 bits */
#endif
302
+
303
+ Py_hash_t PANDAS_INLINE tupleobject_hash (PyTupleObject * key ) {
304
+ Py_ssize_t i , len = Py_SIZE (key );
305
+ PyObject * * item = key -> ob_item ;
306
+
307
+ Py_uhash_t acc = _PandasHASH_XXPRIME_5 ;
308
+ for (i = 0 ; i < len ; i ++ ) {
309
+ Py_uhash_t lane = kh_python_hash_func (item [i ]);
310
+ if (lane == (Py_uhash_t )- 1 ) {
311
+ return -1 ;
312
+ }
313
+ acc += lane * _PandasHASH_XXPRIME_2 ;
314
+ acc = _PandasHASH_XXROTATE (acc );
315
+ acc *= _PandasHASH_XXPRIME_1 ;
316
+ }
317
+
318
+ /* Add input length, mangled to keep the historical value of hash(()). */
319
+ acc += len ^ (_PandasHASH_XXPRIME_5 ^ 3527539UL );
320
+
321
+ if (acc == (Py_uhash_t )- 1 ) {
322
+ return 1546275796 ;
323
+ }
324
+ return acc ;
325
+ }
326
+
327
+
328
+ khint32_t PANDAS_INLINE kh_python_hash_func (PyObject * key ) {
288
329
Py_hash_t hash ;
289
330
// For PyObject_Hash holds:
290
331
// hash(0.0) == 0 == hash(-0.0)
291
- // yet for different nan-object different hash-values
332
+ // yet for different nan-objects different hash-values
292
333
// are possible
293
334
if (PyFloat_CheckExact (key )) {
294
335
// we cannot use kh_float64_hash_func
@@ -302,6 +343,9 @@ khint32_t PANDAS_INLINE kh_python_hash_func(PyObject* key){
302
343
// and kh_complex128_hash_func doesn't respect it
303
344
hash = complexobject_hash ((PyComplexObject * )key );
304
345
}
346
+ else if (PyTuple_CheckExact (key )) {
347
+ hash = tupleobject_hash ((PyTupleObject * )key );
348
+ }
305
349
else {
306
350
hash = PyObject_Hash (key );
307
351
}
0 commit comments