@@ -251,12 +251,105 @@ int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) {
251
251
}
252
252
253
253
254
- khint32_t PANDAS_INLINE kh_python_hash_func (PyObject * key ){
254
+ Py_hash_t PANDAS_INLINE _Pandas_HashDouble (double val ) {
255
+ //Since Python3.10, nan is no longer has hash 0
256
+ if (Py_IS_NAN (val )) {
257
+ return 0 ;
258
+ }
259
+ #if PY_VERSION_HEX < 0x030A0000
260
+ return _Py_HashDouble (val );
261
+ #else
262
+ return _Py_HashDouble (NULL , val );
263
+ #endif
264
+ }
265
+
266
+
267
+ Py_hash_t PANDAS_INLINE floatobject_hash (PyFloatObject * key ) {
268
+ return _Pandas_HashDouble (PyFloat_AS_DOUBLE (key ));
269
+ }
270
+
271
+
272
+ // replaces _Py_HashDouble with _Pandas_HashDouble
273
+ Py_hash_t PANDAS_INLINE complexobject_hash (PyComplexObject * key ) {
274
+ Py_uhash_t realhash = (Py_uhash_t )_Pandas_HashDouble (key -> cval .real );
275
+ Py_uhash_t imaghash = (Py_uhash_t )_Pandas_HashDouble (key -> cval .imag );
276
+ if (realhash == (Py_uhash_t )- 1 || imaghash == (Py_uhash_t )- 1 ) {
277
+ return -1 ;
278
+ }
279
+ Py_uhash_t combined = realhash + _PyHASH_IMAG * imaghash ;
280
+ if (combined == (Py_uhash_t )- 1 ) {
281
+ return -2 ;
282
+ }
283
+ return (Py_hash_t )combined ;
284
+ }
285
+
286
+
287
// Forward declaration: tupleobject_hash (below) hashes each tuple element
// via kh_python_hash_func, which is only defined after it.
+ khint32_t PANDAS_INLINE kh_python_hash_func (PyObject * key );
288
+
289
// Any hashing algorithm would do here; this is CPython's original scheme for
// tuples, so the constants are kept verbatim.
//
// Constants and rotation amount are selected by the width of Py_uhash_t
// (SIZEOF_PY_UHASH_T): the first set when it is wider than 4 bytes, the
// second set otherwise.
//
// _PandasHASH_XXROTATE(x) is a function-like macro, so its '(' must follow
// the name with no whitespace in between; otherwise the preprocessor defines
// an object-like macro whose replacement text merely begins with "(x)".
// The parameter is parenthesized at every use so that a compound argument
// (e.g. `a | b`) is rotated as a single value. The argument is evaluated
// twice: pass only side-effect-free expressions.
#if SIZEOF_PY_UHASH_T > 4
#define _PandasHASH_XXPRIME_1 ((Py_uhash_t)11400714785074694791ULL)
#define _PandasHASH_XXPRIME_2 ((Py_uhash_t)14029467366897019727ULL)
#define _PandasHASH_XXPRIME_5 ((Py_uhash_t)2870177450012600261ULL)
#define _PandasHASH_XXROTATE(x) (((x) << 31) | ((x) >> 33))  /* Rotate left 31 bits */
#else
#define _PandasHASH_XXPRIME_1 ((Py_uhash_t)2654435761UL)
#define _PandasHASH_XXPRIME_2 ((Py_uhash_t)2246822519UL)
#define _PandasHASH_XXPRIME_5 ((Py_uhash_t)374761393UL)
#define _PandasHASH_XXROTATE(x) (((x) << 13) | ((x) >> 19))  /* Rotate left 13 bits */
#endif
302
+
303
+ Py_hash_t PANDAS_INLINE tupleobject_hash (PyTupleObject * key ) {
304
+ Py_ssize_t i , len = Py_SIZE (key );
305
+ PyObject * * item = key -> ob_item ;
306
+
307
+ Py_uhash_t acc = _PandasHASH_XXPRIME_5 ;
308
+ for (i = 0 ; i < len ; i ++ ) {
309
+ Py_uhash_t lane = kh_python_hash_func (item [i ]);
310
+ if (lane == (Py_uhash_t )- 1 ) {
311
+ return -1 ;
312
+ }
313
+ acc += lane * _PandasHASH_XXPRIME_2 ;
314
+ acc = _PandasHASH_XXROTATE (acc );
315
+ acc *= _PandasHASH_XXPRIME_1 ;
316
+ }
317
+
318
+ /* Add input length, mangled to keep the historical value of hash(()). */
319
+ acc += len ^ (_PandasHASH_XXPRIME_5 ^ 3527539UL );
320
+
321
+ if (acc == (Py_uhash_t )- 1 ) {
322
+ return 1546275796 ;
323
+ }
324
+ return acc ;
325
+ }
326
+
327
+
328
+ khint32_t PANDAS_INLINE kh_python_hash_func (PyObject * key ) {
329
+ Py_hash_t hash ;
255
330
// For PyObject_Hash holds:
256
331
// hash(0.0) == 0 == hash(-0.0)
257
- // hash(X) == 0 if X is a NaN-value
258
- // so it is OK to use it directly for doubles
259
- Py_hash_t hash = PyObject_Hash (key );
332
+ // yet for different nan-objects different hash-values
333
+ // are possible
334
+ if (PyFloat_CheckExact (key )) {
335
+ // we cannot use kh_float64_hash_func
336
+ // because float(k) == k holds for any int-object k
337
+ // and kh_float64_hash_func doesn't respect it
338
+ hash = floatobject_hash ((PyFloatObject * )key );
339
+ }
340
+ else if (PyComplex_CheckExact (key )) {
341
+ // we cannot use kh_complex128_hash_func
342
+ // because complex(k,0) == k holds for any int-object k
343
+ // and kh_complex128_hash_func doesn't respect it
344
+ hash = complexobject_hash ((PyComplexObject * )key );
345
+ }
346
+ else if (PyTuple_CheckExact (key )) {
347
+ hash = tupleobject_hash ((PyTupleObject * )key );
348
+ }
349
+ else {
350
+ hash = PyObject_Hash (key );
351
+ }
352
+
260
353
if (hash == -1 ) {
261
354
PyErr_Clear ();
262
355
return 0 ;
0 commit comments