From 21bfc91f95cf1192d9fbe4e6e2f9c8d444e22511 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 22 Feb 2024 21:41:01 -0500 Subject: [PATCH 1/4] CLN: Assort khash-python cleanups --- .../pandas/vendored/klib/khash_python.h | 42 ++----------------- 1 file changed, 3 insertions(+), 39 deletions(-) diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h index 5a933b45d9e21..948dd47d6844f 100644 --- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h +++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h @@ -1,6 +1,9 @@ // Licence at LICENSES/KLIB_LICENSE +#pragma once + #include +#include #include typedef struct { @@ -12,18 +15,6 @@ typedef struct { double imag; } khcomplex128_t; -// khash should report usage to tracemalloc -#if PY_VERSION_HEX >= 0x03060000 -#include -#if PY_VERSION_HEX < 0x03070000 -#define PyTraceMalloc_Track _PyTraceMalloc_Track -#define PyTraceMalloc_Untrack _PyTraceMalloc_Untrack -#endif -#else -#define PyTraceMalloc_Track(...) -#define PyTraceMalloc_Untrack(...) -#endif - static const int KHASH_TRACE_DOMAIN = 424242; void *traced_malloc(size_t size) { void *ptr = malloc(size); @@ -95,31 +86,12 @@ static inline khuint32_t asuint32(float key) { return val; } -#define ZERO_HASH 0 -#define NAN_HASH 0 - static inline khuint32_t kh_float64_hash_func(double val) { - // 0.0 and -0.0 should have the same hash: - if (val == 0.0) { - return ZERO_HASH; - } - // all nans should have the same hash: - if (val != val) { - return NAN_HASH; - } khuint64_t as_int = asuint64(val); return murmur2_64to32(as_int); } static inline khuint32_t kh_float32_hash_func(float val) { - // 0.0 and -0.0 should have the same hash: - if (val == 0.0f) { - return ZERO_HASH; - } - // all nans should have the same hash: - if (val != val) { - return NAN_HASH; - } khuint32_t as_int = asuint32(val); return murmur2_32to32(as_int); } @@ -231,15 +203,7 @@ static inline int pyobject_cmp(PyObject *a, PyObject *b) { } static inline Py_hash_t _Pandas_HashDouble(double val) { - // Since Python3.10, nan is no longer has hash 0 - if (Py_IS_NAN(val)) { - return 0; - } -#if PY_VERSION_HEX < 0x030A0000 - return _Py_HashDouble(val); -#else return _Py_HashDouble(NULL, val); -#endif } static inline Py_hash_t floatobject_hash(PyFloatObject *key) { From 6d619fc4b256093d026f2c57764517b06e7f62b2 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 22 Feb 2024 21:45:00 -0500 Subject: [PATCH 2/4] add static inline to memory tracers --- pandas/_libs/include/pandas/vendored/klib/khash_python.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h index 948dd47d6844f..9d44cb56fa515 100644 --- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h +++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h @@ -16,7 +16,7 @@ typedef struct { } khcomplex128_t; static const int KHASH_TRACE_DOMAIN = 424242; -void *traced_malloc(size_t size) { +static inline void *traced_malloc(size_t size) { void *ptr = malloc(size); if (ptr != NULL) { PyTraceMalloc_Track(KHASH_TRACE_DOMAIN, (uintptr_t)ptr, size); @@ -24,7 +24,7 @@ void *traced_malloc(size_t size) { return ptr; } -void *traced_calloc(size_t num, size_t size) { +static inline void *traced_calloc(size_t num, size_t size) { void *ptr = calloc(num, size); if (ptr != NULL) { PyTraceMalloc_Track(KHASH_TRACE_DOMAIN, (uintptr_t)ptr, num * size); @@ -32,7 +32,7 @@ void *traced_calloc(size_t num, size_t size) { return ptr; } -void *traced_realloc(void *old_ptr, size_t size) { +static inline void *traced_realloc(void *old_ptr, size_t size) { void *ptr = realloc(old_ptr, size); if (ptr != NULL) { if (old_ptr != ptr) { @@ -43,7 +43,7 @@ void *traced_realloc(void *old_ptr, size_t size) { return ptr; } -void traced_free(void *ptr) { +static inline void traced_free(void *ptr) { if (ptr != NULL) { PyTraceMalloc_Untrack(KHASH_TRACE_DOMAIN, (uintptr_t)ptr); } From 6a044d69f8183b5d0f818ed00d1baa9b56d0e0e9 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 22 Feb 2024 21:48:26 -0500 Subject: [PATCH 3/4] revert mistake with hashdouble --- pandas/_libs/include/pandas/vendored/klib/khash_python.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h index 9d44cb56fa515..76cce2ac80d01 100644 --- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h +++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h @@ -203,7 +203,12 @@ static inline int pyobject_cmp(PyObject *a, PyObject *b) { } static inline Py_hash_t _Pandas_HashDouble(double val) { + // Since Python3.10, nan is no longer has hash 0 +#if PY_VERSION_HEX < 0x030A0000 + return _Py_HashDouble(val); +#else return _Py_HashDouble(NULL, val); +#endif } static inline Py_hash_t floatobject_hash(PyFloatObject *key) { From d545d3f8802dd7424a4fa7c2386c70fa1a93caf1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 23 Feb 2024 09:03:40 -0500 Subject: [PATCH 4/4] try less --- .../pandas/vendored/klib/khash_python.h | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h index 76cce2ac80d01..8bb3f84369b0b 100644 --- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h +++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h @@ -86,12 +86,31 @@ static inline khuint32_t asuint32(float key) { return val; } +#define ZERO_HASH 0 +#define NAN_HASH 0 + static inline khuint32_t kh_float64_hash_func(double val) { + // 0.0 and -0.0 should have the same hash: + if (val == 0.0) { + return ZERO_HASH; + } + // all nans should have the same hash: + if (val != val) { + return NAN_HASH; + } khuint64_t as_int = asuint64(val); return murmur2_64to32(as_int); } static inline khuint32_t kh_float32_hash_func(float val) { + // 0.0 and -0.0 should have the same hash: + if (val == 0.0f) { + return ZERO_HASH; + } + // all nans should have the same hash: + if (val != val) { + return NAN_HASH; + } khuint32_t as_int = asuint32(val); return murmur2_32to32(as_int); } @@ -204,6 +223,9 @@ static inline int pyobject_cmp(PyObject *a, PyObject *b) { static inline Py_hash_t _Pandas_HashDouble(double val) { // Since Python3.10, nan is no longer has hash 0 + if (Py_IS_NAN(val)) { + return 0; + } #if PY_VERSION_HEX < 0x030A0000 return _Py_HashDouble(val); #else