From af70a6183e3ea9f9c683b6c80e4bd4fcca8cb822 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 16 Nov 2023 10:13:06 -0500 Subject: [PATCH 1/7] Refactored pandas_timedelta_to_timedeltastruct --- .../src/vendored/numpy/datetime/np_datetime.c | 379 ++++-------------- 1 file changed, 72 insertions(+), 307 deletions(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index 01e11e5138a8e..7de06d8c6040b 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -711,355 +711,120 @@ void pandas_datetime_to_datetimestruct(npy_datetime dt, NPY_DATETIMEUNIT base, void pandas_timedelta_to_timedeltastruct(npy_timedelta td, NPY_DATETIMEUNIT base, pandas_timedeltastruct *out) { - npy_int64 frac; - npy_int64 sfrac; - npy_int64 ifrac; - int sign; - npy_int64 per_day; - npy_int64 per_sec; - /* Initialize the output to all zeros */ memset(out, 0, sizeof(pandas_timedeltastruct)); - switch (base) { - case NPY_FR_ns: - - per_day = 86400000000000LL; - per_sec = 1000LL * 1000LL * 1000LL; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; + const npy_int64 sec_per_hour = 3600; + const npy_int64 sec_per_min = 60; - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL + 
out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = ifrac / (1000LL * 1000LL); - ifrac -= out->ms * 1000LL * 1000LL; - out->us = ifrac / 1000LL; - ifrac -= out->us * 1000LL; - out->ns = ifrac; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } + switch (base) { + case (NPY_FR_W): + out->days = 7 * td; break; - - case NPY_FR_us: - - per_day = 86400000000LL; - per_sec = 1000LL * 1000LL; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = ifrac / 1000LL; - ifrac -= out->ms * 1000LL; - out->us = ifrac / 1L; - ifrac -= out->us * 1L; - out->ns = ifrac; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } + case (NPY_FR_D): + out->days = td; break; - + case NPY_FR_h: + out->days = td / 24LL; + td -= out->days * 24LL; + out->hrs = td; + break; + case NPY_FR_m: + out->days = td / 1440LL; + td -= out->days * 1440LL; + out->hrs = td / 60LL; + td -= out->hrs * 60LL; + out->min = td; + break; + case NPY_FR_s: case NPY_FR_ms: - - per_day = 86400000LL; - per_sec = 1000LL; - - // put frac in 
seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = ifrac; - out->us = 0; - out->ns = 0; + case NPY_FR_us: + case NPY_FR_ns: { + const npy_int64 sec_per_day = 86400; + npy_int64 per_sec; + if (base == NPY_FR_s) { + per_sec = 1; + } else if (base == NPY_FR_ms) { + per_sec = 1000; + } else if (base == NPY_FR_us) { + per_sec = 1000000; } else { - out->ms = 0; - out->us = 0; - out->ns = 0; + per_sec = 1000000000; } - break; - - case NPY_FR_s: - // special case where we can simplify many expressions bc per_sec=1 - - per_day = 86400LL; - per_sec = 1L; + const npy_int64 per_day = sec_per_day * per_sec; + npy_int64 frac; // put frac in seconds if (td < 0 && td % per_sec != 0) frac = td / per_sec - 1; else frac = td / per_sec; + const int sign = frac < 0 ? 
-1 : 1; if (frac < 0) { - sign = -1; - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; + if ((-frac % sec_per_day) != 0) { + out->days = -frac / sec_per_day + 1; + frac += sec_per_day * out->days; } else { frac = -frac; } - } else { - sign = 1; - out->days = 0; } - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; + if (frac >= sec_per_day) { + out->days += frac / sec_per_day; + frac -= out->days * sec_per_day; } - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; + if (frac >= sec_per_hour) { + out->hrs = frac / sec_per_hour; + frac -= out->hrs * sec_per_hour; } - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; + if (frac >= sec_per_min) { + out->min = frac / sec_per_min; + frac -= out->min * sec_per_min; } if (frac >= 0) { out->sec = frac; frac -= out->sec; - } else { - out->sec = 0; } - sfrac = (out->hrs * 3600LL + out->min * 60LL + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = 0; - out->us = 0; - out->ns = 0; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; + if (base > NPY_FR_s) { + const npy_int64 sfrac = + (out->hrs * sec_per_hour + out->min * sec_per_min + out->sec) * + per_sec; + + if (sign < 0) + out->days = -out->days; + + npy_int64 ifrac = td - (out->days * per_day + sfrac); + + if (base == NPY_FR_ms) { + out->ms = ifrac; + } else if (base == NPY_FR_us) { + out->ms = ifrac / 1000LL; + ifrac = ifrac % 1000LL; + out->us = ifrac; + } else if (base == NPY_FR_ns) { + out->ms = ifrac / (1000LL * 1000LL); + ifrac = ifrac % (1000LL * 1000LL); + out->us = ifrac / 1000LL; + ifrac = ifrac % 1000LL; + out->ns = ifrac; + } } - break; - - case NPY_FR_m: - - out->days = td / 1440LL; - td -= out->days * 1440LL; - out->hrs = td / 60LL; - td -= out->hrs * 60LL; - 
out->min = td; - - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - case NPY_FR_h: - out->days = td / 24LL; - td -= out->days * 24LL; - out->hrs = td; - - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - case NPY_FR_D: - out->days = td; - out->hrs = 0; - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - case NPY_FR_W: - out->days = 7 * td; - out->hrs = 0; - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; + } break; default: PyErr_SetString(PyExc_RuntimeError, "NumPy timedelta metadata is corrupted with " "invalid base unit"); + break; } - out->seconds = out->hrs * 3600 + out->min * 60 + out->sec; + out->seconds = out->hrs * sec_per_hour + out->min * sec_per_min + out->sec; out->microseconds = out->ms * 1000 + out->us; out->nanoseconds = out->ns; } From 29d115d16543c14d47cb4773a7c2bc8c62614d76 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 17 Nov 2023 00:22:11 -0500 Subject: [PATCH 2/7] use generic macros --- .../src/vendored/numpy/datetime/np_datetime.c | 24 +++++-------------- 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index 7de06d8c6040b..d4b86f83804db 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -40,15 +40,9 @@ This file is derived from NumPy 1.7. 
See NUMPY_LICENSE.txt #else #if defined __has_builtin #if __has_builtin(__builtin_add_overflow) -#if _LP64 || __LP64__ || _ILP64 || __ILP64__ -#define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res) -#define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res) -#define checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res) -#else -#define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res) -#define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res) -#define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res) -#endif +#define checked_int64_add(a, b, res) __builtin_add_overflow(a, b, res) +#define checked_int64_sub(a, b, res) __builtin_sub_overflow(a, b, res) +#define checked_int64_mul(a, b, res) __builtin_mul_overflow(a, b, res) #else _Static_assert(0, "Overflow checking not detected; please try a newer compiler"); @@ -56,15 +50,9 @@ _Static_assert(0, // __has_builtin was added in gcc 10, but our muslinux_1_1 build environment // only has gcc-9.3, so fall back to __GNUC__ macro as long as we have that #elif __GNUC__ > 7 -#if _LP64 || __LP64__ || _ILP64 || __ILP64__ -#define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res) -#define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res) -#define checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res) -#else -#define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res) -#define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res) -#define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res) -#endif +#define checked_int64_add(a, b, res) __builtin_add_overflow(a, b, res) +#define checked_int64_sub(a, b, res) __builtin_sub_overflow(a, b, res) +#define checked_int64_mul(a, b, res) __builtin_mul_overflow(a, b, res) #else _Static_assert(0, "__has_builtin not detected; please try a newer compiler"); #endif From 3cd02b2c06a02a37f15b034744868fc5a43e97ff Mon Sep 17 00:00:00 
2001 From: Will Ayd Date: Fri, 17 Nov 2023 00:23:03 -0500 Subject: [PATCH 3/7] Revert "use generic macros" This reverts commit 29d115d16543c14d47cb4773a7c2bc8c62614d76. --- .../src/vendored/numpy/datetime/np_datetime.c | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index d4b86f83804db..7de06d8c6040b 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -40,9 +40,15 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt #else #if defined __has_builtin #if __has_builtin(__builtin_add_overflow) -#define checked_int64_add(a, b, res) __builtin_add_overflow(a, b, res) -#define checked_int64_sub(a, b, res) __builtin_sub_overflow(a, b, res) -#define checked_int64_mul(a, b, res) __builtin_mul_overflow(a, b, res) +#if _LP64 || __LP64__ || _ILP64 || __ILP64__ +#define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res) +#define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res) +#define checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res) +#else +#define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res) +#define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res) +#define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res) +#endif #else _Static_assert(0, "Overflow checking not detected; please try a newer compiler"); @@ -50,9 +56,15 @@ _Static_assert(0, // __has_builtin was added in gcc 10, but our muslinux_1_1 build environment // only has gcc-9.3, so fall back to __GNUC__ macro as long as we have that #elif __GNUC__ > 7 -#define checked_int64_add(a, b, res) __builtin_add_overflow(a, b, res) -#define checked_int64_sub(a, b, res) __builtin_sub_overflow(a, b, res) -#define checked_int64_mul(a, b, res) __builtin_mul_overflow(a, b, res) +#if _LP64 || __LP64__ || 
_ILP64 || __ILP64__ +#define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res) +#define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res) +#define checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res) +#else +#define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res) +#define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res) +#define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res) +#endif #else _Static_assert(0, "__has_builtin not detected; please try a newer compiler"); #endif From 114fd8c03c46c2b90343c8641f176dd902d291f3 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 17 Nov 2023 10:56:56 -0500 Subject: [PATCH 4/7] fix sign issue --- pandas/_libs/src/vendored/numpy/datetime/np_datetime.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index 7de06d8c6040b..5c175bbc8ae83 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -791,14 +791,14 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, frac -= out->sec; } + if (sign < 0) + out->days = -out->days; + if (base > NPY_FR_s) { const npy_int64 sfrac = (out->hrs * sec_per_hour + out->min * sec_per_min + out->sec) * per_sec; - if (sign < 0) - out->days = -out->days; - npy_int64 ifrac = td - (out->days * per_day + sfrac); if (base == NPY_FR_ms) { From 4dda6885e1be6fec3d7d4789f41f25e61ada1a17 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 11 Dec 2023 13:06:55 -0500 Subject: [PATCH 5/7] wextra fixes --- .../src/vendored/numpy/datetime/np_datetime.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index f2b132419b91c..0b4a8fcc960e4 100644 --- 
a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -715,7 +715,7 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, case NPY_FR_h: out->days = td / 24LL; td -= out->days * 24LL; - out->hrs = td; + out->hrs = (npy_int32)td; break; case NPY_FR_m: out->days = td / 1440LL; @@ -765,12 +765,12 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, } if (frac >= sec_per_hour) { - out->hrs = frac / sec_per_hour; + out->hrs = (npy_int32)(frac / sec_per_hour); frac -= out->hrs * sec_per_hour; } if (frac >= sec_per_min) { - out->min = frac / sec_per_min; + out->min = (npy_int32)(frac / sec_per_min); frac -= out->min * sec_per_min; } @@ -790,17 +790,17 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, npy_int64 ifrac = td - (out->days * per_day + sfrac); if (base == NPY_FR_ms) { - out->ms = ifrac; + out->ms = (npy_int32)ifrac; } else if (base == NPY_FR_us) { - out->ms = ifrac / 1000LL; + out->ms = (npy_int32)(ifrac / 1000LL); ifrac = ifrac % 1000LL; out->us = ifrac; } else if (base == NPY_FR_ns) { - out->ms = ifrac / (1000LL * 1000LL); + out->ms = (npy_int32)(ifrac / (1000LL * 1000LL)); ifrac = ifrac % (1000LL * 1000LL); - out->us = ifrac / 1000LL; + out->us = (npy_int32)(ifrac / 1000LL); ifrac = ifrac % 1000LL; - out->ns = ifrac; + out->ns = (npy_int32)ifrac; } } @@ -812,7 +812,8 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, break; } - out->seconds = out->hrs * sec_per_hour + out->min * sec_per_min + out->sec; + out->seconds = + (npy_int32)(out->hrs * sec_per_hour + out->min * sec_per_min + out->sec); out->microseconds = out->ms * 1000 + out->us; out->nanoseconds = out->ns; } From 15d82e14dcd0e6ce79ce8dc4b09d00ea50f4ede0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 11 Dec 2023 14:02:26 -0500 Subject: [PATCH 6/7] more wextra --- pandas/_libs/src/vendored/numpy/datetime/np_datetime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index 0b4a8fcc960e4..0a1d722a8ce8e 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -794,7 +794,7 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, } else if (base == NPY_FR_us) { out->ms = (npy_int32)(ifrac / 1000LL); ifrac = ifrac % 1000LL; - out->us = ifrac; + out->us = (npy_int32)ifrac; } else if (base == NPY_FR_ns) { out->ms = (npy_int32)(ifrac / (1000LL * 1000LL)); ifrac = ifrac % (1000LL * 1000LL); From a26d62ffbbc9a62d2fbdb13f71df0bdbda45c2ae Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 12 Dec 2023 16:08:27 -0500 Subject: [PATCH 7/7] remove extraneous parentheses --- pandas/_libs/src/vendored/numpy/datetime/np_datetime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index 0a1d722a8ce8e..bc9937f111f3e 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -706,10 +706,10 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, const npy_int64 sec_per_min = 60; switch (base) { - case (NPY_FR_W): + case NPY_FR_W: out->days = 7 * td; break; - case (NPY_FR_D): + case NPY_FR_D: out->days = td; break; case NPY_FR_h: