From e27a0da0cdee21d90fc00c13570a4cc4d4c18970 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 15 Sep 2023 16:32:12 -0400 Subject: [PATCH 01/34] refactor npy_datetimestruct_to_datetime --- .../src/vendored/numpy/datetime/np_datetime.c | 216 ++++++++++-------- 1 file changed, 125 insertions(+), 91 deletions(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index 49016f79de5b9..83af248cf63ee 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -301,104 +301,138 @@ PyObject *extract_utc_offset(PyObject *obj) { return tmp; } + +static inline int64_t scaleYearToEpoch(int64_t year) { return year - 1970; } + +static inline int64_t scaleYearsToMonths(int64_t years) { return years * 12; } + +static inline int64_t scaleDaysToWeeks(int64_t days) { + if (days >= 0) { + return days / 7; + } else { + return (days - 6) / 7; + } +} + +static inline int64_t scaleDaysToHours(int64_t days) { return days * 24; } + +static inline int64_t scaleHoursToMinutes(int64_t hours) { return hours * 60; } + +static inline int64_t scaleMinutesToSeconds(int64_t minutes) { + return minutes * 60; +} + +static inline int64_t scaleSecondsToMilliseconds(int64_t seconds) { + return seconds * 1000; +} + +static inline int64_t scaleSecondsToMicroseconds(int64_t seconds) { + return seconds * 1000000; +} + +static inline int64_t scaleMicrosecondsToNanoseconds(int64_t microseconds) { + return microseconds * 1000; +} + +static inline int64_t scaleMicrosecondsToPicoseconds(int64_t microseconds) { + return microseconds * 1000000; +} + +static inline int64_t scalePicosecondsToFemtoseconds(int64_t picoseconds) { + return picoseconds * 1000; +} + +static inline int64_t scalePicosecondsToAttoseconds(int64_t picoseconds) { + return picoseconds * 1000000; +} + /* * Converts a datetime from a datetimestruct to a datetime based * on a metadata unit. 
The date is assumed to be valid. */ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, const npy_datetimestruct *dts) { - npy_datetime ret; - - if (base == NPY_FR_Y) { - /* Truncate to the year */ - ret = dts->year - 1970; - } else if (base == NPY_FR_M) { - /* Truncate to the month */ - ret = 12 * (dts->year - 1970) + (dts->month - 1); - } else { - /* Otherwise calculate the number of days to start */ - npy_int64 days = get_datetimestruct_days(dts); - - switch (base) { - case NPY_FR_W: - /* Truncate to weeks */ - if (days >= 0) { - ret = days / 7; - } else { - ret = (days - 6) / 7; - } - break; - case NPY_FR_D: - ret = days; - break; - case NPY_FR_h: - ret = days * 24 + dts->hour; - break; - case NPY_FR_m: - ret = (days * 24 + dts->hour) * 60 + dts->min; - break; - case NPY_FR_s: - ret = ((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec; - break; - case NPY_FR_ms: - ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000 + - dts->us / 1000; - break; - case NPY_FR_us: - ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us; - break; - case NPY_FR_ns: - ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us) * - 1000 + - dts->ps / 1000; - break; - case NPY_FR_ps: - ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps; - break; - case NPY_FR_fs: - /* only 2.6 hours */ - ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps) * - 1000 + - dts->as / 1000; - break; - case NPY_FR_as: - /* only 9.2 secs */ - ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps) * - 1000000 + - dts->as; - break; - default: - /* Something got corrupted */ - PyErr_SetString( - PyExc_ValueError, - "NumPy datetime metadata with corrupt unit value"); - return -1; - } + if ((base == 
NPY_FR_Y) || (base == NPY_FR_M)) { + const int64_t years = scaleYearToEpoch(dts->year); + if (base == NPY_FR_Y) { + return years; + } + + int64_t months = scaleYearsToMonths(years); + months += dts->month - 1; + if (base == NPY_FR_M) { + return months; + } } - return ret; + + const int64_t days = get_datetimestruct_days(dts); + if (base == NPY_FR_D) { + return days; + } + + if (base == NPY_FR_W) { + return scaleDaysToWeeks(days); + } + + int64_t hours = scaleDaysToHours(days); + hours += dts->hour; + if (base == NPY_FR_h) { + return hours; + } + + + int64_t minutes = scaleHoursToMinutes(hours); + minutes += dts->min; + if (base == NPY_FR_m) { + return minutes; + } + + int64_t seconds = scaleMinutesToSeconds(minutes); + seconds += dts->sec; + if (base == NPY_FR_s) { + return seconds; + } + + if (base == NPY_FR_ms) { + int64_t milliseconds = scaleSecondsToMilliseconds(seconds); + milliseconds += dts->us / 1000; + return milliseconds; + } + + int64_t microseconds = scaleSecondsToMicroseconds(seconds); + microseconds += dts->us; + if (base == NPY_FR_us) { + return microseconds; + } + + if (base == NPY_FR_ns) { + int64_t nanoseconds = scaleMicrosecondsToNanoseconds(microseconds); + nanoseconds += dts->ps / 1000; + return nanoseconds; + } + + int64_t picoseconds = scaleMicrosecondsToPicoseconds(microseconds); + picoseconds += dts->ps; + if (base == NPY_FR_ps) { + return picoseconds; + } + + if (base == NPY_FR_fs) { + int64_t femtoseconds = scalePicosecondsToFemtoseconds(picoseconds); + femtoseconds += dts->as / 1000; + return femtoseconds; + } + + if (base == NPY_FR_as) { + int64_t attoseconds = scalePicosecondsToAttoseconds(picoseconds); + attoseconds += dts->as; + return attoseconds; + } + + /* Something got corrupted */ + PyErr_SetString(PyExc_ValueError, + "NumPy datetime metadata with corrupt unit value"); + return -1; } /* From 0dea606660010b7426e0cf76f23ca29cfe157d51 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 17 Oct 2023 17:00:33 -0400 Subject: [PATCH 
02/34] Used builtin overflow directives --- meson.build | 2 +- .../src/vendored/numpy/datetime/np_datetime.c | 161 +++++++++++++----- pandas/_libs/tslib.pyx | 1 - pandas/_libs/tslibs/conversion.pyx | 1 - pandas/_libs/tslibs/np_datetime.pxd | 6 +- pandas/_libs/tslibs/np_datetime.pyx | 4 +- pandas/tests/tslibs/test_array_to_datetime.py | 2 +- 7 files changed, 124 insertions(+), 53 deletions(-) diff --git a/meson.build b/meson.build index e0e533ffade97..cdfe6d37d6742 100644 --- a/meson.build +++ b/meson.build @@ -7,7 +7,7 @@ project( meson_version: '>=1.0.1', default_options: [ 'buildtype=release', - 'c_std=c99' + 'c_std=c11' ] ) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index 83af248cf63ee..ddf5d3db727b9 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -29,6 +29,29 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt #include #include "pandas/vendored/numpy/datetime/np_datetime.h" +#if defined(_MSVC_VER) + #include + #define checked_int64_add(a, b, res) LongLongAdd(a, b, res) + #define checked_int64_sub(a, b, res) LongLongSub(a, b, res) + #define checked_int64_mul(a, b, res) LongLongMul(a, b, res) +#else + #if !__has_builtin(__builtin_add_overflow) + _Static_assert(0, "Overflow checking not detected; please try a newer compiler"); + #endif + #if _LP64 || __LP64__ || _ILP64 || __ILP64__ + #define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res) + #define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res) + #define checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res) + #else + #define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res) + #define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res) + #define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res) + #endif +#endif + +// CHECK_OVERFLOW can be used in 
functions which define a +// OVERFLOW_OCCURRED goto label +#define CHECK_OVERFLOW(FUNC) do { if ((FUNC) != 0) goto OVERFLOW_OCCURRED; } while (0) const int days_per_month_table[2][12] = { {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, @@ -302,64 +325,91 @@ PyObject *extract_utc_offset(PyObject *obj) { } -static inline int64_t scaleYearToEpoch(int64_t year) { return year - 1970; } +static inline int scaleYearToEpoch(int64_t year, int64_t* result) { + return checked_int64_sub(year, 1970, result); +} -static inline int64_t scaleYearsToMonths(int64_t years) { return years * 12; } +static inline int scaleYearsToMonths(int64_t years, int64_t* result) { + return checked_int64_mul(years, 12, result); +} -static inline int64_t scaleDaysToWeeks(int64_t days) { +static inline int scaleDaysToWeeks(int64_t days, int64_t* result) { if (days >= 0) { - return days / 7; + *result = days / 7; + return 0; } else { - return (days - 6) / 7; + int res; + int64_t checked_days; + if ((res = checked_int64_sub(days, 6, &checked_days))) { + return res; + } + + *result = checked_days / 7; + return 0; } } -static inline int64_t scaleDaysToHours(int64_t days) { return days * 24; } +static inline int scaleDaysToHours(int64_t days, int64_t* result) { + return checked_int64_mul(days, 24, result); +} -static inline int64_t scaleHoursToMinutes(int64_t hours) { return hours * 60; } +static inline int scaleHoursToMinutes(int64_t hours, int64_t* result) { + return checked_int64_mul(hours, 60, result); +} -static inline int64_t scaleMinutesToSeconds(int64_t minutes) { - return minutes * 60; +static inline int scaleMinutesToSeconds(int64_t minutes, int64_t* result) { + return checked_int64_mul(minutes, 60, result); } -static inline int64_t scaleSecondsToMilliseconds(int64_t seconds) { - return seconds * 1000; +static inline int scaleSecondsToMilliseconds(int64_t seconds, int64_t* result) { + return checked_int64_mul(seconds, 1000, result); } -static inline int64_t scaleSecondsToMicroseconds(int64_t 
seconds) { - return seconds * 1000000; +static inline int scaleSecondsToMicroseconds(int64_t seconds, int64_t* result) { + return checked_int64_mul(seconds, 1000000, result); } -static inline int64_t scaleMicrosecondsToNanoseconds(int64_t microseconds) { - return microseconds * 1000; +static inline int scaleMicrosecondsToNanoseconds(int64_t microseconds, + int64_t* result) { + return checked_int64_mul(microseconds, 1000, result); } -static inline int64_t scaleMicrosecondsToPicoseconds(int64_t microseconds) { - return microseconds * 1000000; +static inline int scaleMicrosecondsToPicoseconds(int64_t microseconds, + int64_t* result) { + return checked_int64_mul(microseconds, 1000000, result); } -static inline int64_t scalePicosecondsToFemtoseconds(int64_t picoseconds) { - return picoseconds * 1000; +static inline int64_t scalePicosecondsToFemtoseconds(int64_t picoseconds, + int64_t* result) { + return checked_int64_mul(picoseconds, 1000, result); } -static inline int64_t scalePicosecondsToAttoseconds(int64_t picoseconds) { - return picoseconds * 1000000; +static inline int64_t scalePicosecondsToAttoseconds(int64_t picoseconds, + int64_t* result) { + return checked_int64_mul(picoseconds, 1000000, result); } /* * Converts a datetime from a datetimestruct to a datetime based - * on a metadata unit. The date is assumed to be valid. + * on a metadata unit. Returns -1 and sets PyErr on error. 
*/ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, const npy_datetimestruct *dts) { if ((base == NPY_FR_Y) || (base == NPY_FR_M)) { - const int64_t years = scaleYearToEpoch(dts->year); + int64_t years; + CHECK_OVERFLOW(scaleYearToEpoch(dts->year, &years)); + if (base == NPY_FR_Y) { return years; } - int64_t months = scaleYearsToMonths(years); - months += dts->month - 1; + int64_t months; + CHECK_OVERFLOW(scaleYearsToMonths(years, &months)); + + int64_t months_adder; + CHECK_OVERFLOW(checked_int64_sub(dts->month, 1, &months_adder)); + CHECK_OVERFLOW(checked_int64_add(months, months_adder, &months)); + if (base == NPY_FR_M) { return months; } @@ -371,61 +421,79 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, } if (base == NPY_FR_W) { - return scaleDaysToWeeks(days); + int64_t weeks; + CHECK_OVERFLOW(scaleDaysToWeeks(days, &weeks)); + return weeks; } - int64_t hours = scaleDaysToHours(days); - hours += dts->hour; + int64_t hours; + CHECK_OVERFLOW(scaleDaysToHours(days, &hours)); + CHECK_OVERFLOW(checked_int64_add(hours, dts->hour, &hours)); + if (base == NPY_FR_h) { return hours; } - int64_t minutes = scaleHoursToMinutes(hours); - minutes += dts->min; + int64_t minutes; + CHECK_OVERFLOW(scaleHoursToMinutes(hours, &minutes)); + CHECK_OVERFLOW(checked_int64_add(minutes, dts->min, &minutes)); + if (base == NPY_FR_m) { return minutes; } - int64_t seconds = scaleMinutesToSeconds(minutes); - seconds += dts->sec; + int64_t seconds; + CHECK_OVERFLOW(scaleMinutesToSeconds(minutes, &seconds)); + CHECK_OVERFLOW(checked_int64_add(seconds, dts->sec, &seconds)); + if (base == NPY_FR_s) { return seconds; } if (base == NPY_FR_ms) { - int64_t milliseconds = scaleSecondsToMilliseconds(seconds); - milliseconds += dts->us / 1000; + int64_t milliseconds; + CHECK_OVERFLOW(scaleSecondsToMilliseconds(seconds, &milliseconds)); + CHECK_OVERFLOW(checked_int64_add(milliseconds, dts->us / 1000, &milliseconds)); + return milliseconds; } - int64_t 
microseconds = scaleSecondsToMicroseconds(seconds); - microseconds += dts->us; + int64_t microseconds; + CHECK_OVERFLOW(scaleSecondsToMicroseconds(seconds, &microseconds)); + CHECK_OVERFLOW(checked_int64_add(microseconds, dts->us, &microseconds)); + if (base == NPY_FR_us) { return microseconds; } if (base == NPY_FR_ns) { - int64_t nanoseconds = scaleMicrosecondsToNanoseconds(microseconds); - nanoseconds += dts->ps / 1000; + int64_t nanoseconds; + CHECK_OVERFLOW(scaleMicrosecondsToNanoseconds(microseconds, &nanoseconds)); + CHECK_OVERFLOW(checked_int64_add(nanoseconds, dts->ps / 1000, &nanoseconds)); + return nanoseconds; } - int64_t picoseconds = scaleMicrosecondsToPicoseconds(microseconds); - picoseconds += dts->ps; + int64_t picoseconds; + CHECK_OVERFLOW(scaleMicrosecondsToPicoseconds(microseconds, &picoseconds)); + CHECK_OVERFLOW(checked_int64_add(picoseconds, dts->ps, &picoseconds)); + if (base == NPY_FR_ps) { return picoseconds; } if (base == NPY_FR_fs) { - int64_t femtoseconds = scalePicosecondsToFemtoseconds(picoseconds); - femtoseconds += dts->as / 1000; + int64_t femtoseconds; + CHECK_OVERFLOW(scalePicosecondsToFemtoseconds(picoseconds, &femtoseconds)); + CHECK_OVERFLOW(checked_int64_add(femtoseconds, dts->as / 1000, &femtoseconds)); return femtoseconds; } if (base == NPY_FR_as) { - int64_t attoseconds = scalePicosecondsToAttoseconds(picoseconds); - attoseconds += dts->as; + int64_t attoseconds; + CHECK_OVERFLOW(scalePicosecondsToAttoseconds(picoseconds, &attoseconds)); + CHECK_OVERFLOW(checked_int64_add(attoseconds, dts->as, &attoseconds)); return attoseconds; } @@ -433,6 +501,11 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, PyErr_SetString(PyExc_ValueError, "NumPy datetime metadata with corrupt unit value"); return -1; + +OVERFLOW_OCCURRED: + PyErr_SetString(PyExc_OverflowError, + "Overflow occurred in npy_datetimestruct_to_datetime"); + return -1; } /* diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 
20a18cf56779f..3d28bb6b34c7f 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -488,7 +488,6 @@ cpdef array_to_datetime( elif PyDate_Check(val): iresult[i] = pydate_to_dt64(val, &dts) - check_dts_bounds(&dts) elif is_datetime64_object(val): iresult[i] = get_datetime64_nanos(val, NPY_FR_ns) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 45c4d7809fe7a..116cd22d5a6a0 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -764,5 +764,4 @@ cdef int64_t parse_pydatetime( result = val.as_unit("ns")._value else: result = pydatetime_to_dt64(val, dts) - check_dts_bounds(dts) return result diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 60532174e8bdc..62864aa542af8 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -65,7 +65,7 @@ cdef extern from "pandas/datetime/pd_datetime.h": npy_datetimestruct *result) nogil npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, - npy_datetimestruct *d) nogil + npy_datetimestruct *d) nogil except? -1 void pandas_timedelta_to_timedeltastruct(npy_timedelta val, NPY_DATETIMEUNIT fr, @@ -89,11 +89,11 @@ cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=?) cdef int64_t pydatetime_to_dt64( datetime val, npy_datetimestruct *dts, NPY_DATETIMEUNIT reso=? -) +) except? -1 cdef void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts) noexcept cdef int64_t pydate_to_dt64( date val, npy_datetimestruct *dts, NPY_DATETIMEUNIT reso=? -) +) except? 
-1 cdef void pydate_to_dtstruct(date val, npy_datetimestruct *dts) noexcept cdef npy_datetime get_datetime64_value(object obj) noexcept nogil diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index c3ee68e14a8d4..73a87cdee6cc4 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -262,7 +262,7 @@ cdef void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts) noexcept: cdef int64_t pydatetime_to_dt64(datetime val, npy_datetimestruct *dts, - NPY_DATETIMEUNIT reso=NPY_FR_ns): + NPY_DATETIMEUNIT reso=NPY_FR_ns) except? -1: """ Note we are assuming that the datetime object is timezone-naive. """ @@ -280,7 +280,7 @@ cdef void pydate_to_dtstruct(date val, npy_datetimestruct *dts) noexcept: cdef int64_t pydate_to_dt64( date val, npy_datetimestruct *dts, NPY_DATETIMEUNIT reso=NPY_FR_ns -): +) except? -1: pydate_to_dtstruct(val, dts) return npy_datetimestruct_to_datetime(reso, dts) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 435fe5f4b90d8..d83d59b932b4d 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -132,7 +132,7 @@ def test_coerce_outside_ns_bounds(invalid_date, errors): if errors == "raise": msg = "^Out of bounds nanosecond timestamp: .*, at position 0$" - with pytest.raises(ValueError, match=msg): + with pytest.raises(OverflowError, match=msg): tslib.array_to_datetime(**kwargs) else: # coerce. 
result, _ = tslib.array_to_datetime(**kwargs) From d6a24f351042d6c030ca64ceee0516f88f6f4544 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 17 Oct 2023 18:15:09 -0400 Subject: [PATCH 03/34] macro fixups --- pandas/_libs/src/vendored/numpy/datetime/np_datetime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index ddf5d3db727b9..298535a5555a8 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -29,13 +29,13 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt #include #include "pandas/vendored/numpy/datetime/np_datetime.h" -#if defined(_MSVC_VER) +#if defined(_WIN32) #include #define checked_int64_add(a, b, res) LongLongAdd(a, b, res) #define checked_int64_sub(a, b, res) LongLongSub(a, b, res) #define checked_int64_mul(a, b, res) LongLongMul(a, b, res) #else - #if !__has_builtin(__builtin_add_overflow) + #if !(__has_builtin(__builtin_add_overflow)) _Static_assert(0, "Overflow checking not detected; please try a newer compiler"); #endif #if _LP64 || __LP64__ || _ILP64 || __ILP64__ From 21e919c1ef8e0c08f6849da805604f33ce19597d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 17 Oct 2023 18:43:15 -0400 Subject: [PATCH 04/34] more macro cleanups --- .../src/vendored/numpy/datetime/np_datetime.c | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index 298535a5555a8..9513b14df7dcf 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -30,23 +30,24 @@ This file is derived from NumPy 1.7. 
See NUMPY_LICENSE.txt #include "pandas/vendored/numpy/datetime/np_datetime.h" #if defined(_WIN32) - #include - #define checked_int64_add(a, b, res) LongLongAdd(a, b, res) - #define checked_int64_sub(a, b, res) LongLongSub(a, b, res) - #define checked_int64_mul(a, b, res) LongLongMul(a, b, res) +# include +# define checked_int64_add(a, b, res) LongLongAdd(a, b, res) +# define checked_int64_sub(a, b, res) LongLongSub(a, b, res) +# define checked_int64_mul(a, b, res) LongLongMul(a, b, res) #else - #if !(__has_builtin(__builtin_add_overflow)) +# if defined(__has_builtin) && __has_builtin(__builtin_add_overflow) +# if _LP64 || __LP64__ || _ILP64 || __ILP64__ +# define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res) +# define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res) +# define checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res) +# else +# define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res) +# define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res) +# define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res) +# endif +# else _Static_assert(0, "Overflow checking not detected; please try a newer compiler"); - #endif - #if _LP64 || __LP64__ || _ILP64 || __ILP64__ - #define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res) - #define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res) - #define checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res) - #else - #define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res) - #define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res) - #define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res) - #endif +# endif #endif // CHECK_OVERFLOW can be used in functions which define a From 6302f2f368e53dd8ad6dc02f7202ec738805205e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 17 Oct 2023 19:09:38 -0400 Subject: [PATCH 05/34] more macro refactor --- 
.../src/vendored/numpy/datetime/np_datetime.c | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index 9513b14df7dcf..b41a2c4d67a6a 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -35,18 +35,22 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt # define checked_int64_sub(a, b, res) LongLongSub(a, b, res) # define checked_int64_mul(a, b, res) LongLongMul(a, b, res) #else -# if defined(__has_builtin) && __has_builtin(__builtin_add_overflow) -# if _LP64 || __LP64__ || _ILP64 || __ILP64__ -# define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res) -# define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res) -# define checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res) +# if defined __has_builtin +# if __has_builtin(__builtin_add_overflow) +# if _LP64 || __LP64__ || _ILP64 || __ILP64__ +# define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res) +# define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res) +# define checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res) +# else +# define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res) +# define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res) +# define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res) +# endif # else -# define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res) -# define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res) -# define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res) +_Static_assert(0, "Overflow checking not detected; please try a newer compiler"); # endif # else - _Static_assert(0, "Overflow checking not detected; please try a newer compiler"); +_Static_assert(0, "__has_builtin 
not detected; please try a newer compiler"); # endif #endif From e2646a6b16cc320a0e6e0c3b67089734c4fd0a4b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 17 Oct 2023 22:05:45 -0400 Subject: [PATCH 06/34] musllinux c support --- .../_libs/src/vendored/numpy/datetime/np_datetime.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index b41a2c4d67a6a..2285b106bbd81 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -49,6 +49,18 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt # else _Static_assert(0, "Overflow checking not detected; please try a newer compiler"); # endif +// __has_builtin was added in gcc 10, but our musllinux_1_1 build environment +// only has gcc-9.3, so fall back to __GNUC__ macro as long as we have that +# elif __GNUC__ > 7 +# if _LP64 || __LP64__ || _ILP64 || __ILP64__ +# define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res) +# define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res) +# define checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res) +# else +# define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res) +# define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res) +# define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res) +# endif # else _Static_assert(0, "__has_builtin not detected; please try a newer compiler"); # endif From f55a58ab018a48ef64680d7d62606bbe522887c4 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 19 Oct 2023 16:04:06 -0400 Subject: [PATCH 07/34] macro cleanup --- .../src/vendored/numpy/datetime/np_datetime.c | 62 +++++++++---------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c 
index 2285b106bbd81..a97ea066a90b2 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -30,6 +30,9 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt #include "pandas/vendored/numpy/datetime/np_datetime.h" #if defined(_WIN32) +# ifndef ENABLE_INTSAFE_SIGNED_FUNCTIONS +# define ENABLE_INTSAFE_SIGNED_FUNCTIONS +# endif # include # define checked_int64_add(a, b, res) LongLongAdd(a, b, res) # define checked_int64_sub(a, b, res) LongLongSub(a, b, res) @@ -66,9 +69,11 @@ _Static_assert(0, "__has_builtin not detected; please try a newer compiler"); # endif #endif -// CHECK_OVERFLOW can be used in functions which define a -// OVERFLOW_OCCURRED goto label -#define CHECK_OVERFLOW(FUNC) do { if ((FUNC) != 0) goto OVERFLOW_OCCURRED; } while (0) +#define PD_CHECK_OVERFLOW(FUNC) do { if ((FUNC) != 0) { \ + PyErr_SetString(PyExc_OverflowError, \ + "Overflow occurred in npy_datetimestruct_to_datetime"); \ + return -1; \ +}} while (0) const int days_per_month_table[2][12] = { {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, @@ -414,18 +419,18 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, const npy_datetimestruct *dts) { if ((base == NPY_FR_Y) || (base == NPY_FR_M)) { int64_t years; - CHECK_OVERFLOW(scaleYearToEpoch(dts->year, &years)); + PD_CHECK_OVERFLOW(scaleYearToEpoch(dts->year, &years)); if (base == NPY_FR_Y) { return years; } int64_t months; - CHECK_OVERFLOW(scaleYearsToMonths(years, &months)); + PD_CHECK_OVERFLOW(scaleYearsToMonths(years, &months)); int64_t months_adder; - CHECK_OVERFLOW(checked_int64_sub(dts->month, 1, &months_adder)); - CHECK_OVERFLOW(checked_int64_add(months, months_adder, &months)); + PD_CHECK_OVERFLOW(checked_int64_sub(dts->month, 1, &months_adder)); + PD_CHECK_OVERFLOW(checked_int64_add(months, months_adder, &months)); if (base == NPY_FR_M) { return months; @@ -439,13 +444,13 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT 
base, if (base == NPY_FR_W) { int64_t weeks; - CHECK_OVERFLOW(scaleDaysToWeeks(days, &weeks)); + PD_CHECK_OVERFLOW(scaleDaysToWeeks(days, &weeks)); return weeks; } int64_t hours; - CHECK_OVERFLOW(scaleDaysToHours(days, &hours)); - CHECK_OVERFLOW(checked_int64_add(hours, dts->hour, &hours)); + PD_CHECK_OVERFLOW(scaleDaysToHours(days, &hours)); + PD_CHECK_OVERFLOW(checked_int64_add(hours, dts->hour, &hours)); if (base == NPY_FR_h) { return hours; @@ -453,16 +458,16 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, int64_t minutes; - CHECK_OVERFLOW(scaleHoursToMinutes(hours, &minutes)); - CHECK_OVERFLOW(checked_int64_add(minutes, dts->min, &minutes)); + PD_CHECK_OVERFLOW(scaleHoursToMinutes(hours, &minutes)); + PD_CHECK_OVERFLOW(checked_int64_add(minutes, dts->min, &minutes)); if (base == NPY_FR_m) { return minutes; } int64_t seconds; - CHECK_OVERFLOW(scaleMinutesToSeconds(minutes, &seconds)); - CHECK_OVERFLOW(checked_int64_add(seconds, dts->sec, &seconds)); + PD_CHECK_OVERFLOW(scaleMinutesToSeconds(minutes, &seconds)); + PD_CHECK_OVERFLOW(checked_int64_add(seconds, dts->sec, &seconds)); if (base == NPY_FR_s) { return seconds; @@ -470,15 +475,15 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, if (base == NPY_FR_ms) { int64_t milliseconds; - CHECK_OVERFLOW(scaleSecondsToMilliseconds(seconds, &milliseconds)); - CHECK_OVERFLOW(checked_int64_add(milliseconds, dts->us / 1000, &milliseconds)); + PD_CHECK_OVERFLOW(scaleSecondsToMilliseconds(seconds, &milliseconds)); + PD_CHECK_OVERFLOW(checked_int64_add(milliseconds, dts->us / 1000, &milliseconds)); return milliseconds; } int64_t microseconds; - CHECK_OVERFLOW(scaleSecondsToMicroseconds(seconds, &microseconds)); - CHECK_OVERFLOW(checked_int64_add(microseconds, dts->us, &microseconds)); + PD_CHECK_OVERFLOW(scaleSecondsToMicroseconds(seconds, &microseconds)); + PD_CHECK_OVERFLOW(checked_int64_add(microseconds, dts->us, &microseconds)); if (base == NPY_FR_us) { return microseconds; @@ -486,15 +491,15 @@ 
npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, if (base == NPY_FR_ns) { int64_t nanoseconds; - CHECK_OVERFLOW(scaleMicrosecondsToNanoseconds(microseconds, &nanoseconds)); - CHECK_OVERFLOW(checked_int64_add(nanoseconds, dts->ps / 1000, &nanoseconds)); + PD_CHECK_OVERFLOW(scaleMicrosecondsToNanoseconds(microseconds, &nanoseconds)); + PD_CHECK_OVERFLOW(checked_int64_add(nanoseconds, dts->ps / 1000, &nanoseconds)); return nanoseconds; } int64_t picoseconds; - CHECK_OVERFLOW(scaleMicrosecondsToPicoseconds(microseconds, &picoseconds)); - CHECK_OVERFLOW(checked_int64_add(picoseconds, dts->ps, &picoseconds)); + PD_CHECK_OVERFLOW(scaleMicrosecondsToPicoseconds(microseconds, &picoseconds)); + PD_CHECK_OVERFLOW(checked_int64_add(picoseconds, dts->ps, &picoseconds)); if (base == NPY_FR_ps) { return picoseconds; @@ -502,15 +507,15 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, if (base == NPY_FR_fs) { int64_t femtoseconds; - CHECK_OVERFLOW(scalePicosecondsToFemtoseconds(picoseconds, &femtoseconds)); - CHECK_OVERFLOW(checked_int64_add(femtoseconds, dts->as / 1000, &femtoseconds)); + PD_CHECK_OVERFLOW(scalePicosecondsToFemtoseconds(picoseconds, &femtoseconds)); + PD_CHECK_OVERFLOW(checked_int64_add(femtoseconds, dts->as / 1000, &femtoseconds)); return femtoseconds; } if (base == NPY_FR_as) { int64_t attoseconds; - CHECK_OVERFLOW(scalePicosecondsToAttoseconds(picoseconds, &attoseconds)); - CHECK_OVERFLOW(checked_int64_add(attoseconds, dts->as, &attoseconds)); + PD_CHECK_OVERFLOW(scalePicosecondsToAttoseconds(picoseconds, &attoseconds)); + PD_CHECK_OVERFLOW(checked_int64_add(attoseconds, dts->as, &attoseconds)); return attoseconds; } @@ -518,11 +523,6 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, PyErr_SetString(PyExc_ValueError, "NumPy datetime metadata with corrupt unit value"); return -1; - -OVERFLOW_OCCURRED: - PyErr_SetString(PyExc_OverflowError, - "Overflow occurred in npy_datetimestruct_to_datetime"); - 
return -1; } /* From 057e74fc0ed033568b18e760a4488262b8bd57ae Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 19 Oct 2023 16:17:21 -0400 Subject: [PATCH 08/34] more refactor --- pandas/_libs/tslib.pyx | 5 ++++- pandas/_libs/tslibs/conversion.pyx | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 2535d8892e929..53622921a4c3b 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -488,7 +488,10 @@ cpdef array_to_datetime( iresult[i] = parse_pydatetime(val, &dts, utc_convert, creso=creso) elif PyDate_Check(val): - iresult[i] = pydate_to_dt64(val, &dts) + try: + iresult[i] = pydate_to_dt64(val, &dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e elif is_datetime64_object(val): iresult[i] = get_datetime64_nanos(val, NPY_FR_ns) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 677e5748a2ae2..8f2a8d4f8c552 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -765,5 +765,8 @@ cdef int64_t parse_pydatetime( if isinstance(val, _Timestamp): result = (<_Timestamp>val)._as_creso(creso, round_ok=False)._value else: - result = pydatetime_to_dt64(val, dts, reso=creso) + try: + result = pydatetime_to_dt64(val, dts, reso=creso) + except OverflowError as e: + raise OutOfBoundsDatetime from e return result From 2d6d5fcca183842490db2799c95e8d146eb36df6 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 19 Oct 2023 16:30:44 -0400 Subject: [PATCH 09/34] fix cython warning --- pandas/_libs/tslibs/np_datetime.pxd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 62864aa542af8..e0e6665ee4db9 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -65,7 +65,7 @@ cdef extern from "pandas/datetime/pd_datetime.h": npy_datetimestruct *result) nogil npy_datetime 
npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, - npy_datetimestruct *d) nogil except? -1 + npy_datetimestruct *d) except? -1 nogil void pandas_timedelta_to_timedeltastruct(npy_timedelta val, NPY_DATETIMEUNIT fr, From e65e22965a0354c245218401a675cfed9513dd8e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 19 Oct 2023 16:33:44 -0400 Subject: [PATCH 10/34] windows fix --- pandas/_libs/src/vendored/numpy/datetime/np_datetime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index a97ea066a90b2..48f3c4e0a41af 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -36,7 +36,7 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt # include <intsafe.h> # define checked_int64_add(a, b, res) LongLongAdd(a, b, res) # define checked_int64_sub(a, b, res) LongLongSub(a, b, res) -# define checked_int64_mul(a, b, res) LongLongMul(a, b, res) +# define checked_int64_mul(a, b, res) LongLongMult(a, b, res) #else # if defined __has_builtin # if __has_builtin(__builtin_add_overflow) From af29e7c5b15ede462d7181cf98d13f79e71f3e55 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 20 Oct 2023 10:24:11 -0400 Subject: [PATCH 11/34] Raise Outofboundsdatetime --- pandas/_libs/tslib.pyx | 7 +- pandas/_libs/tslibs/conversion.pyx | 20 ++- pandas/_libs/tslibs/np_datetime.pyx | 31 ++++- pandas/_libs/tslibs/offsets.pyx | 185 +++++++++++++++------------- pandas/_libs/tslibs/period.pxd | 2 +- pandas/_libs/tslibs/period.pyx | 60 ++++++--- pandas/_libs/tslibs/strptime.pyx | 12 +- pandas/_libs/tslibs/timestamps.pyx | 12 +- 8 files changed, 207 insertions(+), 122 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 53622921a4c3b..0317866ff6d66 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -34,7 +34,6 @@ cnp.import_array() from
pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, - check_dts_bounds, import_pandas_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, @@ -94,8 +93,10 @@ def _test_parse_iso8601(ts: str): obj = _TSObject() string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True) - obj.value = npy_datetimestruct_to_datetime(NPY_FR_ns, &obj.dts) - check_dts_bounds(&obj.dts) + try: + obj.value = npy_datetimestruct_to_datetime(NPY_FR_ns, &obj.dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e if out_local == 1: obj.tzinfo = timezone(timedelta(minutes=out_tzoffset)) obj.value = tz_localize_to_utc_single(obj.value, obj.tzinfo) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 8f2a8d4f8c552..1888aa45407b2 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -207,7 +207,10 @@ cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1: if unit != reso: pandas_datetime_to_datetimestruct(ival, unit, &dts) check_dts_bounds(&dts, reso) - ival = npy_datetimestruct_to_datetime(reso, &dts) + try: + ival = npy_datetimestruct_to_datetime(reso, &dts) + except OverflowError as e: + raise OutOfBoundsDatetime return ival @@ -398,7 +401,10 @@ cdef _TSObject convert_datetime_to_tsobject( if nanos: obj.dts.ps = nanos * 1000 - obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts) + try: + obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e if obj.tzinfo is not None and not is_utc(obj.tzinfo): offset = get_utcoffset(obj.tzinfo, ts) @@ -435,7 +441,10 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, datetime dt Py_ssize_t pos - value = npy_datetimestruct_to_datetime(reso, &dts) + try: + value = npy_datetimestruct_to_datetime(reso, &dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e obj.dts = dts obj.tzinfo = 
timezone(timedelta(minutes=tzoffset)) obj.value = tz_localize_to_utc_single( @@ -531,7 +540,10 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, str unit, dts, out_tzoffset, tz, reso ) else: - ival = npy_datetimestruct_to_datetime(reso, &dts) + try: + ival = npy_datetimestruct_to_datetime(reso, &dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e if tz is not None: # shift for _localize_tso ival = tz_localize_to_utc_single( diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 73a87cdee6cc4..89ca6f3421aa4 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -266,8 +266,14 @@ cdef int64_t pydatetime_to_dt64(datetime val, """ Note we are assuming that the datetime object is timezone-naive. """ + cdef int64_t result pydatetime_to_dtstruct(val, dts) - return npy_datetimestruct_to_datetime(reso, dts) + try: + result = npy_datetimestruct_to_datetime(reso, dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e + + return result cdef void pydate_to_dtstruct(date val, npy_datetimestruct *dts) noexcept: @@ -281,8 +287,15 @@ cdef void pydate_to_dtstruct(date val, npy_datetimestruct *dts) noexcept: cdef int64_t pydate_to_dt64( date val, npy_datetimestruct *dts, NPY_DATETIMEUNIT reso=NPY_FR_ns ) except? 
-1: + cdef int64_t result pydate_to_dtstruct(val, dts) - return npy_datetimestruct_to_datetime(reso, dts) + + try: + result = npy_datetimestruct_to_datetime(reso, dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e + + return result cdef int string_to_dts( @@ -412,7 +425,10 @@ cpdef ndarray astype_overflowsafe( else: raise else: - new_value = npy_datetimestruct_to_datetime(to_unit, &dts) + try: + new_value = npy_datetimestruct_to_datetime(to_unit, &dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e # Analogous to: iresult[i] = new_value (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = new_value @@ -657,7 +673,14 @@ cdef int64_t _convert_reso_with_dtstruct( ) except? -1: cdef: npy_datetimestruct dts + int64_t result pandas_datetime_to_datetimestruct(value, from_unit, &dts) check_dts_bounds(&dts, to_unit) - return npy_datetimestruct_to_datetime(to_unit, &dts) + + try: + result = npy_datetimestruct_to_datetime(to_unit, &dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e + + return result diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index b25afbf0541a9..4f37c109477b6 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -75,6 +75,7 @@ from pandas._libs.tslibs.np_datetime cimport ( pandas_datetime_to_datetimestruct, pydate_to_dtstruct, ) +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime import_pandas_datetime() @@ -3061,54 +3062,56 @@ cdef class SemiMonthOffset(SingleConstructorOffset): NPY_DATETIMEUNIT reso = get_unit_from_dtype(dtarr.dtype) cnp.broadcast mi = cnp.PyArray_MultiIterNew2(out, i8other) - with nogil: - for i in range(count): - # Analogous to: val = i8other[i] - val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + for i in range(count): + # Analogous to: val = i8other[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] - if val == NPY_NAT: - res_val = NPY_NAT + if val == NPY_NAT: + res_val = NPY_NAT + + else: + 
pandas_datetime_to_datetimestruct(val, reso, &dts) + day = dts.day + + # Adjust so that we are always looking at self.day_of_month, + # incrementing/decrementing n if necessary. + nadj = roll_convention(day, n, anchor_dom) + + days_in_month = get_days_in_month(dts.year, dts.month) + # For SemiMonthBegin on other.day == 1 and + # SemiMonthEnd on other.day == days_in_month, + # shifting `other` to `self.day_of_month` _always_ requires + # incrementing/decrementing `n`, regardless of whether it is + # initially positive. + if is_start and (n <= 0 and day == 1): + nadj -= 1 + elif (not is_start) and (n > 0 and day == days_in_month): + nadj += 1 + + if is_start: + # See also: SemiMonthBegin._apply + months = nadj // 2 + nadj % 2 + to_day = 1 if nadj % 2 else anchor_dom else: - pandas_datetime_to_datetimestruct(val, reso, &dts) - day = dts.day - - # Adjust so that we are always looking at self.day_of_month, - # incrementing/decrementing n if necessary. - nadj = roll_convention(day, n, anchor_dom) - - days_in_month = get_days_in_month(dts.year, dts.month) - # For SemiMonthBegin on other.day == 1 and - # SemiMonthEnd on other.day == days_in_month, - # shifting `other` to `self.day_of_month` _always_ requires - # incrementing/decrementing `n`, regardless of whether it is - # initially positive. 
- if is_start and (n <= 0 and day == 1): - nadj -= 1 - elif (not is_start) and (n > 0 and day == days_in_month): - nadj += 1 - - if is_start: - # See also: SemiMonthBegin._apply - months = nadj // 2 + nadj % 2 - to_day = 1 if nadj % 2 else anchor_dom - - else: - # See also: SemiMonthEnd._apply - months = nadj // 2 - to_day = 31 if nadj % 2 else anchor_dom - - dts.year = year_add_months(dts, months) - dts.month = month_add_months(dts, months) - days_in_month = get_days_in_month(dts.year, dts.month) - dts.day = min(to_day, days_in_month) + # See also: SemiMonthEnd._apply + months = nadj // 2 + to_day = 31 if nadj % 2 else anchor_dom + dts.year = year_add_months(dts, months) + dts.month = month_add_months(dts, months) + days_in_month = get_days_in_month(dts.year, dts.month) + dts.day = min(to_day, days_in_month) + + try: res_val = npy_datetimestruct_to_datetime(reso, &dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e - # Analogous to: out[i] = res_val - (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val - cnp.PyArray_MultiIter_NEXT(mi) + cnp.PyArray_MultiIter_NEXT(mi) return out @@ -4863,30 +4866,32 @@ cdef ndarray shift_quarters( npy_datetimestruct dts cnp.broadcast mi = cnp.PyArray_MultiIterNew2(out, dtindex) - with nogil: - for i in range(count): - # Analogous to: val = dtindex[i] - val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + for i in range(count): + # Analogous to: val = dtindex[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] - if val == NPY_NAT: - res_val = NPY_NAT - else: - pandas_datetime_to_datetimestruct(val, reso, &dts) - n = quarters + if val == NPY_NAT: + res_val = NPY_NAT + else: + pandas_datetime_to_datetimestruct(val, reso, &dts) + n = quarters - months_since = (dts.month - q1start_month) % modby - n = _roll_qtrday(&dts, n, months_since, day_opt) + months_since = (dts.month - q1start_month) % modby + n = _roll_qtrday(&dts, n, months_since, 
day_opt) - dts.year = year_add_months(dts, modby * n - months_since) - dts.month = month_add_months(dts, modby * n - months_since) - dts.day = get_day_of_month(&dts, day_opt) + dts.year = year_add_months(dts, modby * n - months_since) + dts.month = month_add_months(dts, modby * n - months_since) + dts.day = get_day_of_month(&dts, day_opt) + try: res_val = npy_datetimestruct_to_datetime(reso, &dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e - # Analogous to: out[i] = res_val - (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val - cnp.PyArray_MultiIter_NEXT(mi) + cnp.PyArray_MultiIter_NEXT(mi) return out @@ -4926,51 +4931,55 @@ def shift_months( if day_opt is None: # TODO: can we combine this with the non-None case? - with nogil: - for i in range(count): - # Analogous to: val = i8other[i] - val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + for i in range(count): + # Analogous to: val = i8other[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] - if val == NPY_NAT: - res_val = NPY_NAT - else: - pandas_datetime_to_datetimestruct(val, reso, &dts) - dts.year = year_add_months(dts, months) - dts.month = month_add_months(dts, months) + if val == NPY_NAT: + res_val = NPY_NAT + else: + pandas_datetime_to_datetimestruct(val, reso, &dts) + dts.year = year_add_months(dts, months) + dts.month = month_add_months(dts, months) - dts.day = min(dts.day, get_days_in_month(dts.year, dts.month)) + dts.day = min(dts.day, get_days_in_month(dts.year, dts.month)) + try: res_val = npy_datetimestruct_to_datetime(reso, &dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e - # Analogous to: out[i] = res_val - (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val - cnp.PyArray_MultiIter_NEXT(mi) + cnp.PyArray_MultiIter_NEXT(mi) else: - with nogil: - for i in range(count): + for i in range(count): - # 
Analogous to: val = i8other[i] - val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + # Analogous to: val = i8other[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] - if val == NPY_NAT: - res_val = NPY_NAT - else: - pandas_datetime_to_datetimestruct(val, reso, &dts) - months_to_roll = months + if val == NPY_NAT: + res_val = NPY_NAT + else: + pandas_datetime_to_datetimestruct(val, reso, &dts) + months_to_roll = months - months_to_roll = _roll_qtrday(&dts, months_to_roll, 0, day_opt) + months_to_roll = _roll_qtrday(&dts, months_to_roll, 0, day_opt) - dts.year = year_add_months(dts, months_to_roll) - dts.month = month_add_months(dts, months_to_roll) - dts.day = get_day_of_month(&dts, day_opt) + dts.year = year_add_months(dts, months_to_roll) + dts.month = month_add_months(dts, months_to_roll) + dts.day = get_day_of_month(&dts, day_opt) + try: res_val = npy_datetimestruct_to_datetime(reso, &dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e - # Analogous to: out[i] = res_val - (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val - cnp.PyArray_MultiIter_NEXT(mi) + cnp.PyArray_MultiIter_NEXT(mi) return out diff --git a/pandas/_libs/tslibs/period.pxd b/pandas/_libs/tslibs/period.pxd index 8afe7c9b3f693..cbddc12616c9f 100644 --- a/pandas/_libs/tslibs/period.pxd +++ b/pandas/_libs/tslibs/period.pxd @@ -4,4 +4,4 @@ from .np_datetime cimport npy_datetimestruct cdef bint is_period_object(object obj) -cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) noexcept nogil +cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 518577895ec9b..cfc5f0b64c69a 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -43,6 +43,8 @@ from pandas._libs.tslibs.dtypes cimport ( freq_to_period_freqstr, ) +from pandas._libs.tslibs.np_datetime import 
OutOfBoundsDatetime + # import datetime C API import_datetime() @@ -352,7 +354,7 @@ cdef int64_t transform_via_day(int64_t ordinal, # -------------------------------------------------------------------- # Conversion _to_ Daily Freq -cdef int64_t asfreq_AtoDT(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_AtoDT(int64_t ordinal, asfreq_info *af_info): cdef: int64_t unix_date npy_datetimestruct dts @@ -368,7 +370,7 @@ cdef int64_t asfreq_AtoDT(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return upsample_daytime(unix_date, af_info) -cdef int64_t asfreq_QtoDT(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_QtoDT(int64_t ordinal, asfreq_info *af_info): cdef: int64_t unix_date npy_datetimestruct dts @@ -384,7 +386,7 @@ cdef int64_t asfreq_QtoDT(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return upsample_daytime(unix_date, af_info) -cdef int64_t asfreq_MtoDT(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_MtoDT(int64_t ordinal, asfreq_info *af_info): cdef: int64_t unix_date int year, month @@ -408,7 +410,7 @@ cdef int64_t asfreq_WtoDT(int64_t ordinal, asfreq_info *af_info) noexcept nogil: # -------------------------------------------------------------------- # Conversion _to_ BusinessDay Freq -cdef int64_t asfreq_AtoB(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_AtoB(int64_t ordinal, asfreq_info *af_info): cdef: int roll_back npy_datetimestruct dts @@ -419,7 +421,7 @@ cdef int64_t asfreq_AtoB(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return DtoB(&dts, roll_back, unix_date) -cdef int64_t asfreq_QtoB(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_QtoB(int64_t ordinal, asfreq_info *af_info): cdef: int roll_back npy_datetimestruct dts @@ -430,7 +432,7 @@ cdef int64_t asfreq_QtoB(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return DtoB(&dts, roll_back, unix_date) -cdef int64_t 
asfreq_MtoB(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_MtoB(int64_t ordinal, asfreq_info *af_info): cdef: int roll_back npy_datetimestruct dts @@ -467,7 +469,7 @@ cdef int64_t asfreq_DTtoB(int64_t ordinal, asfreq_info *af_info) noexcept nogil: # ---------------------------------------------------------------------- # Conversion _from_ Daily Freq -cdef int64_t asfreq_DTtoA(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_DTtoA(int64_t ordinal, asfreq_info *af_info): cdef: npy_datetimestruct dts @@ -723,7 +725,7 @@ cdef void adjust_dts_for_qtr(npy_datetimestruct* dts, int to_end) noexcept nogil # Find the unix_date (days elapsed since datetime(1970, 1, 1) # for the given year/month/day. # Assumes GREGORIAN_CALENDAR */ -cdef int64_t unix_date_from_ymd(int year, int month, int day) noexcept nogil: +cdef int64_t unix_date_from_ymd(int year, int month, int day): # Calculate the absolute date cdef: npy_datetimestruct dts @@ -733,7 +735,11 @@ cdef int64_t unix_date_from_ymd(int year, int month, int day) noexcept nogil: dts.year = year dts.month = month dts.day = day - unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, &dts) + try: + unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, &dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e + return unix_date @@ -742,11 +748,15 @@ cdef int64_t dts_to_month_ordinal(npy_datetimestruct* dts) noexcept nogil: return ((dts.year - 1970) * 12 + dts.month - 1) -cdef int64_t dts_to_year_ordinal(npy_datetimestruct *dts, int to_end) noexcept nogil: +cdef int64_t dts_to_year_ordinal(npy_datetimestruct *dts, int to_end): cdef: int64_t result - result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_Y, dts) + try: + result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_Y, dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e + if dts.month > to_end: return result + 1 else: @@ -774,7 +784,7 @@ cdef int get_anchor_month(int freq, int 
freq_group) noexcept nogil: # specifically _dont_ use cdvision or else ordinals near -1 are assigned to # incorrect dates GH#19643 @cython.cdivision(False) -cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) noexcept nogil: +cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq): """ Generate an ordinal in period space @@ -803,15 +813,26 @@ cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) noexcept nogi return dts_to_qtr_ordinal(dts, fmonth) elif freq_group == FR_WK: - unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) + try: + unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e return unix_date_to_week(unix_date, freq - FR_WK) elif freq == FR_BUS: - unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) + try: + unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e return DtoB(dts, 0, unix_date) unit = freq_group_code_to_npy_unit(freq) - return npy_datetimestruct_to_datetime(unit, dts) + try: + unix_date = npy_datetimestruct_to_datetime(unit, dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e + + return unix_date cdef void get_date_info(int64_t ordinal, @@ -1157,6 +1178,7 @@ cpdef int64_t period_ordinal(int y, int m, int d, int h, int min, cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1: cdef: npy_datetimestruct dts + int64_t result if ordinal == NPY_NAT: return NPY_NAT @@ -1164,7 +1186,13 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? 
-1: get_date_info(ordinal, freq, &dts) check_dts_bounds(&dts) - return npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_ns, &dts) + + try: + result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_ns, &dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e + + return result cdef str period_format(int64_t value, int freq, object fmt=None): diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 866181246a284..8b510b71c5113 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -327,7 +327,10 @@ def array_strptime( if string_to_dts_succeeded: # No error reported by string_to_dts, pick back up # where we left off - value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts) + try: + value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e if out_local == 1: # Store the out_tzoffset in seconds # since we store the total_seconds of @@ -337,7 +340,6 @@ def array_strptime( out_local = 0 out_tzoffset = 0 iresult[i] = value - check_dts_bounds(&dts) continue if parse_today_now(val, &iresult[i], utc): @@ -515,8 +517,10 @@ def array_strptime( dts.us = us dts.ps = ns * 1000 - iresult[i] = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts) - check_dts_bounds(&dts) + try: + iresult[i] = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e result_timezone[i] = tz diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index bf25eaeff19a5..683c312f5950b 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -581,7 +581,12 @@ cdef class _Timestamp(ABCTimestamp): if own_tz is not None and not is_utc(own_tz): pydatetime_to_dtstruct(self, &dts) - val = npy_datetimestruct_to_datetime(self._creso, &dts) + self.nanosecond + try: + # TODO: can + self.nanosecond also overflow here? 
+ val = npy_datetimestruct_to_datetime( + self._creso, &dts) + self.nanosecond + except OverflowError as e: + raise OutOfBoundsDatetime from e else: val = self._value return val @@ -2495,7 +2500,10 @@ default 'raise' # to datetimes outside of pydatetime range. ts = _TSObject() check_dts_bounds(&dts, self._creso) - ts.value = npy_datetimestruct_to_datetime(self._creso, &dts) + try: + ts.value = npy_datetimestruct_to_datetime(self._creso, &dts) + except OverflowError as e: + raise OutOfBoundsDatetime from e ts.dts = dts ts.creso = self._creso ts.fold = fold From f9e5e35eba2eed2388d8b1494a09168910cd9f81 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 20 Oct 2023 10:29:49 -0400 Subject: [PATCH 12/34] cleanup GIL warnings --- pandas/_libs/tslibs/period.pyx | 48 +++++++++++++++++----------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index cfc5f0b64c69a..d019a7f6ce415 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -133,7 +133,7 @@ ctypedef struct asfreq_info: int to_end int from_end -ctypedef int64_t (*freq_conv_func)(int64_t, asfreq_info*) noexcept nogil +ctypedef int64_t (*freq_conv_func)(int64_t, asfreq_info*) cdef extern from *: @@ -342,7 +342,7 @@ cdef int64_t downsample_daytime(int64_t ordinal, asfreq_info *af_info) noexcept cdef int64_t transform_via_day(int64_t ordinal, asfreq_info *af_info, freq_conv_func first_func, - freq_conv_func second_func) noexcept nogil: + freq_conv_func second_func): cdef: int64_t result @@ -522,30 +522,30 @@ cdef int64_t unix_date_to_week(int64_t unix_date, int to_end) noexcept nogil: # -------------------------------------------------------------------- # Conversion _from_ BusinessDay Freq -cdef int64_t asfreq_BtoDT(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_BtoDT(int64_t ordinal, asfreq_info *af_info): ordinal = ((ordinal + 3) // 5) * 7 + (ordinal + 3) % 5 - 3 return 
upsample_daytime(ordinal, af_info) -cdef int64_t asfreq_BtoA(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_BtoA(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_BtoDT, asfreq_DTtoA) -cdef int64_t asfreq_BtoQ(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_BtoQ(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_BtoDT, asfreq_DTtoQ) -cdef int64_t asfreq_BtoM(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_BtoM(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_BtoDT, asfreq_DTtoM) -cdef int64_t asfreq_BtoW(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_BtoW(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_BtoDT, asfreq_DTtoW) @@ -554,25 +554,25 @@ cdef int64_t asfreq_BtoW(int64_t ordinal, asfreq_info *af_info) noexcept nogil: # ---------------------------------------------------------------------- # Conversion _from_ Annual Freq -cdef int64_t asfreq_AtoA(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_AtoA(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_AtoDT, asfreq_DTtoA) -cdef int64_t asfreq_AtoQ(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_AtoQ(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_AtoDT, asfreq_DTtoQ) -cdef int64_t asfreq_AtoM(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_AtoM(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_AtoDT, asfreq_DTtoM) -cdef int64_t asfreq_AtoW(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_AtoW(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_AtoDT, asfreq_DTtoW) @@ -581,25 
+581,25 @@ cdef int64_t asfreq_AtoW(int64_t ordinal, asfreq_info *af_info) noexcept nogil: # ---------------------------------------------------------------------- # Conversion _from_ Quarterly Freq -cdef int64_t asfreq_QtoQ(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_QtoQ(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_QtoDT, asfreq_DTtoQ) -cdef int64_t asfreq_QtoA(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_QtoA(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_QtoDT, asfreq_DTtoA) -cdef int64_t asfreq_QtoM(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_QtoM(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_QtoDT, asfreq_DTtoM) -cdef int64_t asfreq_QtoW(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_QtoW(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_QtoDT, asfreq_DTtoW) @@ -608,19 +608,19 @@ cdef int64_t asfreq_QtoW(int64_t ordinal, asfreq_info *af_info) noexcept nogil: # ---------------------------------------------------------------------- # Conversion _from_ Monthly Freq -cdef int64_t asfreq_MtoA(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_MtoA(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_MtoDT, asfreq_DTtoA) -cdef int64_t asfreq_MtoQ(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_MtoQ(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_MtoDT, asfreq_DTtoQ) -cdef int64_t asfreq_MtoW(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_MtoW(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_MtoDT, asfreq_DTtoW) @@ -629,25 +629,25 @@ cdef int64_t asfreq_MtoW(int64_t ordinal, 
asfreq_info *af_info) noexcept nogil: # ---------------------------------------------------------------------- # Conversion _from_ Weekly Freq -cdef int64_t asfreq_WtoA(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_WtoA(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_WtoDT, asfreq_DTtoA) -cdef int64_t asfreq_WtoQ(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_WtoQ(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_WtoDT, asfreq_DTtoQ) -cdef int64_t asfreq_WtoM(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_WtoM(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_WtoDT, asfreq_DTtoM) -cdef int64_t asfreq_WtoW(int64_t ordinal, asfreq_info *af_info) noexcept nogil: +cdef int64_t asfreq_WtoW(int64_t ordinal, asfreq_info *af_info): return transform_via_day(ordinal, af_info, asfreq_WtoDT, asfreq_DTtoW) @@ -836,7 +836,7 @@ cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq): cdef void get_date_info(int64_t ordinal, - int freq, npy_datetimestruct *dts) noexcept nogil: + int freq, npy_datetimestruct *dts): cdef: int64_t unix_date, nanos npy_datetimestruct dts2 @@ -854,7 +854,7 @@ cdef void get_date_info(int64_t ordinal, dts.ps = dts2.ps -cdef int64_t get_unix_date(int64_t period_ordinal, int freq) noexcept nogil: +cdef int64_t get_unix_date(int64_t period_ordinal, int freq): """ Returns the proleptic Gregorian ordinal of the date, as an integer. This corresponds to the number of days since Jan., 1st, 1970 AD. 
From 1b89dfe48ee1e730844870ef53122662ec68ad60 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 20 Oct 2023 10:57:21 -0400 Subject: [PATCH 13/34] more error handling cleanup --- pandas/_libs/tslib.pyx | 6 ++++-- pandas/_libs/tslibs/conversion.pyx | 7 ++++--- pandas/_libs/tslibs/np_datetime.pyx | 2 -- pandas/_libs/tslibs/period.pyx | 5 +---- pandas/_libs/tslibs/strptime.pyx | 8 ++++++-- pandas/_libs/tslibs/timestamps.pyx | 2 -- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 0317866ff6d66..7a108b2515be3 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -96,7 +96,7 @@ def _test_parse_iso8601(ts: str): try: obj.value = npy_datetimestruct_to_datetime(NPY_FR_ns, &obj.dts) except OverflowError as e: - raise OutOfBoundsDatetime from e + raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp: {ts}") from e if out_local == 1: obj.tzinfo = timezone(timedelta(minutes=out_tzoffset)) obj.value = tz_localize_to_utc_single(obj.value, obj.tzinfo) @@ -492,7 +492,9 @@ cpdef array_to_datetime( try: iresult[i] = pydate_to_dt64(val, &dts) except OverflowError as e: - raise OutOfBoundsDatetime from e + raise OutOfBoundsDatetime( + f"Out of bounds nanosecond timestamp: {val}" + ) from e elif is_datetime64_object(val): iresult[i] = get_datetime64_nanos(val, NPY_FR_ns) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 1888aa45407b2..6dbb13e7ff095 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -206,11 +206,12 @@ cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? 
-1: if unit != reso: pandas_datetime_to_datetimestruct(ival, unit, &dts) - check_dts_bounds(&dts, reso) try: ival = npy_datetimestruct_to_datetime(reso, &dts) except OverflowError as e: - raise OutOfBoundsDatetime + raise OutOfBoundsDatetime( + "Out of bounds nanosecond timestamp: {val}" + ) from e return ival @@ -404,7 +405,7 @@ cdef _TSObject convert_datetime_to_tsobject( try: obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts) except OverflowError as e: - raise OutOfBoundsDatetime from e + raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from e if obj.tzinfo is not None and not is_utc(obj.tzinfo): offset = get_utcoffset(obj.tzinfo, ts) diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 89ca6f3421aa4..90c3b6a279132 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -676,8 +676,6 @@ cdef int64_t _convert_reso_with_dtstruct( int64_t result pandas_datetime_to_datetimestruct(value, from_unit, &dts) - check_dts_bounds(&dts, to_unit) - try: result = npy_datetimestruct_to_datetime(to_unit, &dts) except OverflowError as e: diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index d019a7f6ce415..4bc110b924858 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -54,7 +54,6 @@ from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_D, astype_overflowsafe, - check_dts_bounds, get_timedelta64_value, import_pandas_datetime, npy_datetimestruct, @@ -1185,12 +1184,10 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? 
-1: get_date_info(ordinal, freq, &dts) - check_dts_bounds(&dts) - try: result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_ns, &dts) except OverflowError as e: - raise OutOfBoundsDatetime from e + raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from e return result diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 8b510b71c5113..29407c7f2f46d 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -330,7 +330,9 @@ def array_strptime( try: value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts) except OverflowError as e: - raise OutOfBoundsDatetime from e + raise OutOfBoundsDatetime( + f"Out of bounds nanosecond timestamp: {val}" + ) from e if out_local == 1: # Store the out_tzoffset in seconds # since we store the total_seconds of @@ -520,7 +522,9 @@ def array_strptime( try: iresult[i] = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts) except OverflowError as e: - raise OutOfBoundsDatetime from e + raise OutOfBoundsDatetime( + f"Out of bounds nanosecond timestamp: {val}" + ) from e result_timezone[i] = tz diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 683c312f5950b..bb70ce97abb35 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -83,7 +83,6 @@ from pandas._libs.tslibs.nattype cimport ( from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, - check_dts_bounds, cmp_dtstructs, cmp_scalar, convert_reso, @@ -2499,7 +2498,6 @@ default 'raise' # We can avoid going through pydatetime paths, which is robust # to datetimes outside of pydatetime range. 
ts = _TSObject() - check_dts_bounds(&dts, self._creso) try: ts.value = npy_datetimestruct_to_datetime(self._creso, &dts) except OverflowError as e: From 73a1507460ca2b01cd65a60df79e7989e4f721a7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 20 Oct 2023 12:07:26 -0400 Subject: [PATCH 14/34] updates --- pandas/_libs/tslib.pyx | 7 +----- pandas/_libs/tslibs/conversion.pyx | 21 +++++++--------- pandas/_libs/tslibs/np_datetime.pyx | 18 +++++++------- pandas/_libs/tslibs/offsets.pyx | 12 +++++----- pandas/_libs/tslibs/period.pyx | 24 +++++++++---------- pandas/_libs/tslibs/strptime.pyx | 11 ++++----- pandas/_libs/tslibs/timestamps.pyx | 8 +++---- pandas/tests/tslibs/test_array_to_datetime.py | 3 ++- 8 files changed, 48 insertions(+), 56 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 7a108b2515be3..3f8aa64a60a93 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -489,12 +489,7 @@ cpdef array_to_datetime( iresult[i] = parse_pydatetime(val, &dts, utc_convert, creso=creso) elif PyDate_Check(val): - try: - iresult[i] = pydate_to_dt64(val, &dts) - except OverflowError as e: - raise OutOfBoundsDatetime( - f"Out of bounds nanosecond timestamp: {val}" - ) from e + iresult[i] = pydate_to_dt64(val, &dts) elif is_datetime64_object(val): iresult[i] = get_datetime64_nanos(val, NPY_FR_ns) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 6dbb13e7ff095..a7f8334871636 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -208,10 +208,10 @@ cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? 
-1: pandas_datetime_to_datetimestruct(ival, unit, &dts) try: ival = npy_datetimestruct_to_datetime(reso, &dts) - except OverflowError as e: + except OverflowError as err: raise OutOfBoundsDatetime( "Out of bounds nanosecond timestamp: {val}" - ) from e + ) from err return ival @@ -404,8 +404,8 @@ cdef _TSObject convert_datetime_to_tsobject( try: obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts) - except OverflowError as e: - raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from e + except OverflowError as err: + raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from err if obj.tzinfo is not None and not is_utc(obj.tzinfo): offset = get_utcoffset(obj.tzinfo, ts) @@ -444,8 +444,8 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, try: value = npy_datetimestruct_to_datetime(reso, &dts) - except OverflowError as e: - raise OutOfBoundsDatetime from e + except OverflowError as err: + raise OutOfBoundsDatetime from err obj.dts = dts obj.tzinfo = timezone(timedelta(minutes=tzoffset)) obj.value = tz_localize_to_utc_single( @@ -543,8 +543,8 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, str unit, else: try: ival = npy_datetimestruct_to_datetime(reso, &dts) - except OverflowError as e: - raise OutOfBoundsDatetime from e + except OverflowError as err: + raise OutOfBoundsDatetime from err if tz is not None: # shift for _localize_tso ival = tz_localize_to_utc_single( @@ -778,8 +778,5 @@ cdef int64_t parse_pydatetime( if isinstance(val, _Timestamp): result = (<_Timestamp>val)._as_creso(creso, round_ok=False)._value else: - try: - result = pydatetime_to_dt64(val, dts, reso=creso) - except OverflowError as e: - raise OutOfBoundsDatetime from e + result = pydatetime_to_dt64(val, dts, reso=creso) return result diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 90c3b6a279132..9b0684d641af9 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ 
b/pandas/_libs/tslibs/np_datetime.pyx @@ -270,8 +270,10 @@ cdef int64_t pydatetime_to_dt64(datetime val, pydatetime_to_dtstruct(val, dts) try: result = npy_datetimestruct_to_datetime(reso, dts) - except OverflowError as e: - raise OutOfBoundsDatetime from e + except OverflowError as err: + raise OutOfBoundsDatetime( + f"Out of bounds nanosecond timestamp: {val}" + ) from err return result @@ -292,8 +294,8 @@ cdef int64_t pydate_to_dt64( try: result = npy_datetimestruct_to_datetime(reso, dts) - except OverflowError as e: - raise OutOfBoundsDatetime from e + except OverflowError as err: + raise OutOfBoundsDatetime from err return result @@ -427,8 +429,8 @@ cpdef ndarray astype_overflowsafe( else: try: new_value = npy_datetimestruct_to_datetime(to_unit, &dts) - except OverflowError as e: - raise OutOfBoundsDatetime from e + except OverflowError as err: + raise OutOfBoundsDatetime from err # Analogous to: iresult[i] = new_value (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = new_value @@ -678,7 +680,7 @@ cdef int64_t _convert_reso_with_dtstruct( pandas_datetime_to_datetimestruct(value, from_unit, &dts) try: result = npy_datetimestruct_to_datetime(to_unit, &dts) - except OverflowError as e: - raise OutOfBoundsDatetime from e + except OverflowError as err: + raise OutOfBoundsDatetime from err return result diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 28988b500b7fb..6d3753098085e 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -4901,8 +4901,8 @@ cdef ndarray shift_quarters( try: res_val = npy_datetimestruct_to_datetime(reso, &dts) - except OverflowError as e: - raise OutOfBoundsDatetime from e + except OverflowError as err: + raise OutOfBoundsDatetime from err # Analogous to: out[i] = res_val (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val @@ -4961,8 +4961,8 @@ def shift_months( dts.day = min(dts.day, get_days_in_month(dts.year, dts.month)) try: res_val = npy_datetimestruct_to_datetime(reso, &dts) - 
except OverflowError as e: - raise OutOfBoundsDatetime from e + except OverflowError as err: + raise OutOfBoundsDatetime from err # Analogous to: out[i] = res_val (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val @@ -4989,8 +4989,8 @@ def shift_months( try: res_val = npy_datetimestruct_to_datetime(reso, &dts) - except OverflowError as e: - raise OutOfBoundsDatetime from e + except OverflowError as err: + raise OutOfBoundsDatetime from err # Analogous to: out[i] = res_val (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 4bc110b924858..a268be14c17f0 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -736,8 +736,8 @@ cdef int64_t unix_date_from_ymd(int year, int month, int day): dts.day = day try: unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, &dts) - except OverflowError as e: - raise OutOfBoundsDatetime from e + except OverflowError as err: + raise OutOfBoundsDatetime from err return unix_date @@ -753,8 +753,8 @@ cdef int64_t dts_to_year_ordinal(npy_datetimestruct *dts, int to_end): try: result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_Y, dts) - except OverflowError as e: - raise OutOfBoundsDatetime from e + except OverflowError as err: + raise OutOfBoundsDatetime from err if dts.month > to_end: return result + 1 @@ -814,22 +814,22 @@ cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq): elif freq_group == FR_WK: try: unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) - except OverflowError as e: - raise OutOfBoundsDatetime from e + except OverflowError as err: + raise OutOfBoundsDatetime from err return unix_date_to_week(unix_date, freq - FR_WK) elif freq == FR_BUS: try: unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) - except OverflowError as e: - raise OutOfBoundsDatetime from e + except OverflowError as err: + raise OutOfBoundsDatetime from err return DtoB(dts, 0, unix_date) unit = 
freq_group_code_to_npy_unit(freq) try: unix_date = npy_datetimestruct_to_datetime(unit, dts) - except OverflowError as e: - raise OutOfBoundsDatetime from e + except OverflowError as err: + raise OutOfBoundsDatetime from err return unix_date @@ -1186,8 +1186,8 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1: try: result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_ns, &dts) - except OverflowError as e: - raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from e + except OverflowError as err: + raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from err return result diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 29407c7f2f46d..2a84ea443d659 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -59,7 +59,6 @@ from pandas._libs.tslibs.nattype cimport ( from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, - check_dts_bounds, import_pandas_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, @@ -295,12 +294,10 @@ def array_strptime( iresult[i] = val.tz_localize(None).as_unit("ns")._value else: iresult[i] = pydatetime_to_dt64(val.replace(tzinfo=None), &dts) - check_dts_bounds(&dts) result_timezone[i] = val.tzinfo continue elif PyDate_Check(val): iresult[i] = pydate_to_dt64(val, &dts) - check_dts_bounds(&dts) continue elif is_datetime64_object(val): iresult[i] = get_datetime64_nanos(val, NPY_FR_ns) @@ -329,10 +326,10 @@ def array_strptime( # where we left off try: value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts) - except OverflowError as e: + except OverflowError as err: raise OutOfBoundsDatetime( f"Out of bounds nanosecond timestamp: {val}" - ) from e + ) from err if out_local == 1: # Store the out_tzoffset in seconds # since we store the total_seconds of @@ -521,10 +518,10 @@ def array_strptime( try: iresult[i] = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts) - except OverflowError as e: + 
except OverflowError as err: raise OutOfBoundsDatetime( f"Out of bounds nanosecond timestamp: {val}" - ) from e + ) from err result_timezone[i] = tz diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index bb70ce97abb35..99fb962cd3078 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -584,8 +584,8 @@ cdef class _Timestamp(ABCTimestamp): # TODO: can + self.nanosecond also overflow here? val = npy_datetimestruct_to_datetime( self._creso, &dts) + self.nanosecond - except OverflowError as e: - raise OutOfBoundsDatetime from e + except OverflowError as err: + raise OutOfBoundsDatetime from err else: val = self._value return val @@ -2500,8 +2500,8 @@ default 'raise' ts = _TSObject() try: ts.value = npy_datetimestruct_to_datetime(self._creso, &dts) - except OverflowError as e: - raise OutOfBoundsDatetime from e + except OverflowError as err: + raise OutOfBoundsDatetime from err ts.dts = dts ts.creso = self._creso ts.fold = fold diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index d83d59b932b4d..7e2813d1b1b61 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -13,6 +13,7 @@ iNaT, tslib, ) +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas import Timestamp import pandas._testing as tm @@ -132,7 +133,7 @@ def test_coerce_outside_ns_bounds(invalid_date, errors): if errors == "raise": msg = "^Out of bounds nanosecond timestamp: .*, at position 0$" - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsDatetime, match=msg): tslib.array_to_datetime(**kwargs) else: # coerce. 
result, _ = tslib.array_to_datetime(**kwargs) From ef245092f3afca26a68e56631a792d3fd6fe0b62 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 20 Oct 2023 12:10:26 -0400 Subject: [PATCH 15/34] error message update --- pandas/_libs/tslibs/period.pyx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index a268be14c17f0..07e50e29456eb 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1187,7 +1187,11 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1: try: result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_ns, &dts) except OverflowError as err: - raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from err + # TODO: this is copied from check_dts_bounds, with the thought that + # eventually we can get rid of check_dts_bounds + fmt = (f"{dts.year}-{dts.month:02d}-{dts.day:02d} " + f"{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}") + raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp: {fmt}") from err return result From 2240b091046d81e4975dc350b24ba5efbd6c67b4 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 20 Oct 2023 15:51:07 -0400 Subject: [PATCH 16/34] error fixups --- pandas/_libs/tslibs/np_datetime.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 9b0684d641af9..bdfc97f4446c6 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -295,7 +295,7 @@ cdef int64_t pydate_to_dt64( try: result = npy_datetimestruct_to_datetime(reso, dts) except OverflowError as err: - raise OutOfBoundsDatetime from err + raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp: {val}") from err return result From 16400026a892ab01578f327c2ead0f6a258dd25d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 21 Oct 2023 23:25:16 -0400 Subject: [PATCH 17/34] test fixup --- 
pandas/_libs/tslibs/timestamps.pyx | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 99fb962cd3078..fd76a4a101f5b 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -2501,7 +2501,13 @@ default 'raise' try: ts.value = npy_datetimestruct_to_datetime(self._creso, &dts) except OverflowError as err: - raise OutOfBoundsDatetime from err + # TODO: this is copied from check_dts_bounds, with the thought that + # eventually we can get rid of check_dts_bounds + fmt = (f"{dts.year}-{dts.month:02d}-{dts.day:02d} " + f"{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}") + raise OutOfBoundsDatetime( + f"Out of bounds nanosecond timestamp: {fmt}" + ) from err ts.dts = dts ts.creso = self._creso ts.fold = fold From 9cdb9c901225ecab52efccee1b1a230d8ceb6fca Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 23 Oct 2023 10:00:09 -0400 Subject: [PATCH 18/34] clang-format --- .pre-commit-config.yaml | 20 +- .../src/vendored/numpy/datetime/np_datetime.c | 1682 ++++++++--------- 2 files changed, 847 insertions(+), 855 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c911edfa03670..a9a9baac6069a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -70,19 +70,6 @@ repos: - id: fix-encoding-pragma args: [--remove] - id: trailing-whitespace -- repo: https://github.com/cpplint/cpplint - rev: 1.6.1 - hooks: - - id: cpplint - exclude: ^pandas/_libs/include/pandas/vendored/klib - args: [ - --quiet, - '--extensions=c,h', - '--headers=h', - --recursive, - --linelength=88, - '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size' - ] - repo: https://github.com/pylint-dev/pylint rev: v3.0.0b0 hooks: @@ -127,6 +114,13 @@ repos: rev: v0.6.8 hooks: - id: sphinx-lint +- repo: https://github.com/pre-commit/mirrors-clang-format + rev: ea59a72 + hooks: + - id: clang-format + files: 
^pandas/_libs/src|^pandas/_libs/include + args: [-i] + types_or: [c, c++] - repo: local hooks: - id: pyright diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index 48f3c4e0a41af..e53ccabe721cd 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -20,60 +20,64 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt #ifndef NPY_NO_DEPRECATED_API #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#endif // NPY_NO_DEPRECATED_API +#endif // NPY_NO_DEPRECATED_API #include +#include "pandas/vendored/numpy/datetime/np_datetime.h" #include #include #include -#include "pandas/vendored/numpy/datetime/np_datetime.h" #if defined(_WIN32) -# ifndef ENABLE_INTSAFE_SIGNED_FUNCTIONS -# define ENABLE_INTSAFE_SIGNED_FUNCTIONS -# endif -# include -# define checked_int64_add(a, b, res) LongLongAdd(a, b, res) -# define checked_int64_sub(a, b, res) LongLongSub(a, b, res) -# define checked_int64_mul(a, b, res) LongLongMult(a, b, res) +#ifndef ENABLE_INTSAFE_SIGNED_FUNCTIONS +#define ENABLE_INTSAFE_SIGNED_FUNCTIONS +#endif +#include +#define checked_int64_add(a, b, res) LongLongAdd(a, b, res) +#define checked_int64_sub(a, b, res) LongLongSub(a, b, res) +#define checked_int64_mul(a, b, res) LongLongMult(a, b, res) +#else +#if defined __has_builtin +#if __has_builtin(__builtin_add_overflow) +#if _LP64 || __LP64__ || _ILP64 || __ILP64__ +#define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res) +#define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res) +#define checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res) #else -# if defined __has_builtin -# if __has_builtin(__builtin_add_overflow) -# if _LP64 || __LP64__ || _ILP64 || __ILP64__ -# define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res) -# define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res) -# define 
checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res) -# else -# define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res) -# define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res) -# define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res) -# endif -# else -_Static_assert(0, "Overflow checking not detected; please try a newer compiler"); -# endif +#define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res) +#define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res) +#define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res) +#endif +#else +_Static_assert(0, + "Overflow checking not detected; please try a newer compiler"); +#endif // __has_builtin was added in gcc 10, but our muslinux_1_1 build environment // only has gcc-9.3, so fall back to __GNUC__ macro as long as we have that -# elif __GNUC__ > 7 -# if _LP64 || __LP64__ || _ILP64 || __ILP64__ -# define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res) -# define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res) -# define checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res) -# else -# define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res) -# define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res) -# define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res) -# endif -# else +#elif __GNUC__ > 7 +#if _LP64 || __LP64__ || _ILP64 || __ILP64__ +#define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res) +#define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res) +#define checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res) +#else +#define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res) +#define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res) +#define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res) +#endif +#else _Static_assert(0, 
"__has_builtin not detected; please try a newer compiler"); -# endif +#endif #endif -#define PD_CHECK_OVERFLOW(FUNC) do { if ((FUNC) != 0) { \ - PyErr_SetString(PyExc_OverflowError, \ - "Overflow occurred in npy_datetimestruct_to_datetime"); \ - return -1; \ -}} while (0) +#define PD_CHECK_OVERFLOW(FUNC) \ + do { \ + if ((FUNC) != 0) { \ + PyErr_SetString(PyExc_OverflowError, \ + "Overflow occurred in npy_datetimestruct_to_datetime"); \ + return -1; \ + } \ + } while (0) const int days_per_month_table[2][12] = { {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, @@ -83,8 +87,8 @@ const int days_per_month_table[2][12] = { * Returns 1 if the given year is a leap year, 0 otherwise. */ int is_leapyear(npy_int64 year) { - return (year & 0x3) == 0 && /* year % 4 == 0 */ - ((year % 100) != 0 || (year % 400) == 0); + return (year & 0x3) == 0 && /* year % 4 == 0 */ + ((year % 100) != 0 || (year % 400) == 0); } /* @@ -92,108 +96,108 @@ int is_leapyear(npy_int64 year) { * the current values are valid.g */ void add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) { - int isleap; + int isleap; - /* MINUTES */ - dts->min += minutes; - while (dts->min < 0) { - dts->min += 60; - dts->hour--; - } - while (dts->min >= 60) { - dts->min -= 60; - dts->hour++; - } + /* MINUTES */ + dts->min += minutes; + while (dts->min < 0) { + dts->min += 60; + dts->hour--; + } + while (dts->min >= 60) { + dts->min -= 60; + dts->hour++; + } - /* HOURS */ - while (dts->hour < 0) { - dts->hour += 24; - dts->day--; - } - while (dts->hour >= 24) { - dts->hour -= 24; - dts->day++; - } + /* HOURS */ + while (dts->hour < 0) { + dts->hour += 24; + dts->day--; + } + while (dts->hour >= 24) { + dts->hour -= 24; + dts->day++; + } - /* DAYS */ - if (dts->day < 1) { - dts->month--; - if (dts->month < 1) { - dts->year--; - dts->month = 12; - } - isleap = is_leapyear(dts->year); - dts->day += days_per_month_table[isleap][dts->month - 1]; - } else if (dts->day > 28) { - isleap = is_leapyear(dts->year); 
- if (dts->day > days_per_month_table[isleap][dts->month - 1]) { - dts->day -= days_per_month_table[isleap][dts->month - 1]; - dts->month++; - if (dts->month > 12) { - dts->year++; - dts->month = 1; - } - } + /* DAYS */ + if (dts->day < 1) { + dts->month--; + if (dts->month < 1) { + dts->year--; + dts->month = 12; + } + isleap = is_leapyear(dts->year); + dts->day += days_per_month_table[isleap][dts->month - 1]; + } else if (dts->day > 28) { + isleap = is_leapyear(dts->year); + if (dts->day > days_per_month_table[isleap][dts->month - 1]) { + dts->day -= days_per_month_table[isleap][dts->month - 1]; + dts->month++; + if (dts->month > 12) { + dts->year++; + dts->month = 1; + } } + } } /* * Calculates the days offset from the 1970 epoch. */ npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { - int i, month; - npy_int64 year, days = 0; - const int *month_lengths; - - year = dts->year - 1970; - days = year * 365; - - /* Adjust for leap years */ - if (days >= 0) { - /* - * 1968 is the closest leap year before 1970. - * Exclude the current year, so add 1. - */ - year += 1; - /* Add one day for each 4 years */ - days += year / 4; - /* 1900 is the closest previous year divisible by 100 */ - year += 68; - /* Subtract one day for each 100 years */ - days -= year / 100; - /* 1600 is the closest previous year divisible by 400 */ - year += 300; - /* Add one day for each 400 years */ - days += year / 400; - } else { - /* - * 1972 is the closest later year after 1970. - * Include the current year, so subtract 2. 
- */ - year -= 2; - /* Subtract one day for each 4 years */ - days += year / 4; - /* 2000 is the closest later year divisible by 100 */ - year -= 28; - /* Add one day for each 100 years */ - days -= year / 100; - /* 2000 is also the closest later year divisible by 400 */ - /* Subtract one day for each 400 years */ - days += year / 400; - } + int i, month; + npy_int64 year, days = 0; + const int *month_lengths; - month_lengths = days_per_month_table[is_leapyear(dts->year)]; - month = dts->month - 1; + year = dts->year - 1970; + days = year * 365; - /* Add the months */ - for (i = 0; i < month; ++i) { - days += month_lengths[i]; - } + /* Adjust for leap years */ + if (days >= 0) { + /* + * 1968 is the closest leap year before 1970. + * Exclude the current year, so add 1. + */ + year += 1; + /* Add one day for each 4 years */ + days += year / 4; + /* 1900 is the closest previous year divisible by 100 */ + year += 68; + /* Subtract one day for each 100 years */ + days -= year / 100; + /* 1600 is the closest previous year divisible by 400 */ + year += 300; + /* Add one day for each 400 years */ + days += year / 400; + } else { + /* + * 1972 is the closest later year after 1970. + * Include the current year, so subtract 2. + */ + year -= 2; + /* Subtract one day for each 4 years */ + days += year / 4; + /* 2000 is the closest later year divisible by 100 */ + year -= 28; + /* Add one day for each 100 years */ + days -= year / 100; + /* 2000 is also the closest later year divisible by 400 */ + /* Subtract one day for each 400 years */ + days += year / 400; + } - /* Add the days */ - days += dts->day - 1; + month_lengths = days_per_month_table[is_leapyear(dts->year)]; + month = dts->month - 1; - return days; + /* Add the months */ + for (i = 0; i < month; ++i) { + days += month_lengths[i]; + } + + /* Add the days */ + days += dts->day - 1; + + return days; } /* @@ -201,62 +205,61 @@ npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { * and returns the year. 
*/ static npy_int64 days_to_yearsdays(npy_int64 *days_) { - const npy_int64 days_per_400years = (400 * 365 + 100 - 4 + 1); - /* Adjust so it's relative to the year 2000 (divisible by 400) */ - npy_int64 days = (*days_) - (365 * 30 + 7); - npy_int64 year; - - /* Break down the 400 year cycle to get the year and day within the year */ - if (days >= 0) { - year = 400 * (days / days_per_400years); - days = days % days_per_400years; - } else { - year = 400 * ((days - (days_per_400years - 1)) / days_per_400years); - days = days % days_per_400years; - if (days < 0) { - days += days_per_400years; - } + const npy_int64 days_per_400years = (400 * 365 + 100 - 4 + 1); + /* Adjust so it's relative to the year 2000 (divisible by 400) */ + npy_int64 days = (*days_) - (365 * 30 + 7); + npy_int64 year; + + /* Break down the 400 year cycle to get the year and day within the year */ + if (days >= 0) { + year = 400 * (days / days_per_400years); + days = days % days_per_400years; + } else { + year = 400 * ((days - (days_per_400years - 1)) / days_per_400years); + days = days % days_per_400years; + if (days < 0) { + days += days_per_400years; } + } - /* Work out the year/day within the 400 year cycle */ - if (days >= 366) { - year += 100 * ((days - 1) / (100 * 365 + 25 - 1)); - days = (days - 1) % (100 * 365 + 25 - 1); - if (days >= 365) { - year += 4 * ((days + 1) / (4 * 365 + 1)); - days = (days + 1) % (4 * 365 + 1); - if (days >= 366) { - year += (days - 1) / 365; - days = (days - 1) % 365; - } - } + /* Work out the year/day within the 400 year cycle */ + if (days >= 366) { + year += 100 * ((days - 1) / (100 * 365 + 25 - 1)); + days = (days - 1) % (100 * 365 + 25 - 1); + if (days >= 365) { + year += 4 * ((days + 1) / (4 * 365 + 1)); + days = (days + 1) % (4 * 365 + 1); + if (days >= 366) { + year += (days - 1) / 365; + days = (days - 1) % 365; + } } + } - *days_ = days; - return year + 2000; + *days_ = days; + return year + 2000; } - /* * Fills in the year, month, day in 'dts' based 
on the days * offset from 1970. */ static void set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) { - const int *month_lengths; - int i; - - dts->year = days_to_yearsdays(&days); - month_lengths = days_per_month_table[is_leapyear(dts->year)]; - - for (i = 0; i < 12; ++i) { - if (days < month_lengths[i]) { - dts->month = i + 1; - dts->day = days + 1; - return; - } else { - days -= month_lengths[i]; - } + const int *month_lengths; + int i; + + dts->year = days_to_yearsdays(&days); + month_lengths = days_per_month_table[is_leapyear(dts->year)]; + + for (i = 0; i < 12; ++i) { + if (days < month_lengths[i]) { + dts->month = i + 1; + dts->day = days + 1; + return; + } else { + days -= month_lengths[i]; } + } } /* @@ -264,98 +267,97 @@ static void set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) { */ int cmp_npy_datetimestruct(const npy_datetimestruct *a, const npy_datetimestruct *b) { - if (a->year > b->year) { - return 1; - } else if (a->year < b->year) { - return -1; - } + if (a->year > b->year) { + return 1; + } else if (a->year < b->year) { + return -1; + } - if (a->month > b->month) { - return 1; - } else if (a->month < b->month) { - return -1; - } + if (a->month > b->month) { + return 1; + } else if (a->month < b->month) { + return -1; + } - if (a->day > b->day) { - return 1; - } else if (a->day < b->day) { - return -1; - } + if (a->day > b->day) { + return 1; + } else if (a->day < b->day) { + return -1; + } - if (a->hour > b->hour) { - return 1; - } else if (a->hour < b->hour) { - return -1; - } + if (a->hour > b->hour) { + return 1; + } else if (a->hour < b->hour) { + return -1; + } - if (a->min > b->min) { - return 1; - } else if (a->min < b->min) { - return -1; - } + if (a->min > b->min) { + return 1; + } else if (a->min < b->min) { + return -1; + } - if (a->sec > b->sec) { - return 1; - } else if (a->sec < b->sec) { - return -1; - } + if (a->sec > b->sec) { + return 1; + } else if (a->sec < b->sec) { + return -1; + } - if (a->us > 
b->us) { - return 1; - } else if (a->us < b->us) { - return -1; - } + if (a->us > b->us) { + return 1; + } else if (a->us < b->us) { + return -1; + } - if (a->ps > b->ps) { - return 1; - } else if (a->ps < b->ps) { - return -1; - } + if (a->ps > b->ps) { + return 1; + } else if (a->ps < b->ps) { + return -1; + } - if (a->as > b->as) { - return 1; - } else if (a->as < b->as) { - return -1; - } + if (a->as > b->as) { + return 1; + } else if (a->as < b->as) { + return -1; + } - return 0; + return 0; } /* -* Returns the offset from utc of the timezone as a timedelta. -* The caller is responsible for ensuring that the tzinfo -* attribute exists on the datetime object. -* -* If the passed object is timezone naive, Py_None is returned. -* If extraction of the offset fails, NULL is returned. -* -* NOTE: This function is not vendored from numpy. -*/ + * Returns the offset from utc of the timezone as a timedelta. + * The caller is responsible for ensuring that the tzinfo + * attribute exists on the datetime object. + * + * If the passed object is timezone naive, Py_None is returned. + * If extraction of the offset fails, NULL is returned. + * + * NOTE: This function is not vendored from numpy. 
+ */ PyObject *extract_utc_offset(PyObject *obj) { - PyObject *tmp = PyObject_GetAttrString(obj, "tzinfo"); - if (tmp == NULL) { - return NULL; - } - if (tmp != Py_None) { - PyObject *offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj); - if (offset == NULL) { - Py_DECREF(tmp); - return NULL; - } - return offset; + PyObject *tmp = PyObject_GetAttrString(obj, "tzinfo"); + if (tmp == NULL) { + return NULL; + } + if (tmp != Py_None) { + PyObject *offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj); + if (offset == NULL) { + Py_DECREF(tmp); + return NULL; } - return tmp; + return offset; + } + return tmp; } - -static inline int scaleYearToEpoch(int64_t year, int64_t* result) { +static inline int scaleYearToEpoch(int64_t year, int64_t *result) { return checked_int64_sub(year, 1970, result); } -static inline int scaleYearsToMonths(int64_t years, int64_t* result) { +static inline int scaleYearsToMonths(int64_t years, int64_t *result) { return checked_int64_mul(years, 12, result); } -static inline int scaleDaysToWeeks(int64_t days, int64_t* result) { +static inline int scaleDaysToWeeks(int64_t days, int64_t *result) { if (days >= 0) { *result = days / 7; return 0; @@ -371,43 +373,43 @@ static inline int scaleDaysToWeeks(int64_t days, int64_t* result) { } } -static inline int scaleDaysToHours(int64_t days, int64_t* result) { +static inline int scaleDaysToHours(int64_t days, int64_t *result) { return checked_int64_mul(days, 24, result); } -static inline int scaleHoursToMinutes(int64_t hours, int64_t* result) { +static inline int scaleHoursToMinutes(int64_t hours, int64_t *result) { return checked_int64_mul(hours, 60, result); } -static inline int scaleMinutesToSeconds(int64_t minutes, int64_t* result) { +static inline int scaleMinutesToSeconds(int64_t minutes, int64_t *result) { return checked_int64_mul(minutes, 60, result); } -static inline int scaleSecondsToMilliseconds(int64_t seconds, int64_t* result) { +static inline int scaleSecondsToMilliseconds(int64_t 
seconds, int64_t *result) { return checked_int64_mul(seconds, 1000, result); } -static inline int scaleSecondsToMicroseconds(int64_t seconds, int64_t* result) { +static inline int scaleSecondsToMicroseconds(int64_t seconds, int64_t *result) { return checked_int64_mul(seconds, 1000000, result); } static inline int scaleMicrosecondsToNanoseconds(int64_t microseconds, - int64_t* result) { + int64_t *result) { return checked_int64_mul(microseconds, 1000, result); } static inline int scaleMicrosecondsToPicoseconds(int64_t microseconds, - int64_t* result) { + int64_t *result) { return checked_int64_mul(microseconds, 1000000, result); } static inline int64_t scalePicosecondsToFemtoseconds(int64_t picoseconds, - int64_t* result) { + int64_t *result) { return checked_int64_mul(picoseconds, 1000, result); } static inline int64_t scalePicosecondsToAttoseconds(int64_t picoseconds, - int64_t* result) { + int64_t *result) { return checked_int64_mul(picoseconds, 1000000, result); } @@ -417,112 +419,116 @@ static inline int64_t scalePicosecondsToAttoseconds(int64_t picoseconds, */ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, const npy_datetimestruct *dts) { - if ((base == NPY_FR_Y) || (base == NPY_FR_M)) { - int64_t years; - PD_CHECK_OVERFLOW(scaleYearToEpoch(dts->year, &years)); - - if (base == NPY_FR_Y) { - return years; - } + if ((base == NPY_FR_Y) || (base == NPY_FR_M)) { + int64_t years; + PD_CHECK_OVERFLOW(scaleYearToEpoch(dts->year, &years)); - int64_t months; - PD_CHECK_OVERFLOW(scaleYearsToMonths(years, &months)); + if (base == NPY_FR_Y) { + return years; + } - int64_t months_adder; - PD_CHECK_OVERFLOW(checked_int64_sub(dts->month, 1, &months_adder)); - PD_CHECK_OVERFLOW(checked_int64_add(months, months_adder, &months)); + int64_t months; + PD_CHECK_OVERFLOW(scaleYearsToMonths(years, &months)); - if (base == NPY_FR_M) { - return months; - } - } + int64_t months_adder; + PD_CHECK_OVERFLOW(checked_int64_sub(dts->month, 1, &months_adder)); + 
PD_CHECK_OVERFLOW(checked_int64_add(months, months_adder, &months)); - const int64_t days = get_datetimestruct_days(dts); - if (base == NPY_FR_D) { - return days; + if (base == NPY_FR_M) { + return months; } + } - if (base == NPY_FR_W) { - int64_t weeks; - PD_CHECK_OVERFLOW(scaleDaysToWeeks(days, &weeks)); - return weeks; - } + const int64_t days = get_datetimestruct_days(dts); + if (base == NPY_FR_D) { + return days; + } - int64_t hours; - PD_CHECK_OVERFLOW(scaleDaysToHours(days, &hours)); - PD_CHECK_OVERFLOW(checked_int64_add(hours, dts->hour, &hours)); + if (base == NPY_FR_W) { + int64_t weeks; + PD_CHECK_OVERFLOW(scaleDaysToWeeks(days, &weeks)); + return weeks; + } - if (base == NPY_FR_h) { - return hours; - } + int64_t hours; + PD_CHECK_OVERFLOW(scaleDaysToHours(days, &hours)); + PD_CHECK_OVERFLOW(checked_int64_add(hours, dts->hour, &hours)); + if (base == NPY_FR_h) { + return hours; + } - int64_t minutes; - PD_CHECK_OVERFLOW(scaleHoursToMinutes(hours, &minutes)); - PD_CHECK_OVERFLOW(checked_int64_add(minutes, dts->min, &minutes)); + int64_t minutes; + PD_CHECK_OVERFLOW(scaleHoursToMinutes(hours, &minutes)); + PD_CHECK_OVERFLOW(checked_int64_add(minutes, dts->min, &minutes)); - if (base == NPY_FR_m) { - return minutes; - } + if (base == NPY_FR_m) { + return minutes; + } - int64_t seconds; - PD_CHECK_OVERFLOW(scaleMinutesToSeconds(minutes, &seconds)); - PD_CHECK_OVERFLOW(checked_int64_add(seconds, dts->sec, &seconds)); + int64_t seconds; + PD_CHECK_OVERFLOW(scaleMinutesToSeconds(minutes, &seconds)); + PD_CHECK_OVERFLOW(checked_int64_add(seconds, dts->sec, &seconds)); - if (base == NPY_FR_s) { - return seconds; - } + if (base == NPY_FR_s) { + return seconds; + } - if (base == NPY_FR_ms) { - int64_t milliseconds; - PD_CHECK_OVERFLOW(scaleSecondsToMilliseconds(seconds, &milliseconds)); - PD_CHECK_OVERFLOW(checked_int64_add(milliseconds, dts->us / 1000, &milliseconds)); + if (base == NPY_FR_ms) { + int64_t milliseconds; + 
PD_CHECK_OVERFLOW(scaleSecondsToMilliseconds(seconds, &milliseconds)); + PD_CHECK_OVERFLOW( + checked_int64_add(milliseconds, dts->us / 1000, &milliseconds)); - return milliseconds; - } + return milliseconds; + } - int64_t microseconds; - PD_CHECK_OVERFLOW(scaleSecondsToMicroseconds(seconds, &microseconds)); - PD_CHECK_OVERFLOW(checked_int64_add(microseconds, dts->us, &microseconds)); + int64_t microseconds; + PD_CHECK_OVERFLOW(scaleSecondsToMicroseconds(seconds, &microseconds)); + PD_CHECK_OVERFLOW(checked_int64_add(microseconds, dts->us, &microseconds)); - if (base == NPY_FR_us) { - return microseconds; - } + if (base == NPY_FR_us) { + return microseconds; + } - if (base == NPY_FR_ns) { - int64_t nanoseconds; - PD_CHECK_OVERFLOW(scaleMicrosecondsToNanoseconds(microseconds, &nanoseconds)); - PD_CHECK_OVERFLOW(checked_int64_add(nanoseconds, dts->ps / 1000, &nanoseconds)); + if (base == NPY_FR_ns) { + int64_t nanoseconds; + PD_CHECK_OVERFLOW( + scaleMicrosecondsToNanoseconds(microseconds, &nanoseconds)); + PD_CHECK_OVERFLOW( + checked_int64_add(nanoseconds, dts->ps / 1000, &nanoseconds)); - return nanoseconds; - } + return nanoseconds; + } - int64_t picoseconds; - PD_CHECK_OVERFLOW(scaleMicrosecondsToPicoseconds(microseconds, &picoseconds)); - PD_CHECK_OVERFLOW(checked_int64_add(picoseconds, dts->ps, &picoseconds)); + int64_t picoseconds; + PD_CHECK_OVERFLOW(scaleMicrosecondsToPicoseconds(microseconds, &picoseconds)); + PD_CHECK_OVERFLOW(checked_int64_add(picoseconds, dts->ps, &picoseconds)); - if (base == NPY_FR_ps) { - return picoseconds; - } + if (base == NPY_FR_ps) { + return picoseconds; + } - if (base == NPY_FR_fs) { - int64_t femtoseconds; - PD_CHECK_OVERFLOW(scalePicosecondsToFemtoseconds(picoseconds, &femtoseconds)); - PD_CHECK_OVERFLOW(checked_int64_add(femtoseconds, dts->as / 1000, &femtoseconds)); - return femtoseconds; - } + if (base == NPY_FR_fs) { + int64_t femtoseconds; + PD_CHECK_OVERFLOW( + scalePicosecondsToFemtoseconds(picoseconds, &femtoseconds)); +
PD_CHECK_OVERFLOW( + checked_int64_add(femtoseconds, dts->as / 1000, &femtoseconds)); + return femtoseconds; + } - if (base == NPY_FR_as) { - int64_t attoseconds; - PD_CHECK_OVERFLOW(scalePicosecondsToAttoseconds(picoseconds, &attoseconds)); - PD_CHECK_OVERFLOW(checked_int64_add(attoseconds, dts->as, &attoseconds)); - return attoseconds; - } + if (base == NPY_FR_as) { + int64_t attoseconds; + PD_CHECK_OVERFLOW(scalePicosecondsToAttoseconds(picoseconds, &attoseconds)); + PD_CHECK_OVERFLOW(checked_int64_add(attoseconds, dts->as, &attoseconds)); + return attoseconds; + } - /* Something got corrupted */ - PyErr_SetString(PyExc_ValueError, - "NumPy datetime metadata with corrupt unit value"); - return -1; + /* Something got corrupted */ + PyErr_SetString(PyExc_ValueError, + "NumPy datetime metadata with corrupt unit value"); + return -1; } /* @@ -534,164 +540,161 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, * for subsequent calls to this command - it is able to deduce that `*d >= 0`. */ npy_int64 extract_unit(npy_datetime *d, npy_datetime unit) { - assert(unit > 0); - npy_int64 div = *d / unit; - npy_int64 mod = *d % unit; - if (mod < 0) { - mod += unit; - div -= 1; - } - assert(mod >= 0); - *d = mod; - return div; + assert(unit > 0); + npy_int64 div = *d / unit; + npy_int64 mod = *d % unit; + if (mod < 0) { + mod += unit; + div -= 1; + } + assert(mod >= 0); + *d = mod; + return div; } /* * Converts a datetime based on the given metadata into a datetimestruct */ -void pandas_datetime_to_datetimestruct(npy_datetime dt, - NPY_DATETIMEUNIT base, +void pandas_datetime_to_datetimestruct(npy_datetime dt, NPY_DATETIMEUNIT base, npy_datetimestruct *out) { - npy_int64 perday; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); - out->year = 1970; - out->month = 1; - out->day = 1; - - /* - * Note that care must be taken with the / and % operators - * for negative values. 
- */ - switch (base) { - case NPY_FR_Y: - out->year = 1970 + dt; - break; - - case NPY_FR_M: - out->year = 1970 + extract_unit(&dt, 12); - out->month = dt + 1; - break; - - case NPY_FR_W: - /* A week is 7 days */ - set_datetimestruct_days(dt * 7, out); - break; - - case NPY_FR_D: - set_datetimestruct_days(dt, out); - break; - - case NPY_FR_h: - perday = 24LL; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = dt; - break; - - case NPY_FR_m: - perday = 24LL * 60; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 60); - out->min = (int)dt; - break; - - case NPY_FR_s: - perday = 24LL * 60 * 60; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 60 * 60); - out->min = (int)extract_unit(&dt, 60); - out->sec = (int)dt; - break; - - case NPY_FR_ms: - perday = 24LL * 60 * 60 * 1000; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 60); - out->sec = (int)extract_unit(&dt, 1000LL); - out->us = (int)(dt * 1000); - break; - - case NPY_FR_us: - perday = 24LL * 60LL * 60LL * 1000LL * 1000LL; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000); - out->us = (int)dt; - break; - - case NPY_FR_ns: - perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); - out->us = (int)extract_unit(&dt, 1000LL); - out->ps = (int)(dt * 1000); - break; - - case NPY_FR_ps: - perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000; - - 
set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); - out->us = (int)extract_unit(&dt, 1000LL); - out->ps = (int)(dt * 1000); - break; - - case NPY_FR_fs: - /* entire range is only +- 2.6 hours */ - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * - 1000 * 60 * 60); - if (out->hour < 0) { - out->year = 1969; - out->month = 12; - out->day = 31; - out->hour += 24; - assert(out->hour >= 0); - } - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * - 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * - 1000); - out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000); - out->ps = (int)extract_unit(&dt, 1000LL); - out->as = (int)(dt * 1000); - break; - - case NPY_FR_as: - /* entire range is only +- 9.2 seconds */ - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * - 1000 * 1000); - if (out->sec < 0) { - out->year = 1969; - out->month = 12; - out->day = 31; - out->hour = 23; - out->min = 59; - out->sec += 60; - assert(out->sec >= 0); - } - out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000); - out->ps = (int)extract_unit(&dt, 1000LL * 1000); - out->as = (int)dt; - break; - - default: - PyErr_SetString(PyExc_RuntimeError, - "NumPy datetime metadata is corrupted with invalid " - "base unit"); + npy_int64 perday; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->year = 1970; + out->month = 1; + out->day = 1; + + /* + * Note that care must be taken with the / and % operators + * for negative values. 
+ */ + switch (base) { + case NPY_FR_Y: + out->year = 1970 + dt; + break; + + case NPY_FR_M: + out->year = 1970 + extract_unit(&dt, 12); + out->month = dt + 1; + break; + + case NPY_FR_W: + /* A week is 7 days */ + set_datetimestruct_days(dt * 7, out); + break; + + case NPY_FR_D: + set_datetimestruct_days(dt, out); + break; + + case NPY_FR_h: + perday = 24LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = dt; + break; + + case NPY_FR_m: + perday = 24LL * 60; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 60); + out->min = (int)dt; + break; + + case NPY_FR_s: + perday = 24LL * 60 * 60; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 60 * 60); + out->min = (int)extract_unit(&dt, 60); + out->sec = (int)dt; + break; + + case NPY_FR_ms: + perday = 24LL * 60 * 60 * 1000; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 60); + out->sec = (int)extract_unit(&dt, 1000LL); + out->us = (int)(dt * 1000); + break; + + case NPY_FR_us: + perday = 24LL * 60LL * 60LL * 1000LL * 1000LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000); + out->us = (int)dt; + break; + + case NPY_FR_ns: + perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL); + out->ps = (int)(dt * 1000); + break; + + case NPY_FR_ps: + perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000; + + 
set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL); + out->ps = (int)(dt * 1000); + break; + + case NPY_FR_fs: + /* entire range is only +- 2.6 hours */ + out->hour = + (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 1000 * 60 * 60); + if (out->hour < 0) { + out->year = 1969; + out->month = 12; + out->day = 31; + out->hour += 24; + assert(out->hour >= 0); + } + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->ps = (int)extract_unit(&dt, 1000LL); + out->as = (int)(dt * 1000); + break; + + case NPY_FR_as: + /* entire range is only +- 9.2 seconds */ + out->sec = + (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 1000 * 1000); + if (out->sec < 0) { + out->year = 1969; + out->month = 12; + out->day = 31; + out->hour = 23; + out->min = 59; + out->sec += 60; + assert(out->sec >= 0); } + out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000); + out->ps = (int)extract_unit(&dt, 1000LL * 1000); + out->as = (int)dt; + break; + + default: + PyErr_SetString(PyExc_RuntimeError, + "NumPy datetime metadata is corrupted with invalid " + "base unit"); + } } /* @@ -703,363 +706,358 @@ void pandas_datetime_to_datetimestruct(npy_datetime dt, void pandas_timedelta_to_timedeltastruct(npy_timedelta td, NPY_DATETIMEUNIT base, pandas_timedeltastruct *out) { - npy_int64 frac; - npy_int64 sfrac; - npy_int64 ifrac; - int sign; - npy_int64 per_day; - npy_int64 per_sec; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(pandas_timedeltastruct)); - - switch (base) { - case NPY_FR_ns: - - per_day = 86400000000000LL; - per_sec = 1000LL * 1000LL * 1000LL; - - // put 
frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = ifrac / (1000LL * 1000LL); - ifrac -= out->ms * 1000LL * 1000LL; - out->us = ifrac / 1000LL; - ifrac -= out->us * 1000LL; - out->ns = ifrac; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; - - case NPY_FR_us: - - per_day = 86400000000LL; - per_sec = 1000LL * 1000LL; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL 
+ out->min * 60LL - + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = ifrac / 1000LL; - ifrac -= out->ms * 1000LL; - out->us = ifrac / 1L; - ifrac -= out->us * 1L; - out->ns = ifrac; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; - - case NPY_FR_ms: - - per_day = 86400000LL; - per_sec = 1000LL; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = ifrac; - out->us = 0; - out->ns = 0; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; - - case NPY_FR_s: - // special case where we can simplify many expressions bc per_sec=1 - - per_day = 86400LL; - per_sec = 1L; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; 
- frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = 0; - out->us = 0; - out->ns = 0; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; - - case NPY_FR_m: - - out->days = td / 1440LL; - td -= out->days * 1440LL; - out->hrs = td / 60LL; - td -= out->hrs * 60LL; - out->min = td; - - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - case NPY_FR_h: - out->days = td / 24LL; - td -= out->days * 24LL; - out->hrs = td; - - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - case NPY_FR_D: - out->days = td; - out->hrs = 0; - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - case NPY_FR_W: - out->days = 7 * td; - out->hrs = 0; - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - default: - PyErr_SetString(PyExc_RuntimeError, - "NumPy timedelta metadata is corrupted with " - "invalid base unit"); + npy_int64 frac; + npy_int64 sfrac; + npy_int64 ifrac; + int sign; + npy_int64 per_day; + npy_int64 per_sec; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(pandas_timedeltastruct)); + + switch (base) { + case NPY_FR_ns: + + per_day = 86400000000000LL; + per_sec = 1000LL * 1000LL * 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac 
+= 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; } - out->seconds = out->hrs * 3600 + out->min * 60 + out->sec; - out->microseconds = out->ms * 1000 + out->us; - out->nanoseconds = out->ns; -} + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = ifrac / (1000LL * 1000LL); + ifrac -= out->ms * 1000LL * 1000LL; + out->us = ifrac / 1000LL; + ifrac -= out->us * 1000LL; + out->ns = ifrac; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_us: + + per_day = 86400000000LL; + per_sec = 1000LL * 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td 
- (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = ifrac / 1000LL; + ifrac -= out->ms * 1000LL; + out->us = ifrac / 1L; + ifrac -= out->us * 1L; + out->ns = ifrac; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_ms: + + per_day = 86400000LL; + per_sec = 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = ifrac; + out->us = 0; + out->ns = 0; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + case NPY_FR_s: + // special case where we can simplify many expressions bc per_sec=1 + + per_day = 86400LL; + per_sec = 1L; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs 
* 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = 0; + out->us = 0; + out->ns = 0; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_m: + + out->days = td / 1440LL; + td -= out->days * 1440LL; + out->hrs = td / 60LL; + td -= out->hrs * 60LL; + out->min = td; + + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_h: + out->days = td / 24LL; + td -= out->days * 24LL; + out->hrs = td; + + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_D: + out->days = td; + out->hrs = 0; + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_W: + out->days = 7 * td; + out->hrs = 0; + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + default: + PyErr_SetString(PyExc_RuntimeError, + "NumPy timedelta metadata is corrupted with " + "invalid base unit"); + } + + out->seconds = out->hrs * 3600 + out->min * 60 + out->sec; + out->microseconds = out->ms * 1000 + out->us; + out->nanoseconds = out->ns; +} /* * This function returns a pointer to the DateTimeMetaData @@ -1069,5 +1067,5 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, */ PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(PyArray_Descr *dtype) { - return (((PyArray_DatetimeDTypeMetaData *)dtype->c_metadata)->meta); + return (((PyArray_DatetimeDTypeMetaData *)dtype->c_metadata)->meta); } From 4d8696c09f64de19e08a6db97b77ac1e75fcf460 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 26 Oct 2023 14:20:19 -0400 Subject: [PATCH 19/34] 
updates --- pandas/_libs/tslib.pyx | 4 ++-- pandas/_libs/tslibs/timestamps.pyx | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index bd85114d77908..06107176fc292 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -97,8 +97,8 @@ def _test_parse_iso8601(ts: str): string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True) try: obj.value = npy_datetimestruct_to_datetime(NPY_FR_ns, &obj.dts) - except OverflowError as e: - raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp: {ts}") from e + except OverflowError as err: + raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp: {ts}") from err if out_local == 1: obj.tzinfo = timezone(timedelta(minutes=out_tzoffset)) obj.value = tz_localize_to_utc_single(obj.value, obj.tzinfo) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 7d3a27cefa80d..22494f2b80fc4 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -2500,12 +2500,11 @@ default 'raise' try: ts.value = npy_datetimestruct_to_datetime(self._creso, &dts) except OverflowError as err: - # TODO: this is copied from check_dts_bounds, with the thought that - # eventually we can get rid of check_dts_bounds + # TODO: create shared function to create this format from dts struct fmt = (f"{dts.year}-{dts.month:02d}-{dts.day:02d} " f"{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}") raise OutOfBoundsDatetime( - f"Out of bounds nanosecond timestamp: {fmt}" + f"Out of bounds timestamp: {fmt} with precision {self._creso}" ) from err ts.dts = dts ts.creso = self._creso From b3d5b7ceda18f66a11cd885d1ba460ab37471040 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 26 Oct 2023 15:45:12 -0400 Subject: [PATCH 20/34] fixed error message --- pandas/_libs/tslibs/timestamps.pyx | 3 ++- pandas/tests/scalar/timestamp/methods/test_replace.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) 
diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 22494f2b80fc4..3ce099eea16d1 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -2504,7 +2504,8 @@ default 'raise' fmt = (f"{dts.year}-{dts.month:02d}-{dts.day:02d} " f"{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}") raise OutOfBoundsDatetime( - f"Out of bounds timestamp: {fmt} with precision {self._creso}" + f"Out of bounds timestamp: {fmt} " + f"with NPY_DATETIMEUNIT {self._creso}" ) from err ts.dts = dts ts.creso = self._creso diff --git a/pandas/tests/scalar/timestamp/methods/test_replace.py b/pandas/tests/scalar/timestamp/methods/test_replace.py index 9b2b21cf7f388..4357851e935ff 100644 --- a/pandas/tests/scalar/timestamp/methods/test_replace.py +++ b/pandas/tests/scalar/timestamp/methods/test_replace.py @@ -21,7 +21,7 @@ def test_replace_out_of_pydatetime_bounds(self): # GH#50348 ts = Timestamp("2016-01-01").as_unit("ns") - msg = "Out of bounds nanosecond timestamp: 99999-01-01 00:00:00" + msg = "Out of bounds timestamp: 99999-01-01 00:00:00 with NPY_DATETIMEUNIT 10" with pytest.raises(OutOfBoundsDatetime, match=msg): ts.replace(year=99_999) From 2f60947b0934a93160158a05f627d93cbe1d0462 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 26 Oct 2023 16:34:02 -0400 Subject: [PATCH 21/34] try nogil --- .../src/vendored/numpy/datetime/np_datetime.c | 5 ++ pandas/_libs/tslibs/period.pyx | 77 +++++++++---------- 2 files changed, 41 insertions(+), 41 deletions(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index e53ccabe721cd..01e11e5138a8e 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -73,8 +73,10 @@ _Static_assert(0, "__has_builtin not detected; please try a newer compiler"); #define PD_CHECK_OVERFLOW(FUNC) \ do { \ if ((FUNC) != 0) { \ + PyGILState_STATE gstate = 
PyGILState_Ensure(); \ PyErr_SetString(PyExc_OverflowError, \ "Overflow occurred in npy_datetimestruct_to_datetime"); \ + PyGILState_Release(gstate); \ return -1; \ } \ } while (0) @@ -526,8 +528,11 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, } /* Something got corrupted */ + PyGILState_STATE gstate = PyGILState_Ensure(); PyErr_SetString(PyExc_ValueError, "NumPy datetime metadata with corrupt unit value"); + PyGILState_Release(gstate); + return -1; } diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 8e408d45879a3..46e5b9af96b0d 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -131,7 +131,7 @@ ctypedef struct asfreq_info: int to_end int from_end -ctypedef int64_t (*freq_conv_func)(int64_t, asfreq_info*) +ctypedef int64_t (*freq_conv_func)(int64_t, asfreq_info*) except? -1 nogil cdef extern from *: @@ -340,7 +340,7 @@ cdef int64_t downsample_daytime(int64_t ordinal, asfreq_info *af_info) noexcept cdef int64_t transform_via_day(int64_t ordinal, asfreq_info *af_info, freq_conv_func first_func, - freq_conv_func second_func): + freq_conv_func second_func) except? -1 nogil: cdef: int64_t result @@ -352,7 +352,7 @@ cdef int64_t transform_via_day(int64_t ordinal, # -------------------------------------------------------------------- # Conversion _to_ Daily Freq -cdef int64_t asfreq_AtoDT(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_AtoDT(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: cdef: int64_t unix_date npy_datetimestruct dts @@ -368,7 +368,7 @@ cdef int64_t asfreq_AtoDT(int64_t ordinal, asfreq_info *af_info): return upsample_daytime(unix_date, af_info) -cdef int64_t asfreq_QtoDT(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_QtoDT(int64_t ordinal, asfreq_info *af_info) except? 
-1 nogil: cdef: int64_t unix_date npy_datetimestruct dts @@ -384,7 +384,7 @@ cdef int64_t asfreq_QtoDT(int64_t ordinal, asfreq_info *af_info): return upsample_daytime(unix_date, af_info) -cdef int64_t asfreq_MtoDT(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_MtoDT(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: cdef: int64_t unix_date int year, month @@ -408,7 +408,7 @@ cdef int64_t asfreq_WtoDT(int64_t ordinal, asfreq_info *af_info) noexcept nogil: # -------------------------------------------------------------------- # Conversion _to_ BusinessDay Freq -cdef int64_t asfreq_AtoB(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_AtoB(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: cdef: int roll_back npy_datetimestruct dts @@ -419,7 +419,7 @@ cdef int64_t asfreq_AtoB(int64_t ordinal, asfreq_info *af_info): return DtoB(&dts, roll_back, unix_date) -cdef int64_t asfreq_QtoB(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_QtoB(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: cdef: int roll_back npy_datetimestruct dts @@ -430,7 +430,7 @@ cdef int64_t asfreq_QtoB(int64_t ordinal, asfreq_info *af_info): return DtoB(&dts, roll_back, unix_date) -cdef int64_t asfreq_MtoB(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_MtoB(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: cdef: int roll_back npy_datetimestruct dts @@ -467,7 +467,7 @@ cdef int64_t asfreq_DTtoB(int64_t ordinal, asfreq_info *af_info) noexcept nogil: # ---------------------------------------------------------------------- # Conversion _from_ Daily Freq -cdef int64_t asfreq_DTtoA(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_DTtoA(int64_t ordinal, asfreq_info *af_info) except? 
-1 nogil: cdef: npy_datetimestruct dts @@ -520,30 +520,30 @@ cdef int64_t unix_date_to_week(int64_t unix_date, int to_end) noexcept nogil: # -------------------------------------------------------------------- # Conversion _from_ BusinessDay Freq -cdef int64_t asfreq_BtoDT(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_BtoDT(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: ordinal = ((ordinal + 3) // 5) * 7 + (ordinal + 3) % 5 - 3 return upsample_daytime(ordinal, af_info) -cdef int64_t asfreq_BtoA(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_BtoA(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: return transform_via_day(ordinal, af_info, asfreq_BtoDT, asfreq_DTtoA) -cdef int64_t asfreq_BtoQ(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_BtoQ(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: return transform_via_day(ordinal, af_info, asfreq_BtoDT, asfreq_DTtoQ) -cdef int64_t asfreq_BtoM(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_BtoM(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: return transform_via_day(ordinal, af_info, asfreq_BtoDT, asfreq_DTtoM) -cdef int64_t asfreq_BtoW(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_BtoW(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: return transform_via_day(ordinal, af_info, asfreq_BtoDT, asfreq_DTtoW) @@ -552,25 +552,25 @@ cdef int64_t asfreq_BtoW(int64_t ordinal, asfreq_info *af_info): # ---------------------------------------------------------------------- # Conversion _from_ Annual Freq -cdef int64_t asfreq_AtoA(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_AtoA(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: return transform_via_day(ordinal, af_info, asfreq_AtoDT, asfreq_DTtoA) -cdef int64_t asfreq_AtoQ(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_AtoQ(int64_t ordinal, asfreq_info *af_info) except? 
-1 nogil: return transform_via_day(ordinal, af_info, asfreq_AtoDT, asfreq_DTtoQ) -cdef int64_t asfreq_AtoM(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_AtoM(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: return transform_via_day(ordinal, af_info, asfreq_AtoDT, asfreq_DTtoM) -cdef int64_t asfreq_AtoW(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_AtoW(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: return transform_via_day(ordinal, af_info, asfreq_AtoDT, asfreq_DTtoW) @@ -579,25 +579,25 @@ cdef int64_t asfreq_AtoW(int64_t ordinal, asfreq_info *af_info): # ---------------------------------------------------------------------- # Conversion _from_ Quarterly Freq -cdef int64_t asfreq_QtoQ(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_QtoQ(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: return transform_via_day(ordinal, af_info, asfreq_QtoDT, asfreq_DTtoQ) -cdef int64_t asfreq_QtoA(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_QtoA(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: return transform_via_day(ordinal, af_info, asfreq_QtoDT, asfreq_DTtoA) -cdef int64_t asfreq_QtoM(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_QtoM(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: return transform_via_day(ordinal, af_info, asfreq_QtoDT, asfreq_DTtoM) -cdef int64_t asfreq_QtoW(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_QtoW(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: return transform_via_day(ordinal, af_info, asfreq_QtoDT, asfreq_DTtoW) @@ -606,19 +606,19 @@ cdef int64_t asfreq_QtoW(int64_t ordinal, asfreq_info *af_info): # ---------------------------------------------------------------------- # Conversion _from_ Monthly Freq -cdef int64_t asfreq_MtoA(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_MtoA(int64_t ordinal, asfreq_info *af_info) except? 
-1 nogil: return transform_via_day(ordinal, af_info, asfreq_MtoDT, asfreq_DTtoA) -cdef int64_t asfreq_MtoQ(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_MtoQ(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: return transform_via_day(ordinal, af_info, asfreq_MtoDT, asfreq_DTtoQ) -cdef int64_t asfreq_MtoW(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_MtoW(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: return transform_via_day(ordinal, af_info, asfreq_MtoDT, asfreq_DTtoW) @@ -627,25 +627,25 @@ cdef int64_t asfreq_MtoW(int64_t ordinal, asfreq_info *af_info): # ---------------------------------------------------------------------- # Conversion _from_ Weekly Freq -cdef int64_t asfreq_WtoA(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_WtoA(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: return transform_via_day(ordinal, af_info, asfreq_WtoDT, asfreq_DTtoA) -cdef int64_t asfreq_WtoQ(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_WtoQ(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: return transform_via_day(ordinal, af_info, asfreq_WtoDT, asfreq_DTtoQ) -cdef int64_t asfreq_WtoM(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_WtoM(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: return transform_via_day(ordinal, af_info, asfreq_WtoDT, asfreq_DTtoM) -cdef int64_t asfreq_WtoW(int64_t ordinal, asfreq_info *af_info): +cdef int64_t asfreq_WtoW(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: return transform_via_day(ordinal, af_info, asfreq_WtoDT, asfreq_DTtoW) @@ -723,7 +723,7 @@ cdef void adjust_dts_for_qtr(npy_datetimestruct* dts, int to_end) noexcept nogil # Find the unix_date (days elapsed since datetime(1970, 1, 1) # for the given year/month/day. # Assumes GREGORIAN_CALENDAR */ -cdef int64_t unix_date_from_ymd(int year, int month, int day): +cdef int64_t unix_date_from_ymd(int year, int month, int day) except? 
-1 nogil: # Calculate the absolute date cdef: npy_datetimestruct dts @@ -733,10 +733,8 @@ cdef int64_t unix_date_from_ymd(int year, int month, int day): dts.year = year dts.month = month dts.day = day - try: - unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, &dts) - except OverflowError as err: - raise OutOfBoundsDatetime from err + + unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, &dts) return unix_date @@ -746,14 +744,11 @@ cdef int64_t dts_to_month_ordinal(npy_datetimestruct* dts) noexcept nogil: return ((dts.year - 1970) * 12 + dts.month - 1) -cdef int64_t dts_to_year_ordinal(npy_datetimestruct *dts, int to_end): +cdef int64_t dts_to_year_ordinal(npy_datetimestruct *dts, int to_end) except? -1 nogil: cdef: int64_t result - try: - result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_Y, dts) - except OverflowError as err: - raise OutOfBoundsDatetime from err + result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_Y, dts) if dts.month > to_end: return result + 1 @@ -834,7 +829,7 @@ cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq): cdef void get_date_info(int64_t ordinal, - int freq, npy_datetimestruct *dts): + int freq, npy_datetimestruct *dts) nogil: cdef: int64_t unix_date, nanos npy_datetimestruct dts2 @@ -852,7 +847,7 @@ cdef void get_date_info(int64_t ordinal, dts.ps = dts2.ps -cdef int64_t get_unix_date(int64_t period_ordinal, int freq): +cdef int64_t get_unix_date(int64_t period_ordinal, int freq) except? -1 nogil: """ Returns the proleptic Gregorian ordinal of the date, as an integer. This corresponds to the number of days since Jan., 1st, 1970 AD. 
From 03315fccdc5604f0f84c4c647ebee6d2686427b2 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 1 Nov 2023 13:21:36 -0400 Subject: [PATCH 22/34] revert offsets changes --- pandas/_libs/tslibs/offsets.pyx | 185 +++++++++++++++----------------- 1 file changed, 88 insertions(+), 97 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 2ad8b1d01bcc4..df3a2e3ecde48 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -75,7 +75,6 @@ from pandas._libs.tslibs.np_datetime cimport ( pandas_datetime_to_datetimestruct, pydate_to_dtstruct, ) -from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime import_pandas_datetime() @@ -3077,56 +3076,54 @@ cdef class SemiMonthOffset(SingleConstructorOffset): NPY_DATETIMEUNIT reso = get_unit_from_dtype(dtarr.dtype) cnp.broadcast mi = cnp.PyArray_MultiIterNew2(out, i8other) - for i in range(count): - # Analogous to: val = i8other[i] - val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] - - if val == NPY_NAT: - res_val = NPY_NAT + with nogil: + for i in range(count): + # Analogous to: val = i8other[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] - else: - pandas_datetime_to_datetimestruct(val, reso, &dts) - day = dts.day - - # Adjust so that we are always looking at self.day_of_month, - # incrementing/decrementing n if necessary. - nadj = roll_convention(day, n, anchor_dom) - - days_in_month = get_days_in_month(dts.year, dts.month) - # For SemiMonthBegin on other.day == 1 and - # SemiMonthEnd on other.day == days_in_month, - # shifting `other` to `self.day_of_month` _always_ requires - # incrementing/decrementing `n`, regardless of whether it is - # initially positive. 
- if is_start and (n <= 0 and day == 1): - nadj -= 1 - elif (not is_start) and (n > 0 and day == days_in_month): - nadj += 1 - - if is_start: - # See also: SemiMonthBegin._apply - months = nadj // 2 + nadj % 2 - to_day = 1 if nadj % 2 else anchor_dom + if val == NPY_NAT: + res_val = NPY_NAT else: - # See also: SemiMonthEnd._apply - months = nadj // 2 - to_day = 31 if nadj % 2 else anchor_dom - - dts.year = year_add_months(dts, months) - dts.month = month_add_months(dts, months) - days_in_month = get_days_in_month(dts.year, dts.month) - dts.day = min(to_day, days_in_month) + pandas_datetime_to_datetimestruct(val, reso, &dts) + day = dts.day + + # Adjust so that we are always looking at self.day_of_month, + # incrementing/decrementing n if necessary. + nadj = roll_convention(day, n, anchor_dom) + + days_in_month = get_days_in_month(dts.year, dts.month) + # For SemiMonthBegin on other.day == 1 and + # SemiMonthEnd on other.day == days_in_month, + # shifting `other` to `self.day_of_month` _always_ requires + # incrementing/decrementing `n`, regardless of whether it is + # initially positive. 
+ if is_start and (n <= 0 and day == 1): + nadj -= 1 + elif (not is_start) and (n > 0 and day == days_in_month): + nadj += 1 + + if is_start: + # See also: SemiMonthBegin._apply + months = nadj // 2 + nadj % 2 + to_day = 1 if nadj % 2 else anchor_dom + + else: + # See also: SemiMonthEnd._apply + months = nadj // 2 + to_day = 31 if nadj % 2 else anchor_dom + + dts.year = year_add_months(dts, months) + dts.month = month_add_months(dts, months) + days_in_month = get_days_in_month(dts.year, dts.month) + dts.day = min(to_day, days_in_month) - try: res_val = npy_datetimestruct_to_datetime(reso, &dts) - except OverflowError as e: - raise OutOfBoundsDatetime from e - # Analogous to: out[i] = res_val - (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val - cnp.PyArray_MultiIter_NEXT(mi) + cnp.PyArray_MultiIter_NEXT(mi) return out @@ -4902,32 +4899,30 @@ cdef ndarray shift_quarters( npy_datetimestruct dts cnp.broadcast mi = cnp.PyArray_MultiIterNew2(out, dtindex) - for i in range(count): - # Analogous to: val = dtindex[i] - val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + with nogil: + for i in range(count): + # Analogous to: val = dtindex[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] - if val == NPY_NAT: - res_val = NPY_NAT - else: - pandas_datetime_to_datetimestruct(val, reso, &dts) - n = quarters + if val == NPY_NAT: + res_val = NPY_NAT + else: + pandas_datetime_to_datetimestruct(val, reso, &dts) + n = quarters - months_since = (dts.month - q1start_month) % modby - n = _roll_qtrday(&dts, n, months_since, day_opt) + months_since = (dts.month - q1start_month) % modby + n = _roll_qtrday(&dts, n, months_since, day_opt) - dts.year = year_add_months(dts, modby * n - months_since) - dts.month = month_add_months(dts, modby * n - months_since) - dts.day = get_day_of_month(&dts, day_opt) + dts.year = year_add_months(dts, modby * n - months_since) + dts.month = month_add_months(dts, modby * n - 
months_since) + dts.day = get_day_of_month(&dts, day_opt) - try: res_val = npy_datetimestruct_to_datetime(reso, &dts) - except OverflowError as err: - raise OutOfBoundsDatetime from err - # Analogous to: out[i] = res_val - (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val - cnp.PyArray_MultiIter_NEXT(mi) + cnp.PyArray_MultiIter_NEXT(mi) return out @@ -4967,55 +4962,51 @@ def shift_months( if day_opt is None: # TODO: can we combine this with the non-None case? - for i in range(count): - # Analogous to: val = i8other[i] - val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + with nogil: + for i in range(count): + # Analogous to: val = i8other[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] - if val == NPY_NAT: - res_val = NPY_NAT - else: - pandas_datetime_to_datetimestruct(val, reso, &dts) - dts.year = year_add_months(dts, months) - dts.month = month_add_months(dts, months) + if val == NPY_NAT: + res_val = NPY_NAT + else: + pandas_datetime_to_datetimestruct(val, reso, &dts) + dts.year = year_add_months(dts, months) + dts.month = month_add_months(dts, months) - dts.day = min(dts.day, get_days_in_month(dts.year, dts.month)) - try: + dts.day = min(dts.day, get_days_in_month(dts.year, dts.month)) res_val = npy_datetimestruct_to_datetime(reso, &dts) - except OverflowError as err: - raise OutOfBoundsDatetime from err - # Analogous to: out[i] = res_val - (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val - cnp.PyArray_MultiIter_NEXT(mi) + cnp.PyArray_MultiIter_NEXT(mi) else: - for i in range(count): + with nogil: + for i in range(count): - # Analogous to: val = i8other[i] - val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + # Analogous to: val = i8other[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] - if val == NPY_NAT: - res_val = NPY_NAT - else: - pandas_datetime_to_datetimestruct(val, reso, &dts) - 
months_to_roll = months + if val == NPY_NAT: + res_val = NPY_NAT + else: + pandas_datetime_to_datetimestruct(val, reso, &dts) + months_to_roll = months - months_to_roll = _roll_qtrday(&dts, months_to_roll, 0, day_opt) + months_to_roll = _roll_qtrday(&dts, months_to_roll, 0, day_opt) - dts.year = year_add_months(dts, months_to_roll) - dts.month = month_add_months(dts, months_to_roll) - dts.day = get_day_of_month(&dts, day_opt) + dts.year = year_add_months(dts, months_to_roll) + dts.month = month_add_months(dts, months_to_roll) + dts.day = get_day_of_month(&dts, day_opt) - try: res_val = npy_datetimestruct_to_datetime(reso, &dts) - except OverflowError as err: - raise OutOfBoundsDatetime from err - # Analogous to: out[i] = res_val - (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val - cnp.PyArray_MultiIter_NEXT(mi) + cnp.PyArray_MultiIter_NEXT(mi) return out From b3a80b4b96d7080179b3c8f53a936e35406da43b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 1 Nov 2023 13:29:40 -0400 Subject: [PATCH 23/34] simplified error handling --- pandas/_libs/tslibs/conversion.pyx | 10 ++-------- pandas/_libs/tslibs/np_datetime.pyx | 5 +---- pandas/_libs/tslibs/period.pxd | 2 +- pandas/_libs/tslibs/period.pyx | 5 +---- pandas/_libs/tslibs/timestamps.pyx | 8 ++------ 5 files changed, 7 insertions(+), 23 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 696d1ed332dc5..5e3e9fa6d603e 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -408,10 +408,7 @@ cdef _TSObject convert_datetime_to_tsobject( if nanos: obj.dts.ps = nanos * 1000 - try: - obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts) - except OverflowError as err: - raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from err + obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts) if obj.tzinfo is not None and not 
is_utc(obj.tzinfo): offset = get_utcoffset(obj.tzinfo, ts) @@ -526,10 +523,7 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, str unit, obj = _TSObject() obj.dts = dts obj.creso = reso - try: - ival = npy_datetimestruct_to_datetime(reso, &dts) - except OverflowError as err: - raise OutOfBoundsDatetime from err + ival = npy_datetimestruct_to_datetime(reso, &dts) if out_local == 1: obj.tzinfo = timezone(timedelta(minutes=out_tzoffset)) diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 63d01bed16318..92ddfc2690406 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -412,10 +412,7 @@ cpdef ndarray astype_overflowsafe( else: raise else: - try: - new_value = npy_datetimestruct_to_datetime(to_unit, &dts) - except OverflowError as err: - raise OutOfBoundsDatetime from err + new_value = npy_datetimestruct_to_datetime(to_unit, &dts) # Analogous to: iresult[i] = new_value (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = new_value diff --git a/pandas/_libs/tslibs/period.pxd b/pandas/_libs/tslibs/period.pxd index cbddc12616c9f..2d18ca75ae445 100644 --- a/pandas/_libs/tslibs/period.pxd +++ b/pandas/_libs/tslibs/period.pxd @@ -4,4 +4,4 @@ from .np_datetime cimport npy_datetimestruct cdef bint is_period_object(object obj) -cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) +cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) except? -1 diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 46e5b9af96b0d..f3ddf87701a5f 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -733,9 +733,7 @@ cdef int64_t unix_date_from_ymd(int year, int month, int day) except? -1 nogil: dts.year = year dts.month = month dts.day = day - unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, &dts) - return unix_date @@ -749,7 +747,6 @@ cdef int64_t dts_to_year_ordinal(npy_datetimestruct *dts, int to_end) except? 
-1 int64_t result result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_Y, dts) - if dts.month > to_end: return result + 1 else: @@ -777,7 +774,7 @@ cdef int get_anchor_month(int freq, int freq_group) noexcept nogil: # specifically _dont_ use cdvision or else ordinals near -1 are assigned to # incorrect dates GH#19643 @cython.cdivision(False) -cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq): +cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) except? -1: """ Generate an ordinal in period space diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3ce099eea16d1..fb0ead6d95289 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -579,12 +579,8 @@ cdef class _Timestamp(ABCTimestamp): if own_tz is not None and not is_utc(own_tz): pydatetime_to_dtstruct(self, &dts) - try: - # TODO: can + self.nanosecond also overflow here? - val = npy_datetimestruct_to_datetime( - self._creso, &dts) + self.nanosecond - except OverflowError as err: - raise OutOfBoundsDatetime from err + # TODO: can + self.nanosecond also overflow here? + val = npy_datetimestruct_to_datetime(self._creso, &dts) + self.nanosecond else: val = self._value return val From 038db5fc43b6a5841aee1e07fc8b5daa92bcc4a0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 1 Nov 2023 14:23:45 -0400 Subject: [PATCH 24/34] period fixup --- pandas/_libs/tslibs/period.pxd | 2 +- pandas/_libs/tslibs/period.pyx | 17 ++++------------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/tslibs/period.pxd b/pandas/_libs/tslibs/period.pxd index 2d18ca75ae445..a02723408ec0c 100644 --- a/pandas/_libs/tslibs/period.pxd +++ b/pandas/_libs/tslibs/period.pxd @@ -4,4 +4,4 @@ from .np_datetime cimport npy_datetimestruct cdef bint is_period_object(object obj) -cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) except? 
-1 +cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) except? -1 nogil diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index f3ddf87701a5f..ccb94831f50a9 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -774,7 +774,7 @@ cdef int get_anchor_month(int freq, int freq_group) noexcept nogil: # specifically _dont_ use cdvision or else ordinals near -1 are assigned to # incorrect dates GH#19643 @cython.cdivision(False) -cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) except? -1: +cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) except? -1 nogil: """ Generate an ordinal in period space @@ -803,24 +803,15 @@ cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) except? -1: return dts_to_qtr_ordinal(dts, fmonth) elif freq_group == FR_WK: - try: - unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) - except OverflowError as err: - raise OutOfBoundsDatetime from err + unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) return unix_date_to_week(unix_date, freq - FR_WK) elif freq == FR_BUS: - try: - unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) - except OverflowError as err: - raise OutOfBoundsDatetime from err + unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) return DtoB(dts, 0, unix_date) unit = freq_group_code_to_npy_unit(freq) - try: - unix_date = npy_datetimestruct_to_datetime(unit, dts) - except OverflowError as err: - raise OutOfBoundsDatetime from err + unix_date = npy_datetimestruct_to_datetime(unit, dts) return unix_date From df2a4df6f4bdfd1161a3fa8a1da4c0cb97f5ee3b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 1 Nov 2023 14:49:32 -0400 Subject: [PATCH 25/34] fixed test failure --- pandas/_libs/tslibs/conversion.pyx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 5e3e9fa6d603e..4d697db2e3fd6 100644 --- 
a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -408,14 +408,16 @@ cdef _TSObject convert_datetime_to_tsobject( if nanos: obj.dts.ps = nanos * 1000 - obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts) + try: + obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts) + except OverflowError as err: + raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from err if obj.tzinfo is not None and not is_utc(obj.tzinfo): offset = get_utcoffset(obj.tzinfo, ts) pps = periods_per_second(reso) obj.value -= int(offset.total_seconds() * pps) - check_dts_bounds(&obj.dts, reso) check_overflows(obj, reso) return obj From 1afb12d1da23635ab84f7b8455f2636a282f7fc3 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 1 Nov 2023 16:58:44 -0400 Subject: [PATCH 26/34] try speedup --- pandas/_libs/tslibs/timestamps.pxd | 2 +- pandas/_libs/tslibs/timestamps.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd index 26018cd904249..bd73c713f6c04 100644 --- a/pandas/_libs/tslibs/timestamps.pxd +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -26,7 +26,7 @@ cdef class _Timestamp(ABCTimestamp): cdef bint _get_start_end_field(self, str field, freq) cdef _get_date_name_field(self, str field, object locale) - cdef int64_t _maybe_convert_value_to_local(self) + cdef int64_t _maybe_convert_value_to_local(self) except? 
-1 cdef bint _can_compare(self, datetime other) cpdef to_datetime64(self) cpdef datetime to_pydatetime(_Timestamp self, bint warn=*) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index fb0ead6d95289..1047f9b2ac21e 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -570,7 +570,7 @@ cdef class _Timestamp(ABCTimestamp): # ----------------------------------------------------------------- - cdef int64_t _maybe_convert_value_to_local(self): + cdef int64_t _maybe_convert_value_to_local(self) except? -1: """Convert UTC i8 value to local i8 value if tz exists""" cdef: int64_t val From e2936428f6b19012f181af8c65372e5df19c6c15 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 1 Nov 2023 23:46:42 -0400 Subject: [PATCH 27/34] updated benchmark --- asv_bench/benchmarks/tslibs/period.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/tslibs/period.py b/asv_bench/benchmarks/tslibs/period.py index 67f3b7736018d..af3bfac6d3d01 100644 --- a/asv_bench/benchmarks/tslibs/period.py +++ b/asv_bench/benchmarks/tslibs/period.py @@ -151,7 +151,11 @@ def setup(self, size, freq, tz): # tzlocal is cumbersomely slow, so skip to keep runtime in check raise NotImplementedError - arr = np.arange(10, dtype="i8").repeat(size // 10) + # we pick 2**55 because smaller values end up returning + # -1 from npy_datetimestruct_to_datetime with NPY_FR_Y frequency + # this artificially slows down functions since -1 is also the + # error sentinel + arr = np.arange(2**55, 2**55 + 10, dtype="i8").repeat(size // 10) self.i8values = arr def time_dt64arr_to_periodarr(self, size, freq, tz): From 7ee90ddecfe75969a7874b06c740ca9033b750fe Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 2 Nov 2023 23:09:47 -0400 Subject: [PATCH 28/34] revert noexcepts --- pandas/_libs/tslibs/period.pxd | 2 +- pandas/_libs/tslibs/period.pyx | 72 +++++++++++++++++----------------- 2 files changed, 36 
insertions(+), 38 deletions(-) diff --git a/pandas/_libs/tslibs/period.pxd b/pandas/_libs/tslibs/period.pxd index a02723408ec0c..8afe7c9b3f693 100644 --- a/pandas/_libs/tslibs/period.pxd +++ b/pandas/_libs/tslibs/period.pxd @@ -4,4 +4,4 @@ from .np_datetime cimport npy_datetimestruct cdef bint is_period_object(object obj) -cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) except? -1 nogil +cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) noexcept nogil diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index ccb94831f50a9..4e6215dfc113c 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -131,7 +131,7 @@ ctypedef struct asfreq_info: int to_end int from_end -ctypedef int64_t (*freq_conv_func)(int64_t, asfreq_info*) except? -1 nogil +ctypedef int64_t (*freq_conv_func)(int64_t, asfreq_info*) noexcept nogil cdef extern from *: @@ -340,7 +340,7 @@ cdef int64_t downsample_daytime(int64_t ordinal, asfreq_info *af_info) noexcept cdef int64_t transform_via_day(int64_t ordinal, asfreq_info *af_info, freq_conv_func first_func, - freq_conv_func second_func) except? -1 nogil: + freq_conv_func second_func) noexcept nogil: cdef: int64_t result @@ -352,7 +352,7 @@ cdef int64_t transform_via_day(int64_t ordinal, # -------------------------------------------------------------------- # Conversion _to_ Daily Freq -cdef int64_t asfreq_AtoDT(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_AtoDT(int64_t ordinal, asfreq_info *af_info) noexcept nogil: cdef: int64_t unix_date npy_datetimestruct dts @@ -368,7 +368,7 @@ cdef int64_t asfreq_AtoDT(int64_t ordinal, asfreq_info *af_info) except? -1 nogi return upsample_daytime(unix_date, af_info) -cdef int64_t asfreq_QtoDT(int64_t ordinal, asfreq_info *af_info) except? 
-1 nogil: +cdef int64_t asfreq_QtoDT(int64_t ordinal, asfreq_info *af_info) noexcept nogil: cdef: int64_t unix_date npy_datetimestruct dts @@ -384,7 +384,7 @@ cdef int64_t asfreq_QtoDT(int64_t ordinal, asfreq_info *af_info) except? -1 nogi return upsample_daytime(unix_date, af_info) -cdef int64_t asfreq_MtoDT(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_MtoDT(int64_t ordinal, asfreq_info *af_info) noexcept nogil: cdef: int64_t unix_date int year, month @@ -408,7 +408,7 @@ cdef int64_t asfreq_WtoDT(int64_t ordinal, asfreq_info *af_info) noexcept nogil: # -------------------------------------------------------------------- # Conversion _to_ BusinessDay Freq -cdef int64_t asfreq_AtoB(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_AtoB(int64_t ordinal, asfreq_info *af_info) noexcept nogil: cdef: int roll_back npy_datetimestruct dts @@ -419,7 +419,7 @@ cdef int64_t asfreq_AtoB(int64_t ordinal, asfreq_info *af_info) except? -1 nogil return DtoB(&dts, roll_back, unix_date) -cdef int64_t asfreq_QtoB(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_QtoB(int64_t ordinal, asfreq_info *af_info) noexcept nogil: cdef: int roll_back npy_datetimestruct dts @@ -430,7 +430,7 @@ cdef int64_t asfreq_QtoB(int64_t ordinal, asfreq_info *af_info) except? -1 nogil return DtoB(&dts, roll_back, unix_date) -cdef int64_t asfreq_MtoB(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_MtoB(int64_t ordinal, asfreq_info *af_info) noexcept nogil: cdef: int roll_back npy_datetimestruct dts @@ -467,7 +467,7 @@ cdef int64_t asfreq_DTtoB(int64_t ordinal, asfreq_info *af_info) noexcept nogil: # ---------------------------------------------------------------------- # Conversion _from_ Daily Freq -cdef int64_t asfreq_DTtoA(int64_t ordinal, asfreq_info *af_info) except? 
-1 nogil: +cdef int64_t asfreq_DTtoA(int64_t ordinal, asfreq_info *af_info) noexcept nogil: cdef: npy_datetimestruct dts @@ -520,30 +520,30 @@ cdef int64_t unix_date_to_week(int64_t unix_date, int to_end) noexcept nogil: # -------------------------------------------------------------------- # Conversion _from_ BusinessDay Freq -cdef int64_t asfreq_BtoDT(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_BtoDT(int64_t ordinal, asfreq_info *af_info) noexcept nogil: ordinal = ((ordinal + 3) // 5) * 7 + (ordinal + 3) % 5 - 3 return upsample_daytime(ordinal, af_info) -cdef int64_t asfreq_BtoA(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_BtoA(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_BtoDT, asfreq_DTtoA) -cdef int64_t asfreq_BtoQ(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_BtoQ(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_BtoDT, asfreq_DTtoQ) -cdef int64_t asfreq_BtoM(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_BtoM(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_BtoDT, asfreq_DTtoM) -cdef int64_t asfreq_BtoW(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_BtoW(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_BtoDT, asfreq_DTtoW) @@ -552,25 +552,25 @@ cdef int64_t asfreq_BtoW(int64_t ordinal, asfreq_info *af_info) except? -1 nogil # ---------------------------------------------------------------------- # Conversion _from_ Annual Freq -cdef int64_t asfreq_AtoA(int64_t ordinal, asfreq_info *af_info) except? 
-1 nogil: +cdef int64_t asfreq_AtoA(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_AtoDT, asfreq_DTtoA) -cdef int64_t asfreq_AtoQ(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_AtoQ(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_AtoDT, asfreq_DTtoQ) -cdef int64_t asfreq_AtoM(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_AtoM(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_AtoDT, asfreq_DTtoM) -cdef int64_t asfreq_AtoW(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_AtoW(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_AtoDT, asfreq_DTtoW) @@ -579,25 +579,25 @@ cdef int64_t asfreq_AtoW(int64_t ordinal, asfreq_info *af_info) except? -1 nogil # ---------------------------------------------------------------------- # Conversion _from_ Quarterly Freq -cdef int64_t asfreq_QtoQ(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_QtoQ(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_QtoDT, asfreq_DTtoQ) -cdef int64_t asfreq_QtoA(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_QtoA(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_QtoDT, asfreq_DTtoA) -cdef int64_t asfreq_QtoM(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_QtoM(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_QtoDT, asfreq_DTtoM) -cdef int64_t asfreq_QtoW(int64_t ordinal, asfreq_info *af_info) except? 
-1 nogil: +cdef int64_t asfreq_QtoW(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_QtoDT, asfreq_DTtoW) @@ -606,19 +606,19 @@ cdef int64_t asfreq_QtoW(int64_t ordinal, asfreq_info *af_info) except? -1 nogil # ---------------------------------------------------------------------- # Conversion _from_ Monthly Freq -cdef int64_t asfreq_MtoA(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_MtoA(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_MtoDT, asfreq_DTtoA) -cdef int64_t asfreq_MtoQ(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_MtoQ(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_MtoDT, asfreq_DTtoQ) -cdef int64_t asfreq_MtoW(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_MtoW(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_MtoDT, asfreq_DTtoW) @@ -627,25 +627,25 @@ cdef int64_t asfreq_MtoW(int64_t ordinal, asfreq_info *af_info) except? -1 nogil # ---------------------------------------------------------------------- # Conversion _from_ Weekly Freq -cdef int64_t asfreq_WtoA(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_WtoA(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_WtoDT, asfreq_DTtoA) -cdef int64_t asfreq_WtoQ(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_WtoQ(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_WtoDT, asfreq_DTtoQ) -cdef int64_t asfreq_WtoM(int64_t ordinal, asfreq_info *af_info) except? 
-1 nogil: +cdef int64_t asfreq_WtoM(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_WtoDT, asfreq_DTtoM) -cdef int64_t asfreq_WtoW(int64_t ordinal, asfreq_info *af_info) except? -1 nogil: +cdef int64_t asfreq_WtoW(int64_t ordinal, asfreq_info *af_info) noexcept nogil: return transform_via_day(ordinal, af_info, asfreq_WtoDT, asfreq_DTtoW) @@ -723,7 +723,7 @@ cdef void adjust_dts_for_qtr(npy_datetimestruct* dts, int to_end) noexcept nogil # Find the unix_date (days elapsed since datetime(1970, 1, 1) # for the given year/month/day. # Assumes GREGORIAN_CALENDAR */ -cdef int64_t unix_date_from_ymd(int year, int month, int day) except? -1 nogil: +cdef int64_t unix_date_from_ymd(int year, int month, int day) noexcept nogil: # Calculate the absolute date cdef: npy_datetimestruct dts @@ -742,7 +742,7 @@ cdef int64_t dts_to_month_ordinal(npy_datetimestruct* dts) noexcept nogil: return ((dts.year - 1970) * 12 + dts.month - 1) -cdef int64_t dts_to_year_ordinal(npy_datetimestruct *dts, int to_end) except? -1 nogil: +cdef int64_t dts_to_year_ordinal(npy_datetimestruct *dts, int to_end) noexcept nogil: cdef: int64_t result @@ -774,7 +774,7 @@ cdef int get_anchor_month(int freq, int freq_group) noexcept nogil: # specifically _dont_ use cdvision or else ordinals near -1 are assigned to # incorrect dates GH#19643 @cython.cdivision(False) -cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) except? -1 nogil: +cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) noexcept nogil: """ Generate an ordinal in period space @@ -811,13 +811,11 @@ cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) except? 
-1 no return DtoB(dts, 0, unix_date) unit = freq_group_code_to_npy_unit(freq) - unix_date = npy_datetimestruct_to_datetime(unit, dts) - - return unix_date + return npy_datetimestruct_to_datetime(unit, dts) cdef void get_date_info(int64_t ordinal, - int freq, npy_datetimestruct *dts) nogil: + int freq, npy_datetimestruct *dts) noexcept nogil: cdef: int64_t unix_date, nanos npy_datetimestruct dts2 @@ -835,7 +833,7 @@ cdef void get_date_info(int64_t ordinal, dts.ps = dts2.ps -cdef int64_t get_unix_date(int64_t period_ordinal, int freq) except? -1 nogil: +cdef int64_t get_unix_date(int64_t period_ordinal, int freq) noexcept nogil: """ Returns the proleptic Gregorian ordinal of the date, as an integer. This corresponds to the number of days since Jan., 1st, 1970 AD. From eab64ac30d38591ef3e713f67c709e177a58ae8c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 10 Nov 2023 08:18:49 -0500 Subject: [PATCH 29/34] shared function for dts formatting --- pandas/_libs/tslibs/np_datetime.pxd | 2 ++ pandas/_libs/tslibs/np_datetime.pyx | 8 ++++++-- pandas/_libs/tslibs/period.pyx | 10 +++++----- pandas/_libs/tslibs/timestamps.pyx | 8 +++----- pandas/tests/scalar/timestamp/methods/test_replace.py | 2 +- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 5d2b0ba2ceed4..ce29ca1a0ed71 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -80,6 +80,8 @@ cdef inline void import_pandas_datetime() noexcept: cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1 +cdef object dts_to_iso_string(npy_datetimestruct *dts) + cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=?) 
cdef int64_t pydatetime_to_dt64( diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 92ddfc2690406..757f6183b0822 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -194,6 +194,11 @@ cdef get_implementation_bounds( raise NotImplementedError(reso) +cdef object dts_to_iso_string(npy_datetimestruct *dts): + return (f"{dts.year}-{dts.month:02d}-{dts.day:02d} " + f"{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}") + + cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=NPY_FR_ns): """Raises OutOfBoundsDatetime if the given date is outside the range that can be represented by nanosecond-resolution 64-bit integers.""" @@ -209,8 +214,7 @@ cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=NPY_FR_ns): error = True if error: - fmt = (f"{dts.year}-{dts.month:02d}-{dts.day:02d} " - f"{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}") + fmt = dts_to_iso_string(dts) # TODO: "nanosecond" in the message assumes NPY_FR_ns raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp: {fmt}") diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index d227f32cb3551..889d898a88e41 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -43,7 +43,10 @@ from pandas._libs.tslibs.dtypes cimport ( freq_to_period_freqstr, ) -from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime +from pandas._libs.tslibs.np_datetime import ( + OutOfBoundsDatetime, + dts_to_iso_string, +) # import datetime C API import_datetime() @@ -1167,10 +1170,7 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? 
-1: try: result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_ns, &dts) except OverflowError as err: - # TODO: this is copied from check_dts_bounds, with the thought that - # eventually we can get rid of check_dts_bounds - fmt = (f"{dts.year}-{dts.month:02d}-{dts.day:02d} " - f"{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}") + fmt = dts_to_iso_string(dts) raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp: {fmt}") from err return result diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 9e9a9d36fb1f2..3935ddae1aa56 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -100,6 +100,7 @@ import_pandas_datetime() from pandas._libs.tslibs.np_datetime import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, + dts_to_iso_string, ) from pandas._libs.tslibs.offsets cimport to_offset @@ -2492,12 +2493,9 @@ default 'raise' try: ts.value = npy_datetimestruct_to_datetime(self._creso, &dts) except OverflowError as err: - # TODO: create shared function to create this format from dts struct - fmt = (f"{dts.year}-{dts.month:02d}-{dts.day:02d} " - f"{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}") + fmt = dts_to_iso_string(dts) raise OutOfBoundsDatetime( - f"Out of bounds timestamp: {fmt} " - f"with NPY_DATETIMEUNIT {self._creso}" + f"Out of bounds timestamp: {fmt} with frequency '{self.unit}'" ) from err ts.dts = dts ts.creso = self._creso diff --git a/pandas/tests/scalar/timestamp/methods/test_replace.py b/pandas/tests/scalar/timestamp/methods/test_replace.py index 4357851e935ff..af95ba59e2a21 100644 --- a/pandas/tests/scalar/timestamp/methods/test_replace.py +++ b/pandas/tests/scalar/timestamp/methods/test_replace.py @@ -21,7 +21,7 @@ def test_replace_out_of_pydatetime_bounds(self): # GH#50348 ts = Timestamp("2016-01-01").as_unit("ns") - msg = "Out of bounds timestamp: 99999-01-01 00:00:00 with NPY_DATETIMEUNIT 10" + msg = "Out of bounds timestamp: 99999-01-01 00:00:00 with precision 'ns'" 
with pytest.raises(OutOfBoundsDatetime, match=msg): ts.replace(year=99_999) From 7fbd191f635d6546290a60aaf2da1ce23ba14976 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 10 Nov 2023 08:26:10 -0500 Subject: [PATCH 30/34] import -> cimport --- pandas/_libs/tslibs/period.pyx | 6 ++---- pandas/_libs/tslibs/timestamps.pyx | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 889d898a88e41..76fd5272f0f71 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -43,10 +43,7 @@ from pandas._libs.tslibs.dtypes cimport ( freq_to_period_freqstr, ) -from pandas._libs.tslibs.np_datetime import ( - OutOfBoundsDatetime, - dts_to_iso_string, -) +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime # import datetime C API import_datetime() @@ -57,6 +54,7 @@ from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_D, astype_overflowsafe, + dts_to_iso_string, import_pandas_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3935ddae1aa56..b36848bf936e9 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -86,6 +86,7 @@ from pandas._libs.tslibs.np_datetime cimport ( cmp_dtstructs, cmp_scalar, convert_reso, + dts_to_iso_string, get_datetime64_unit, get_unit_from_dtype, import_pandas_datetime, @@ -100,7 +101,6 @@ import_pandas_datetime() from pandas._libs.tslibs.np_datetime import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, - dts_to_iso_string, ) from pandas._libs.tslibs.offsets cimport to_offset From 4eba919342edf76ee96ac29a69d09751a1e2fda1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 10 Nov 2023 08:27:14 -0500 Subject: [PATCH 31/34] pass address --- pandas/_libs/tslibs/period.pyx | 2 +- pandas/_libs/tslibs/timestamps.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 76fd5272f0f71..318e018689a78 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1168,7 +1168,7 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1: try: result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_ns, &dts) except OverflowError as err: - fmt = dts_to_iso_string(dts) + fmt = dts_to_iso_string(&dts) raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp: {fmt}") from err return result diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index b36848bf936e9..64a22cae931b6 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -2493,7 +2493,7 @@ default 'raise' try: ts.value = npy_datetimestruct_to_datetime(self._creso, &dts) except OverflowError as err: - fmt = dts_to_iso_string(dts) + fmt = dts_to_iso_string(&dts) raise OutOfBoundsDatetime( f"Out of bounds timestamp: {fmt} with frequency '{self.unit}'" ) from err From 9e1914d05b7c4460de25d0a451fd4549a1eea2e9 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 10 Nov 2023 08:52:00 -0500 Subject: [PATCH 32/34] typo --- pandas/tests/scalar/timestamp/methods/test_replace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/scalar/timestamp/methods/test_replace.py b/pandas/tests/scalar/timestamp/methods/test_replace.py index af95ba59e2a21..8a208455edc82 100644 --- a/pandas/tests/scalar/timestamp/methods/test_replace.py +++ b/pandas/tests/scalar/timestamp/methods/test_replace.py @@ -21,7 +21,7 @@ def test_replace_out_of_pydatetime_bounds(self): # GH#50348 ts = Timestamp("2016-01-01").as_unit("ns") - msg = "Out of bounds timestamp: 99999-01-01 00:00:00 with precision 'ns'" + msg = "Out of bounds timestamp: 99999-01-01 00:00:00 with frequency 'ns'" with pytest.raises(OutOfBoundsDatetime, match=msg): ts.replace(year=99_999) From 162c8587fb2d198c08d6dd69366acfb2363e1bf8 Mon 
Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 13 Nov 2023 20:02:30 -0500 Subject: [PATCH 33/34] remove comment --- pandas/_libs/tslibs/timestamps.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 64a22cae931b6..56a6885d4a9e0 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -580,7 +580,6 @@ cdef class _Timestamp(ABCTimestamp): if own_tz is not None and not is_utc(own_tz): pydatetime_to_dtstruct(self, &dts) - # TODO: can + self.nanosecond also overflow here? val = npy_datetimestruct_to_datetime(self._creso, &dts) + self.nanosecond else: val = self._value From de90a2f4c52ae62a8e789cec1822b1f387bb146e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 14 Nov 2023 18:08:52 -0500 Subject: [PATCH 34/34] cdef object -> str --- pandas/_libs/tslibs/np_datetime.pxd | 2 +- pandas/_libs/tslibs/np_datetime.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index ce29ca1a0ed71..9cc211b748f68 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -80,7 +80,7 @@ cdef inline void import_pandas_datetime() noexcept: cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1 -cdef object dts_to_iso_string(npy_datetimestruct *dts) +cdef str dts_to_iso_string(npy_datetimestruct *dts) cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=?) 
diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 757f6183b0822..71a194177bf82 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -194,7 +194,7 @@ cdef get_implementation_bounds( raise NotImplementedError(reso) -cdef object dts_to_iso_string(npy_datetimestruct *dts): +cdef str dts_to_iso_string(npy_datetimestruct *dts): return (f"{dts.year}-{dts.month:02d}-{dts.day:02d} " f"{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}")