diff --git a/asv_bench/benchmarks/tslibs/period.py b/asv_bench/benchmarks/tslibs/period.py index 67f3b7736018d..af3bfac6d3d01 100644 --- a/asv_bench/benchmarks/tslibs/period.py +++ b/asv_bench/benchmarks/tslibs/period.py @@ -151,7 +151,11 @@ def setup(self, size, freq, tz): # tzlocal is cumbersomely slow, so skip to keep runtime in check raise NotImplementedError - arr = np.arange(10, dtype="i8").repeat(size // 10) + # we pick 2**55 because smaller values end up returning + # -1 from npy_datetimestruct_to_datetime with NPY_FR_Y frequency + # this artificially slows down functions since -1 is also the + # error sentinel + arr = np.arange(2**55, 2**55 + 10, dtype="i8").repeat(size // 10) self.i8values = arr def time_dt64arr_to_periodarr(self, size, freq, tz): diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index db1c735bd6094..01e11e5138a8e 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -29,6 +29,58 @@ This file is derived from NumPy 1.7. 
See NUMPY_LICENSE.txt #include #include +#if defined(_WIN32) +#ifndef ENABLE_INTSAFE_SIGNED_FUNCTIONS +#define ENABLE_INTSAFE_SIGNED_FUNCTIONS +#endif +#include <intsafe.h> +#define checked_int64_add(a, b, res) LongLongAdd(a, b, res) +#define checked_int64_sub(a, b, res) LongLongSub(a, b, res) +#define checked_int64_mul(a, b, res) LongLongMult(a, b, res) +#else +#if defined __has_builtin +#if __has_builtin(__builtin_add_overflow) +#if _LP64 || __LP64__ || _ILP64 || __ILP64__ +#define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res) +#define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res) +#define checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res) +#else +#define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res) +#define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res) +#define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res) +#endif +#else +_Static_assert(0, + "Overflow checking not detected; please try a newer compiler"); +#endif +// __has_builtin was added in gcc 10, but our muslinux_1_1 build environment +// only has gcc-9.3, so fall back to __GNUC__ macro as long as we have that +#elif __GNUC__ > 7 +#if _LP64 || __LP64__ || _ILP64 || __ILP64__ +#define checked_int64_add(a, b, res) __builtin_saddl_overflow(a, b, res) +#define checked_int64_sub(a, b, res) __builtin_ssubl_overflow(a, b, res) +#define checked_int64_mul(a, b, res) __builtin_smull_overflow(a, b, res) +#else +#define checked_int64_add(a, b, res) __builtin_saddll_overflow(a, b, res) +#define checked_int64_sub(a, b, res) __builtin_ssubll_overflow(a, b, res) +#define checked_int64_mul(a, b, res) __builtin_smulll_overflow(a, b, res) +#endif +#else +_Static_assert(0, "__has_builtin not detected; please try a newer compiler"); +#endif +#endif + +#define PD_CHECK_OVERFLOW(FUNC) \ + do { \ + if ((FUNC) != 0) { \ + PyGILState_STATE gstate = PyGILState_Ensure(); \ + PyErr_SetString(PyExc_OverflowError, \ + "Overflow occurred in
npy_datetimestruct_to_datetime"); \ + PyGILState_Release(gstate); \ + return -1; \ + } \ + } while (0) + const int days_per_month_table[2][12] = { {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; @@ -299,96 +351,189 @@ PyObject *extract_utc_offset(PyObject *obj) { return tmp; } +static inline int scaleYearToEpoch(int64_t year, int64_t *result) { + return checked_int64_sub(year, 1970, result); +} + +static inline int scaleYearsToMonths(int64_t years, int64_t *result) { + return checked_int64_mul(years, 12, result); +} + +static inline int scaleDaysToWeeks(int64_t days, int64_t *result) { + if (days >= 0) { + *result = days / 7; + return 0; + } else { + int res; + int64_t checked_days; + if ((res = checked_int64_sub(days, 6, &checked_days))) { + return res; + } + + *result = checked_days / 7; + return 0; + } +} + +static inline int scaleDaysToHours(int64_t days, int64_t *result) { + return checked_int64_mul(days, 24, result); +} + +static inline int scaleHoursToMinutes(int64_t hours, int64_t *result) { + return checked_int64_mul(hours, 60, result); +} + +static inline int scaleMinutesToSeconds(int64_t minutes, int64_t *result) { + return checked_int64_mul(minutes, 60, result); +} + +static inline int scaleSecondsToMilliseconds(int64_t seconds, int64_t *result) { + return checked_int64_mul(seconds, 1000, result); +} + +static inline int scaleSecondsToMicroseconds(int64_t seconds, int64_t *result) { + return checked_int64_mul(seconds, 1000000, result); +} + +static inline int scaleMicrosecondsToNanoseconds(int64_t microseconds, + int64_t *result) { + return checked_int64_mul(microseconds, 1000, result); +} + +static inline int scaleMicrosecondsToPicoseconds(int64_t microseconds, + int64_t *result) { + return checked_int64_mul(microseconds, 1000000, result); +} + +static inline int64_t scalePicosecondsToFemtoseconds(int64_t picoseconds, + int64_t *result) { + return checked_int64_mul(picoseconds, 1000, result); +} + 
+static inline int64_t scalePicosecondsToAttoseconds(int64_t picoseconds, + int64_t *result) { + return checked_int64_mul(picoseconds, 1000000, result); +} + /* * Converts a datetime from a datetimestruct to a datetime based - * on a metadata unit. The date is assumed to be valid. + * on a metadata unit. Returns -1 and sets PyErr on error. */ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, const npy_datetimestruct *dts) { - npy_datetime ret; - - if (base == NPY_FR_Y) { - /* Truncate to the year */ - ret = dts->year - 1970; - } else if (base == NPY_FR_M) { - /* Truncate to the month */ - ret = 12 * (dts->year - 1970) + (dts->month - 1); - } else { - /* Otherwise calculate the number of days to start */ - npy_int64 days = get_datetimestruct_days(dts); - - switch (base) { - case NPY_FR_W: - /* Truncate to weeks */ - if (days >= 0) { - ret = days / 7; - } else { - ret = (days - 6) / 7; - } - break; - case NPY_FR_D: - ret = days; - break; - case NPY_FR_h: - ret = days * 24 + dts->hour; - break; - case NPY_FR_m: - ret = (days * 24 + dts->hour) * 60 + dts->min; - break; - case NPY_FR_s: - ret = ((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec; - break; - case NPY_FR_ms: - ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * 1000 + - dts->us / 1000; - break; - case NPY_FR_us: - ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * - 1000000 + - dts->us; - break; - case NPY_FR_ns: - ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * - 1000000 + - dts->us) * - 1000 + - dts->ps / 1000; - break; - case NPY_FR_ps: - ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps; - break; - case NPY_FR_fs: - /* only 2.6 hours */ - ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps) * - 1000 + - dts->as / 1000; - break; - case NPY_FR_as: - /* only 9.2 secs */ - ret = (((((days * 24 +
dts->hour) * 60 + dts->min) * 60 + dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps) * - 1000000 + - dts->as; - break; - default: - /* Something got corrupted */ - PyErr_SetString(PyExc_ValueError, - "NumPy datetime metadata with corrupt unit value"); - return -1; - } - } - return ret; + if ((base == NPY_FR_Y) || (base == NPY_FR_M)) { + int64_t years; + PD_CHECK_OVERFLOW(scaleYearToEpoch(dts->year, &years)); + + if (base == NPY_FR_Y) { + return years; + } + + int64_t months; + PD_CHECK_OVERFLOW(scaleYearsToMonths(years, &months)); + + int64_t months_adder; + PD_CHECK_OVERFLOW(checked_int64_sub(dts->month, 1, &months_adder)); + PD_CHECK_OVERFLOW(checked_int64_add(months, months_adder, &months)); + + if (base == NPY_FR_M) { + return months; + } + } + + const int64_t days = get_datetimestruct_days(dts); + if (base == NPY_FR_D) { + return days; + } + + if (base == NPY_FR_W) { + int64_t weeks; + PD_CHECK_OVERFLOW(scaleDaysToWeeks(days, &weeks)); + return weeks; + } + + int64_t hours; + PD_CHECK_OVERFLOW(scaleDaysToHours(days, &hours)); + PD_CHECK_OVERFLOW(checked_int64_add(hours, dts->hour, &hours)); + + if (base == NPY_FR_h) { + return hours; + } + + int64_t minutes; + PD_CHECK_OVERFLOW(scaleHoursToMinutes(hours, &minutes)); + PD_CHECK_OVERFLOW(checked_int64_add(minutes, dts->min, &minutes)); + + if (base == NPY_FR_m) { + return minutes; + } + + int64_t seconds; + PD_CHECK_OVERFLOW(scaleMinutesToSeconds(minutes, &seconds)); + PD_CHECK_OVERFLOW(checked_int64_add(seconds, dts->sec, &seconds)); + + if (base == NPY_FR_s) { + return seconds; + } + + if (base == NPY_FR_ms) { + int64_t milliseconds; + PD_CHECK_OVERFLOW(scaleSecondsToMilliseconds(seconds, &milliseconds)); + PD_CHECK_OVERFLOW( + checked_int64_add(milliseconds, dts->us / 1000, &milliseconds)); + + return milliseconds; + } + + int64_t microseconds; + PD_CHECK_OVERFLOW(scaleSecondsToMicroseconds(seconds, &microseconds)); + PD_CHECK_OVERFLOW(checked_int64_add(microseconds, dts->us, &microseconds)); + + if (base ==
NPY_FR_us) { + return microseconds; + } + + if (base == NPY_FR_ns) { + int64_t nanoseconds; + PD_CHECK_OVERFLOW( + scaleMicrosecondsToNanoseconds(microseconds, &nanoseconds)); + PD_CHECK_OVERFLOW( + checked_int64_add(nanoseconds, dts->ps / 1000, &nanoseconds)); + + return nanoseconds; + } + + int64_t picoseconds; + PD_CHECK_OVERFLOW(scaleMicrosecondsToPicoseconds(microseconds, &picoseconds)); + PD_CHECK_OVERFLOW(checked_int64_add(picoseconds, dts->ps, &picoseconds)); + + if (base == NPY_FR_ps) { + return picoseconds; + } + + if (base == NPY_FR_fs) { + int64_t femtoseconds; + PD_CHECK_OVERFLOW( + scalePicosecondsToFemtoseconds(picoseconds, &femtoseconds)); + PD_CHECK_OVERFLOW( + checked_int64_add(femtoseconds, dts->as / 1000, &femtoseconds)); + return femtoseconds; + } + + if (base == NPY_FR_as) { + int64_t attoseconds; + PD_CHECK_OVERFLOW(scalePicosecondsToAttoseconds(picoseconds, &attoseconds)); + PD_CHECK_OVERFLOW(checked_int64_add(attoseconds, dts->as, &attoseconds)); + return attoseconds; + } + + /* Something got corrupted */ + PyGILState_STATE gstate = PyGILState_Ensure(); + PyErr_SetString(PyExc_ValueError, + "NumPy datetime metadata with corrupt unit value"); + PyGILState_Release(gstate); + + return -1; } /* diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 72faf1757a7b0..f073d099d37ee 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -34,7 +34,6 @@ cnp.import_array() from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, - check_dts_bounds, import_pandas_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, @@ -99,8 +98,10 @@ def _test_parse_iso8601(ts: str): obj = _TSObject() string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True) - obj.value = npy_datetimestruct_to_datetime(NPY_FR_ns, &obj.dts) - check_dts_bounds(&obj.dts) + try: + obj.value = npy_datetimestruct_to_datetime(NPY_FR_ns, &obj.dts) + except OverflowError as err: + raise OutOfBoundsDatetime(f"Out of bounds 
nanosecond timestamp: {ts}") from err if out_local == 1: obj.tzinfo = timezone(timedelta(minutes=out_tzoffset)) obj.value = tz_localize_to_utc_single(obj.value, obj.tzinfo) @@ -488,7 +489,6 @@ cpdef array_to_datetime( elif PyDate_Check(val): iresult[i] = pydate_to_dt64(val, &dts, reso=creso) - check_dts_bounds(&dts, creso) state.found_other = True elif is_datetime64_object(val): diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index a2bc6d4d52f2e..4f14782d9efbb 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -212,8 +212,12 @@ cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1: if unit != reso: pandas_datetime_to_datetimestruct(ival, unit, &dts) - check_dts_bounds(&dts, reso) - ival = npy_datetimestruct_to_datetime(reso, &dts) + try: + ival = npy_datetimestruct_to_datetime(reso, &dts) + except OverflowError as err: + raise OutOfBoundsDatetime( + f"Out of bounds nanosecond timestamp: {val}" + ) from err return ival @@ -413,14 +417,16 @@ cdef _TSObject convert_datetime_to_tsobject( if nanos: obj.dts.ps = nanos * 1000 - obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts) + try: + obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts) + except OverflowError as err: + raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from err if obj.tzinfo is not None and not is_utc(obj.tzinfo): offset = get_utcoffset(obj.tzinfo, ts) pps = periods_per_second(reso) obj.value -= int(offset.total_seconds() * pps) - check_dts_bounds(&obj.dts, reso) check_overflows(obj, reso) return obj @@ -713,5 +719,4 @@ cdef int64_t parse_pydatetime( result = (<_Timestamp>val)._as_creso(creso, round_ok=False)._value else: result = pydatetime_to_dt64(val, dts, reso=creso) - check_dts_bounds(dts, creso) return result diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 445b832e8a5bd..9cc211b748f68 100644 ---
a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -60,7 +60,7 @@ cdef extern from "pandas/datetime/pd_datetime.h": npy_datetimestruct *result) nogil npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, - npy_datetimestruct *d) nogil + npy_datetimestruct *d) except? -1 nogil void pandas_timedelta_to_timedeltastruct(npy_timedelta val, NPY_DATETIMEUNIT fr, @@ -80,15 +80,17 @@ cdef inline void import_pandas_datetime() noexcept: cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1 +cdef str dts_to_iso_string(npy_datetimestruct *dts) + cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=?) cdef int64_t pydatetime_to_dt64( datetime val, npy_datetimestruct *dts, NPY_DATETIMEUNIT reso=? -) +) except? -1 cdef void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts) noexcept cdef int64_t pydate_to_dt64( date val, npy_datetimestruct *dts, NPY_DATETIMEUNIT reso=? -) +) except? -1 cdef void pydate_to_dtstruct(date val, npy_datetimestruct *dts) noexcept cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) noexcept nogil diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 3ffd70f83e88f..71a194177bf82 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -194,6 +194,11 @@ cdef get_implementation_bounds( raise NotImplementedError(reso) +cdef str dts_to_iso_string(npy_datetimestruct *dts): + return (f"{dts.year}-{dts.month:02d}-{dts.day:02d} " + f"{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}") + + cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=NPY_FR_ns): """Raises OutOfBoundsDatetime if the given date is outside the range that can be represented by nanosecond-resolution 64-bit integers.""" @@ -209,8 +214,7 @@ cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=NPY_FR_ns): error = True if error: - fmt = (f"{dts.year}-{dts.month:02d}-{dts.day:02d} " - 
f"{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}") + fmt = dts_to_iso_string(dts) # TODO: "nanosecond" in the message assumes NPY_FR_ns raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp: {fmt}") @@ -246,12 +250,20 @@ cdef void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts) noexcept: cdef int64_t pydatetime_to_dt64(datetime val, npy_datetimestruct *dts, - NPY_DATETIMEUNIT reso=NPY_FR_ns): + NPY_DATETIMEUNIT reso=NPY_FR_ns) except? -1: """ Note we are assuming that the datetime object is timezone-naive. """ + cdef int64_t result pydatetime_to_dtstruct(val, dts) - return npy_datetimestruct_to_datetime(reso, dts) + try: + result = npy_datetimestruct_to_datetime(reso, dts) + except OverflowError as err: + raise OutOfBoundsDatetime( + f"Out of bounds nanosecond timestamp: {val}" + ) from err + + return result cdef void pydate_to_dtstruct(date val, npy_datetimestruct *dts) noexcept: @@ -265,9 +277,16 @@ cdef void pydate_to_dtstruct(date val, npy_datetimestruct *dts) noexcept: cdef int64_t pydate_to_dt64( date val, npy_datetimestruct *dts, NPY_DATETIMEUNIT reso=NPY_FR_ns -): +) except? -1: + cdef int64_t result pydate_to_dtstruct(val, dts) - return npy_datetimestruct_to_datetime(reso, dts) + + try: + result = npy_datetimestruct_to_datetime(reso, dts) + except OverflowError as err: + raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp: {val}") from err + + return result cdef int string_to_dts( @@ -641,7 +660,12 @@ cdef int64_t _convert_reso_with_dtstruct( ) except? 
-1: cdef: npy_datetimestruct dts + int64_t result pandas_datetime_to_datetimestruct(value, from_unit, &dts) - check_dts_bounds(&dts, to_unit) - return npy_datetimestruct_to_datetime(to_unit, &dts) + try: + result = npy_datetimestruct_to_datetime(to_unit, &dts) + except OverflowError as err: + raise OutOfBoundsDatetime from err + + return result diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index d1f925f3a0b48..318e018689a78 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -43,6 +43,8 @@ from pandas._libs.tslibs.dtypes cimport ( freq_to_period_freqstr, ) +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime + # import datetime C API import_datetime() @@ -52,7 +54,7 @@ from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_D, astype_overflowsafe, - check_dts_bounds, + dts_to_iso_string, import_pandas_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, @@ -1156,14 +1158,20 @@ cpdef int64_t period_ordinal(int y, int m, int d, int h, int min, cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? 
-1: cdef: npy_datetimestruct dts + int64_t result if ordinal == NPY_NAT: return NPY_NAT get_date_info(ordinal, freq, &dts) - check_dts_bounds(&dts) - return npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_ns, &dts) + try: + result = npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_ns, &dts) + except OverflowError as err: + fmt = dts_to_iso_string(&dts) + raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp: {fmt}") from err + + return result cdef str period_format(int64_t value, int freq, object fmt=None): diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index c8fd95be34cc0..8966c9e81699b 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -60,7 +60,6 @@ from pandas._libs.tslibs.nattype cimport ( from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, - check_dts_bounds, get_datetime64_unit, import_pandas_datetime, npy_datetimestruct, @@ -369,7 +368,6 @@ def array_strptime( iresult[i] = pydatetime_to_dt64( val.replace(tzinfo=None), &dts, reso=creso ) - check_dts_bounds(&dts, creso) result_timezone[i] = val.tzinfo continue elif PyDate_Check(val): @@ -378,7 +376,6 @@ def array_strptime( if infer_reso: creso = state.creso iresult[i] = pydate_to_dt64(val, &dts, reso=creso) - check_dts_bounds(&dts, creso) continue elif is_datetime64_object(val): item_reso = get_supported_reso(get_datetime64_unit(val)) @@ -413,7 +410,12 @@ def array_strptime( state.update_creso(item_reso) if infer_reso: creso = state.creso - value = npy_datetimestruct_to_datetime(creso, &dts) + try: + value = npy_datetimestruct_to_datetime(creso, &dts) + except OverflowError as err: + raise OutOfBoundsDatetime( + f"Out of bounds nanosecond timestamp: {val}" + ) from err if out_local == 1: # Store the out_tzoffset in seconds # since we store the total_seconds of @@ -423,7 +425,6 @@ def array_strptime( out_local = 0 out_tzoffset = 0 iresult[i] = value - check_dts_bounds(&dts, creso) continue if 
parse_today_now(val, &iresult[i], utc, creso, infer_reso=infer_reso): @@ -444,11 +445,16 @@ def array_strptime( tz = _parse_with_format( val, fmt, exact, format_regex, locale_time, &dts, &item_reso ) + state.update_creso(item_reso) if infer_reso: creso = state.creso - iresult[i] = npy_datetimestruct_to_datetime(creso, &dts) - check_dts_bounds(&dts, creso) + try: + iresult[i] = npy_datetimestruct_to_datetime(creso, &dts) + except OverflowError as err: + raise OutOfBoundsDatetime( + f"Out of bounds nanosecond timestamp: {val}" + ) from err result_timezone[i] = tz except (ValueError, OutOfBoundsDatetime) as ex: diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd index 26018cd904249..bd73c713f6c04 100644 --- a/pandas/_libs/tslibs/timestamps.pxd +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -26,7 +26,7 @@ cdef class _Timestamp(ABCTimestamp): cdef bint _get_start_end_field(self, str field, freq) cdef _get_date_name_field(self, str field, object locale) - cdef int64_t _maybe_convert_value_to_local(self) + cdef int64_t _maybe_convert_value_to_local(self) except? -1 cdef bint _can_compare(self, datetime other) cpdef to_datetime64(self) cpdef datetime to_pydatetime(_Timestamp self, bint warn=*) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index d0c3c5c23b272..56a6885d4a9e0 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -83,10 +83,10 @@ from pandas._libs.tslibs.nattype cimport ( from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, - check_dts_bounds, cmp_dtstructs, cmp_scalar, convert_reso, + dts_to_iso_string, get_datetime64_unit, get_unit_from_dtype, import_pandas_datetime, @@ -571,7 +571,7 @@ cdef class _Timestamp(ABCTimestamp): # ----------------------------------------------------------------- - cdef int64_t _maybe_convert_value_to_local(self): + cdef int64_t _maybe_convert_value_to_local(self) except? 
-1: """Convert UTC i8 value to local i8 value if tz exists""" cdef: int64_t val @@ -2489,8 +2489,13 @@ default 'raise' # We can avoid going through pydatetime paths, which is robust # to datetimes outside of pydatetime range. ts = _TSObject() - check_dts_bounds(&dts, self._creso) - ts.value = npy_datetimestruct_to_datetime(self._creso, &dts) + try: + ts.value = npy_datetimestruct_to_datetime(self._creso, &dts) + except OverflowError as err: + fmt = dts_to_iso_string(&dts) + raise OutOfBoundsDatetime( + f"Out of bounds timestamp: {fmt} with frequency '{self.unit}'" + ) from err ts.dts = dts ts.creso = self._creso ts.fold = fold diff --git a/pandas/tests/scalar/timestamp/methods/test_replace.py b/pandas/tests/scalar/timestamp/methods/test_replace.py index 9b2b21cf7f388..8a208455edc82 100644 --- a/pandas/tests/scalar/timestamp/methods/test_replace.py +++ b/pandas/tests/scalar/timestamp/methods/test_replace.py @@ -21,7 +21,7 @@ def test_replace_out_of_pydatetime_bounds(self): # GH#50348 ts = Timestamp("2016-01-01").as_unit("ns") - msg = "Out of bounds nanosecond timestamp: 99999-01-01 00:00:00" + msg = "Out of bounds timestamp: 99999-01-01 00:00:00 with frequency 'ns'" with pytest.raises(OutOfBoundsDatetime, match=msg): ts.replace(year=99_999) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 86560969d10ce..3ab214196a218 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -15,6 +15,7 @@ tslib, ) from pandas._libs.tslibs.dtypes import NpyDatetimeUnit +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas import Timestamp import pandas._testing as tm @@ -162,7 +163,7 @@ def test_coerce_outside_ns_bounds(invalid_date, errors): if errors == "raise": msg = "^Out of bounds nanosecond timestamp: .*, at position 0$" - with pytest.raises(ValueError, match=msg): + with pytest.raises(OutOfBoundsDatetime, match=msg): 
tslib.array_to_datetime(**kwargs) else: # coerce. result, _ = tslib.array_to_datetime(**kwargs)