diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 5269cddf8d2fd..7a335b19eb87b 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -47,7 +47,6 @@ from datetime cimport ( npy_datetime, is_leapyear, dayofweek, - check_dts_bounds, PANDAS_FR_ns, PyDateTime_Check, PyDate_Check, PyDateTime_IMPORT, @@ -58,6 +57,9 @@ from datetime cimport ( from datetime import timedelta, datetime from datetime import time as datetime_time +from tslibs.np_datetime cimport check_dts_bounds +from tslibs.np_datetime import OutOfBoundsDatetime + from khash cimport ( khiter_t, kh_destroy_int64, kh_put_int64, @@ -732,7 +734,7 @@ class Timestamp(_Timestamp): ts = convert_datetime_to_tsobject(ts_input, _tzinfo) value = ts.value + (dts.ps // 1000) if value != NPY_NAT: - _check_dts_bounds(&dts) + check_dts_bounds(&dts) return create_timestamp_from_ts(value, dts, _tzinfo, self.freq) @@ -1645,7 +1647,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit, 'Timestamp'.format(ts, type(ts))) if obj.value != NPY_NAT: - _check_dts_bounds(&obj.dts) + check_dts_bounds(&obj.dts) if tz is not None: _localize_tso(obj, tz) @@ -1726,7 +1728,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.value += nanos obj.dts.ps = nanos * 1000 - _check_dts_bounds(&obj.dts) + check_dts_bounds(&obj.dts) return obj @@ -1762,12 +1764,12 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset) obj.value = pandas_datetimestruct_to_datetime( PANDAS_FR_ns, &obj.dts) - _check_dts_bounds(&obj.dts) + check_dts_bounds(&obj.dts) if out_local == 1: obj.tzinfo = pytz.FixedOffset(out_tzoffset) obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC') if tz is None: - _check_dts_bounds(&obj.dts) + check_dts_bounds(&obj.dts) return obj else: # Keep the converter same as PyDateTime's @@ -1810,7 +1812,7 @@ def _test_parse_iso8601(object ts): _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset) obj.value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &obj.dts) - _check_dts_bounds(&obj.dts) + check_dts_bounds(&obj.dts) if out_local == 1: obj.tzinfo = pytz.FixedOffset(out_tzoffset) obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC') @@ -1897,18 +1899,6 @@ cpdef inline object _localize_pydatetime(object dt, object tz): return dt.replace(tzinfo=tz) -class OutOfBoundsDatetime(ValueError): - pass - -cdef inline _check_dts_bounds(pandas_datetimestruct *dts): - if check_dts_bounds(dts): - fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month, - dts.day, dts.hour, - dts.min, dts.sec) - raise OutOfBoundsDatetime( - 'Out of bounds nanosecond timestamp: %s' % fmt) - - def datetime_to_datetime64(ndarray[object] values): cdef: Py_ssize_t i, n = len(values) @@ -1933,13 +1923,13 @@ def datetime_to_datetime64(ndarray[object] values): _ts = convert_datetime_to_tsobject(val, None) iresult[i] = _ts.value - _check_dts_bounds(&_ts.dts) + check_dts_bounds(&_ts.dts) else: if inferred_tz is not None: raise ValueError('Cannot mix tz-aware with ' 'tz-naive values') iresult[i] = _pydatetime_to_dts(val, &dts) - _check_dts_bounds(&dts) + check_dts_bounds(&dts) else: raise TypeError('Unrecognized value type: %s' % type(val)) @@ -2252,7 +2242,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', _ts = convert_datetime_to_tsobject(val, None) iresult[i] = _ts.value try: - _check_dts_bounds(&_ts.dts) + check_dts_bounds(&_ts.dts) except ValueError: if is_coerce: iresult[i] = NPY_NAT @@ -2267,7 +2257,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', if is_timestamp(val): iresult[i] += val.nanosecond try: - _check_dts_bounds(&dts) + check_dts_bounds(&dts) except ValueError: if is_coerce: iresult[i] = NPY_NAT @@ -2277,7 +2267,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', elif PyDate_Check(val): iresult[i] = _date_to_datetime64(val, &dts) try: - _check_dts_bounds(&dts) + check_dts_bounds(&dts) seen_datetime = 1 except ValueError: if is_coerce: @@ -2334,7 +2324,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', tz = pytz.FixedOffset(out_tzoffset) value = tz_convert_single(value, tz, 'UTC') iresult[i] = value - _check_dts_bounds(&dts) + check_dts_bounds(&dts) except ValueError: # if requiring iso8601 strings, skip trying other formats if require_iso8601: @@ -2433,7 +2423,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', oresult[i] = parse_datetime_string(val, dayfirst=dayfirst, yearfirst=yearfirst) _pydatetime_to_dts(oresult[i], &dts) - _check_dts_bounds(&dts) + check_dts_bounds(&dts) except Exception: if is_raise: raise @@ -3239,7 +3229,7 @@ cdef inline _get_datetime64_nanos(object val): if unit != PANDAS_FR_ns: pandas_datetime_to_datetimestruct(ival, unit, &dts) - _check_dts_bounds(&dts) + check_dts_bounds(&dts) return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) else: return ival @@ -3267,7 +3257,7 @@ def cast_to_nanoseconds(ndarray arr): if ivalues[i] != NPY_NAT: pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts) iresult[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) - _check_dts_bounds(&dts) + check_dts_bounds(&dts) else: iresult[i] = NPY_NAT diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd new file mode 100644 index 0000000000000..d4079aae68900 --- /dev/null +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- +# cython: profile=False + +from numpy cimport int64_t, int32_t + + +cdef extern from "../src/datetime/np_datetime.h": + ctypedef struct pandas_datetimestruct: + int64_t year + int32_t month, day, hour, min, sec, us, ps, as + + +cdef check_dts_bounds(pandas_datetimestruct *dts) + +cdef int64_t dtstruct_to_dt64(pandas_datetimestruct* dts) nogil +cdef void dt64_to_dtstruct(int64_t dt64, pandas_datetimestruct* out) nogil diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx new file mode 100644 index 0000000000000..38158c816812d --- /dev/null +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +# cython: profile=False + +from numpy cimport int64_t + +cdef extern from "numpy/ndarrayobject.h": + ctypedef int64_t npy_timedelta + ctypedef int64_t npy_datetime + +cdef extern from "../src/datetime/np_datetime.h": + ctypedef enum PANDAS_DATETIMEUNIT: + PANDAS_FR_Y + PANDAS_FR_M + PANDAS_FR_W + PANDAS_FR_D + PANDAS_FR_B + PANDAS_FR_h + PANDAS_FR_m + PANDAS_FR_s + PANDAS_FR_ms + PANDAS_FR_us + PANDAS_FR_ns + PANDAS_FR_ps + PANDAS_FR_fs + PANDAS_FR_as + + int cmp_pandas_datetimestruct(pandas_datetimestruct *a, + pandas_datetimestruct *b) + + npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr, + pandas_datetimestruct *d + ) nogil + + void pandas_datetime_to_datetimestruct(npy_datetime val, + PANDAS_DATETIMEUNIT fr, + pandas_datetimestruct *result) nogil + + pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS + +# ---------------------------------------------------------------------- + + +class OutOfBoundsDatetime(ValueError): + pass + + +cdef inline check_dts_bounds(pandas_datetimestruct *dts): + """Raises OutOfBoundsDatetime if the given date is outside the range that + can be represented by nanosecond-resolution 64-bit integers.""" + cdef: + bint error = False + + if (dts.year <= 1677 and + cmp_pandas_datetimestruct(dts, &_NS_MIN_DTS) == -1): + error = True + elif (dts.year >= 2262 and + cmp_pandas_datetimestruct(dts, &_NS_MAX_DTS) == 1): + error = True + + if error: + fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month, + dts.day, dts.hour, + dts.min, dts.sec) + raise OutOfBoundsDatetime( + 'Out of bounds nanosecond timestamp: {fmt}'.format(fmt=fmt)) + + +# ---------------------------------------------------------------------- +# Conversion + +cdef inline int64_t dtstruct_to_dt64(pandas_datetimestruct* dts) nogil: + """Convenience function to call pandas_datetimestruct_to_datetime + with the by-far-most-common frequency PANDAS_FR_ns""" + return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) + + +cdef inline void dt64_to_dtstruct(int64_t dt64, + pandas_datetimestruct* out) nogil: + """Convenience function to call pandas_datetime_to_datetimestruct + with the by-far-most-common frequency PANDAS_FR_ns""" + pandas_datetime_to_datetimestruct(dt64, PANDAS_FR_ns, out) + return diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 59a7376280da0..4a141c7b56428 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -33,12 +33,8 @@ from numpy cimport ndarray, int64_t from datetime import date as datetime_date from datetime cimport datetime -# This is src/datetime.pxd -from datetime cimport ( - PANDAS_FR_ns, - check_dts_bounds, - pandas_datetimestruct, - pandas_datetimestruct_to_datetime) +from np_datetime cimport (check_dts_bounds, + dtstruct_to_dt64, pandas_datetimestruct) from util cimport is_string_object, get_nat @@ -333,18 +329,14 @@ def array_strptime(ndarray[object] values, object fmt, dts.us = us dts.ps = ns * 1000 - iresult[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) - if check_dts_bounds(&dts): + iresult[i] = dtstruct_to_dt64(&dts) + try: + check_dts_bounds(&dts) + except ValueError: if is_coerce: iresult[i] = NPY_NAT continue - else: - from pandas._libs.tslib import OutOfBoundsDatetime - fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month, - dts.day, dts.hour, - dts.min, dts.sec) - raise OutOfBoundsDatetime( - 'Out of bounds nanosecond timestamp: %s' % fmt) + raise return result diff --git a/setup.py b/setup.py index 8b3ae40f01a10..2843ab6587412 100755 --- a/setup.py +++ b/setup.py @@ -344,6 +344,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/sparse.pyx', 'pandas/_libs/parsers.pyx', 'pandas/_libs/tslibs/strptime.pyx', + 'pandas/_libs/tslibs/np_datetime.pyx', 'pandas/_libs/tslibs/timedeltas.pyx', 'pandas/_libs/tslibs/timezones.pyx', 'pandas/_libs/tslibs/fields.pyx', @@ -469,12 +470,11 @@ def pxd(name): 'pandas/_libs/src/parse_helper.h', 'pandas/_libs/src/compat_helper.h'] - -tseries_depends = ['pandas/_libs/src/datetime/np_datetime.h', - 'pandas/_libs/src/datetime/np_datetime_strings.h', - 'pandas/_libs/src/datetime.pxd'] -npdt_srces = ['pandas/_libs/src/datetime/np_datetime.c', - 'pandas/_libs/src/datetime/np_datetime_strings.c'] +np_datetime_headers = ['pandas/_libs/src/datetime/np_datetime.h', + 'pandas/_libs/src/datetime/np_datetime_strings.h'] +np_datetime_sources = ['pandas/_libs/src/datetime/np_datetime.c', + 'pandas/_libs/src/datetime/np_datetime_strings.c'] +tseries_depends = np_datetime_headers + ['pandas/_libs/src/datetime.pxd'] # some linux distros require it libraries = ['m'] if not is_platform_windows() else [] @@ -489,28 +489,31 @@ def pxd(name): _pxi_dep['hashtable'])}, '_libs.tslibs.strptime': {'pyxfile': '_libs/tslibs/strptime', 'depends': tseries_depends, - 'sources': npdt_srces}, + 'sources': np_datetime_sources}, '_libs.tslibs.offsets': {'pyxfile': '_libs/tslibs/offsets'}, '_libs.tslib': {'pyxfile': '_libs/tslib', 'pxdfiles': ['_libs/src/util', '_libs/lib'], 'depends': tseries_depends, - 'sources': npdt_srces}, + 'sources': np_datetime_sources}, + '_libs.tslibs.np_datetime': {'pyxfile': '_libs/tslibs/np_datetime', + 'depends': np_datetime_headers, + 'sources': np_datetime_sources}, '_libs.tslibs.timedeltas': {'pyxfile': '_libs/tslibs/timedeltas'}, '_libs.tslibs.timezones': {'pyxfile': '_libs/tslibs/timezones'}, '_libs.tslibs.fields': {'pyxfile': '_libs/tslibs/fields', 'depends': tseries_depends, - 'sources': npdt_srces}, + 'sources': np_datetime_sources}, '_libs.period': {'pyxfile': '_libs/period', 'depends': (tseries_depends + ['pandas/_libs/src/period_helper.h']), - 'sources': npdt_srces + [ + 'sources': np_datetime_sources + [ 'pandas/_libs/src/period_helper.c']}, '_libs.tslibs.parsing': {'pyxfile': '_libs/tslibs/parsing', 'pxdfiles': ['_libs/src/util']}, '_libs.tslibs.frequencies': {'pyxfile': '_libs/tslibs/frequencies', 'pxdfiles': ['_libs/src/util']}, '_libs.index': {'pyxfile': '_libs/index', - 'sources': npdt_srces, + 'sources': np_datetime_sources, 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], 'depends': _pxi_dep['index']}, '_libs.algos': {'pyxfile': '_libs/algos', @@ -623,7 +626,7 @@ def pxd(name): 'pandas/_libs/src/ujson/python/JSONtoObj.c', 'pandas/_libs/src/ujson/lib/ultrajsonenc.c', 'pandas/_libs/src/ujson/lib/ultrajsondec.c'] + - npdt_srces), + np_datetime_sources), include_dirs=(['pandas/_libs/src/ujson/python', 'pandas/_libs/src/ujson/lib', 'pandas/_libs/src/datetime'] +