From 0e0201a4924575c9fc1c5008fb213052150455a5 Mon Sep 17 00:00:00 2001
From: Brock Mendel
Date: Wed, 31 Jan 2018 21:09:25 -0800
Subject: [PATCH] direct conversions for dt64 units

---
 pandas/_libs/src/datetime/np_datetime.c |   3 +-
 pandas/_libs/src/datetime/np_datetime.h |   2 +
 pandas/_libs/tslibs/conversion.pyx      |  14 +--
 pandas/_libs/tslibs/np_datetime.pxd     |   5 +-
 pandas/_libs/tslibs/np_datetime.pyx     | 156 ++++++++++++++++++++++++
 5 files changed, 164 insertions(+), 16 deletions(-)

diff --git a/pandas/_libs/src/datetime/np_datetime.c b/pandas/_libs/src/datetime/np_datetime.c
index 89753ccf7d773..8d0a8274eae6a 100644
--- a/pandas/_libs/src/datetime/np_datetime.c
+++ b/pandas/_libs/src/datetime/np_datetime.c
@@ -234,8 +234,7 @@ NPY_NO_EXPORT void add_seconds_to_datetimestruct(pandas_datetimestruct *dts,
  * Fills in the year, month, day in 'dts' based on the days
  * offset from 1970.
  */
-static void set_datetimestruct_days(npy_int64 days,
-                                    pandas_datetimestruct *dts) {
+void set_datetimestruct_days(npy_int64 days, pandas_datetimestruct *dts) {
     const int *month_lengths;
     int i;
 
diff --git a/pandas/_libs/src/datetime/np_datetime.h b/pandas/_libs/src/datetime/np_datetime.h
index b6c0852bfe764..ed4f8d57460a1 100644
--- a/pandas/_libs/src/datetime/np_datetime.h
+++ b/pandas/_libs/src/datetime/np_datetime.h
@@ -71,6 +71,8 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta val,
                                          PANDAS_DATETIMEUNIT fr,
                                          pandas_timedeltastruct *result);
 
+void set_datetimestruct_days(npy_int64 days, pandas_datetimestruct *dts);
+
 int dayofweek(int y, int m, int d);
 
 extern const int days_per_month_table[2][12];
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index a32bfc1f6836c..f9edbdf9154b4 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -20,12 +20,12 @@ PyDateTime_IMPORT
 
 from np_datetime cimport (check_dts_bounds,
                           pandas_datetimestruct,
-                          pandas_datetime_to_datetimestruct, _string_to_dts,
+                          _string_to_dts,
                           PANDAS_DATETIMEUNIT, PANDAS_FR_ns,
                           npy_datetime,
                           dt64_to_dtstruct, dtstruct_to_dt64,
                           get_datetime64_unit, get_datetime64_value,
-                          pydatetime_to_dt64)
+                          pydatetime_to_dt64, convert_to_ns)
 
 from util cimport (is_string_object,
                    is_datetime64_object,
@@ -60,7 +60,6 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1:
     value to nanoseconds if necessary.
     """
     cdef:
-        pandas_datetimestruct dts
         PANDAS_DATETIMEUNIT unit
         npy_datetime ival
 
@@ -68,9 +67,7 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1:
     ival = get_datetime64_value(val)
 
     if unit != PANDAS_FR_ns:
-        pandas_datetime_to_datetimestruct(ival, unit, &dts)
-        check_dts_bounds(&dts)
-        ival = dtstruct_to_dt64(&dts)
+        ival = convert_to_ns(ival, unit)
 
     return ival
 
@@ -93,7 +90,6 @@ def ensure_datetime64ns(ndarray arr, copy=True):
         Py_ssize_t i, n = arr.size
         ndarray[int64_t] ivalues, iresult
         PANDAS_DATETIMEUNIT unit
-        pandas_datetimestruct dts
 
     shape = (<object> arr).shape
 
@@ -113,9 +109,7 @@ def ensure_datetime64ns(ndarray arr, copy=True):
     else:
         for i in range(n):
             if ivalues[i] != NPY_NAT:
-                pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts)
-                iresult[i] = dtstruct_to_dt64(&dts)
-                check_dts_bounds(&dts)
+                iresult[i] = convert_to_ns(ivalues[i], unit)
             else:
                 iresult[i] = NPY_NAT
 
diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd
index 33b8b32bcf2dc..22cd6018b158e 100644
--- a/pandas/_libs/tslibs/np_datetime.pxd
+++ b/pandas/_libs/tslibs/np_datetime.pxd
@@ -50,10 +50,6 @@ cdef extern from "../src/datetime/np_datetime.h":
         PANDAS_FR_fs
         PANDAS_FR_as
 
-    void pandas_datetime_to_datetimestruct(npy_datetime val,
-                                           PANDAS_DATETIMEUNIT fr,
-                                           pandas_datetimestruct *result) nogil
-
     int days_per_month_table[2][12]
     int dayofweek(int y, int m, int d) nogil
     int is_leapyear(int64_t year) nogil
@@ -65,6 +61,7 @@ cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1
 
 cdef check_dts_bounds(pandas_datetimestruct *dts)
 
+cdef int64_t convert_to_ns(int64_t val, PANDAS_DATETIMEUNIT unit) except? -1
 cdef int64_t dtstruct_to_dt64(pandas_datetimestruct* dts) nogil
 cdef void dt64_to_dtstruct(int64_t dt64, pandas_datetimestruct* out) nogil
 cdef void td64_to_tdstruct(int64_t td64, pandas_timedeltastruct* out) nogil
diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx
index 7f861a50f03b8..d6ec2626f53b0 100644
--- a/pandas/_libs/tslibs/np_datetime.pyx
+++ b/pandas/_libs/tslibs/np_datetime.pyx
@@ -1,5 +1,7 @@
 # -*- coding: utf-8 -*-
 # cython: profile=False
+cimport cython
+from cython cimport Py_ssize_t
 
 from cpython cimport (Py_EQ, Py_NE, Py_GE, Py_GT, Py_LT, Py_LE,
                       PyUnicode_Check, PyUnicode_AsASCIIString)
@@ -13,6 +15,7 @@ from cpython.datetime cimport (datetime, date,
                                PyDateTime_DATE_GET_MICROSECOND)
 PyDateTime_IMPORT
 
+import numpy as np
 from numpy cimport int64_t
 
 cdef extern from "../src/datetime/np_datetime.h":
@@ -33,6 +36,9 @@ cdef extern from "../src/datetime/np_datetime.h":
                                              ) nogil
 
     pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS
+    void set_datetimestruct_days(int64_t days,
+                                 pandas_datetimestruct *dts) nogil
+
 
 cdef extern from "../src/datetime/np_datetime_strings.h":
     int parse_iso_8601_datetime(char *str, int len,
@@ -199,3 +205,153 @@ cdef inline int _cstring_to_dts(char *val, int length,
     result = parse_iso_8601_datetime(val, length,
                                      dts, out_local, out_tzoffset)
     return result
+
+
+# ----------------------------------------------------------------------
+# Unit Conversion
+cdef datetime EPOCH = datetime(1970, 1, 1)
+
+cdef int64_t* _coeffs = [0,  # PANDAS_FR_Y
+                         0,  # PANDAS_FR_M
+                         7 * 24 * 3600 * 1000 * 1000 * 1000,  # PANDAS_FR_W
+                         0,  # NPY_FR_B dummy
+                         24 * 3600 * 1000 * 1000 * 1000,  # PANDAS_FR_D
+                         3600 * 1000 * 1000 * 1000L,  # PANDAS_FR_h
+                         60 * 1000 * 1000 * 1000L,  # PANDAS_FR_m
+                         1000 * 1000 * 1000,  # PANDAS_FR_s
+                         1000 * 1000L,  # PANDAS_FR_ms
+                         1000,  # PANDAS_FR_us
+                         1,  # PANDAS_FR_ns
+                         # From here down we divide instead of multiply
+                         1000,  # PANDAS_FR_ps
+                         1000 * 1000,  # PANDAS_FR_fs
+                         1000 * 1000 * 1000]  # PANDAS_FR_as
+
+# The largest absolute values these can take _without_ raising.
+cdef int64_t* _bounds = [292,  # PANDAS_FR_Y dummy
+                         3507,  # PANDAS_FR_M dummy
+                         15250,  # PANDAS_FR_W
+                         0,  # NPY_FR_B dummy
+                         106751,  # PANDAS_FR_D
+                         2562047,  # PANDAS_FR_h
+                         153722867,  # PANDAS_FR_m
+                         9223372036,  # PANDAS_FR_s
+                         9223372036854,  # PANDAS_FR_ms
+                         9223372036854775,  # PANDAS_FR_us
+                         9223372036854775807,  # PANDAS_FR_ns
+                         9223372036854775807,  # PANDAS_FR_ps
+                         9223372036854775807,  # PANDAS_FR_fs
+                         9223372036854775807]  # PANDAS_FR_as
+
+# Type names for the np.datetime64 types that are liable to overflow;
+# used so we can render the correct exception message
+cdef dict type_names = {PANDAS_FR_Y: 'Y', PANDAS_FR_M: 'M', PANDAS_FR_W: 'W',
+                        PANDAS_FR_D: 'D', PANDAS_FR_h: 'h', PANDAS_FR_m: 'm',
+                        PANDAS_FR_s: 's', PANDAS_FR_ms: 'ms',
+                        PANDAS_FR_us: 'us'}
+
+
+cdef int64_t convert_to_ns(int64_t val, PANDAS_DATETIMEUNIT unit) except? -1:
+    """
+    Convert the int64_t representation of a timestamp with the given unit
+    to a representation using PANDAS_FR_ns.
+
+    Parameters
+    ----------
+    val : int64_t
+        Count of `unit` steps relative to 1970-01-01.
+    unit : PANDAS_DATETIMEUNIT
+
+    Returns
+    -------
+    int64_t : the same instant counted in nanoseconds
+
+    Raises
+    ------
+    OutOfBoundsDatetime
+        If abs(val) is larger than the _bounds entry for `unit`.
+    """
+    cdef:
+        datetime dt
+        int64_t year, month
+
+    if unit == PANDAS_FR_ns:
+        # Nothing to convert.  Returning explicitly keeps a direct call with
+        # PANDAS_FR_ns from falling off the end of the function.
+        return val
+
+    if abs(val) > _bounds[unit]:
+        # Format the offending value at second resolution.  Casting it to
+        # datetime64[ns] is the very conversion that is out of range, and
+        # such a cast can silently wrap around to an unrelated date.
+        unit_name = type_names[unit]
+        val_s = np.datetime64(val, unit_name).astype('datetime64[s]')
+        fmt = str(val_s).replace('T', ' ')
+        raise OutOfBoundsDatetime('Out of bounds nanosecond timestamp: '
+                                  '{fmt}'.format(fmt=fmt))
+
+    if unit == PANDAS_FR_Y:
+        # Whole days times ns-per-day stays in exact integer arithmetic.
+        dt = datetime(1970 + val, 1, 1)
+        return (dt - EPOCH).days * _coeffs[PANDAS_FR_D]
+
+    elif unit == PANDAS_FR_M:
+        # Split into (years since 1970, zero-based month).  Fixing up a
+        # negative remainder afterwards gives the right calendar month
+        # whether // and % floor (Cython's default) or truncate
+        # (cdivision=True).
+        year = val // 12
+        month = val % 12
+        if month < 0:
+            month += 12
+            year -= 1
+
+        dt = datetime(1970 + year, month + 1, 1)
+        return (dt - EPOCH).days * _coeffs[PANDAS_FR_D]
+
+    elif unit < PANDAS_FR_ns:
+        return val * _coeffs[unit]
+
+    # unit > PANDAS_FR_ns: these units are finer than a nanosecond, so the
+    # value is divided down and cannot overflow.
+    return val // _coeffs[unit]
+
+
+@cython.cdivision
+cdef int convert_datetime_to_dtstruct(int64_t dt, pandas_datetimestruct *out):
+    """
+    convert a nanosecond (PANDAS_FR_ns) int64_t timestamp to
+    a pandas_datetimestruct
+
+    Parameters
+    ----------
+    dt : int64_t
+    out : pandas_datetimestruct*
+
+    Returns
+    -------
+    code : 0 on success
+    """
+    cdef:
+        int64_t perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL
+
+    # Note that care must be taken with the / and % operators
+    # for negative values.
+
+    if dt >= 0:
+        set_datetimestruct_days(dt / perday, out)
+        dt = dt % perday
+    else:
+        if dt % perday == 0:
+            set_datetimestruct_days(dt / perday - 0, out)
+        else:
+            set_datetimestruct_days(dt / perday - 1, out)
+        dt = (perday - 1) + (dt + 1) % perday
+
+    out.hour = dt / (60 * 60 * 1000000000LL)
+    out.min = (dt / (60 * 1000000000LL)) % 60
+    out.sec = (dt / 1000000000LL) % 60
+    out.us = (dt / 1000LL) % 1000000LL
+    out.ps = (dt % 1000LL) * 1000
+
+    return 0