From bee93f8e810ed803c654c8dc640679ddd51fa489 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 5 Dec 2017 18:22:49 -0800 Subject: [PATCH 1/3] handle now and today in cython instead of C --- pandas/_libs/src/datetime/np_datetime.c | 2 +- .../_libs/src/datetime/np_datetime_strings.c | 102 ------------------ pandas/_libs/tslib.pyx | 34 ++++-- 3 files changed, 29 insertions(+), 109 deletions(-) diff --git a/pandas/_libs/src/datetime/np_datetime.c b/pandas/_libs/src/datetime/np_datetime.c index edc9c0f8f903d..fd76f3328c05b 100644 --- a/pandas/_libs/src/datetime/np_datetime.c +++ b/pandas/_libs/src/datetime/np_datetime.c @@ -527,7 +527,7 @@ void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr, void pandas_timedelta_to_timedeltastruct(npy_timedelta val, PANDAS_DATETIMEUNIT fr, pandas_timedeltastruct *result) { - convert_timedelta_to_timedeltastruct(fr, val, result); + convert_timedelta_to_timedeltastruct(fr, val, result); } diff --git a/pandas/_libs/src/datetime/np_datetime_strings.c b/pandas/_libs/src/datetime/np_datetime_strings.c index a047650f4c88d..2ea69e2ac1636 100644 --- a/pandas/_libs/src/datetime/np_datetime_strings.c +++ b/pandas/_libs/src/datetime/np_datetime_strings.c @@ -33,55 +33,6 @@ This file implements string parsing and creation for NumPy datetime. #include "np_datetime_strings.h" -/* Platform-specific time_t typedef */ -typedef time_t NPY_TIME_T; - -/* - * Wraps `localtime` functionality for multiple platforms. This - * converts a time value to a time structure in the local timezone. - * - * Returns 0 on success, -1 on failure. - */ -static int get_localtime(NPY_TIME_T *ts, struct tm *tms) { - char *func_name = ""; -#if defined(_WIN32) -#if defined(_MSC_VER) && (_MSC_VER >= 1400) - if (localtime_s(tms, ts) != 0) { - func_name = "localtime_s"; - goto fail; - } -#elif defined(__GNUC__) && defined(NPY_MINGW_USE_CUSTOM_MSVCR) - if (_localtime64_s(tms, ts) != 0) { - func_name = "_localtime64_s"; - goto fail; - } -#else - struct tm *tms_tmp; - localtime_r(ts, tms_tmp); - if (tms_tmp == NULL) { - func_name = "localtime"; - goto fail; - } - memcpy(tms, tms_tmp, sizeof(struct tm)); -#endif -#else - if (localtime_r(ts, tms) == NULL) { - func_name = "localtime_r"; - goto fail; - } -#endif - - return 0; - -fail: - PyErr_Format(PyExc_OSError, - "Failed to use '%s' to convert " - "to a local time", - func_name); - return -1; -} - - /* * Parses (almost) standard ISO 8601 date strings. The differences are: * @@ -138,59 +89,6 @@ int parse_iso_8601_datetime(char *str, int len, out->month = 1; out->day = 1; - /* - * The string "today" means take today's date in local time, and - * convert it to a date representation. This date representation, if - * forced into a time unit, will be at midnight UTC. - * This is perhaps a little weird, but done so that the - * 'datetime64[D]' type produces the date you expect, rather than - * switching to an adjacent day depending on the current time and your - * timezone. - */ - if (len == 5 && tolower(str[0]) == 't' && tolower(str[1]) == 'o' && - tolower(str[2]) == 'd' && tolower(str[3]) == 'a' && - tolower(str[4]) == 'y') { - NPY_TIME_T rawtime = 0; - struct tm tm_; - - time(&rawtime); - if (get_localtime(&rawtime, &tm_) < 0) { - return -1; - } - out->year = tm_.tm_year + 1900; - out->month = tm_.tm_mon + 1; - out->day = tm_.tm_mday; - - /* - * Indicate that this was a special value, and - * is a date (unit 'D'). - */ - if (out_local != NULL) { - *out_local = 0; - } - - return 0; - } - - /* The string "now" resolves to the current UTC time */ - if (len == 3 && tolower(str[0]) == 'n' && tolower(str[1]) == 'o' && - tolower(str[2]) == 'w') { - NPY_TIME_T rawtime = 0; - - time(&rawtime); - - /* - * Indicate that this was a special value, and - * use 's' because the time() function has resolution - * seconds. - */ - if (out_local != NULL) { - *out_local = 0; - } - - return convert_datetime_to_datetimestruct(PANDAS_FR_s, rawtime, out); - } - substr = str; sublen = len; diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 020ac812e1c20..3a192f47ea87f 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -10,7 +10,8 @@ import numpy as np np.import_array() -from cpython cimport PyTypeObject, PyFloat_Check +from cpython cimport (PyTypeObject, PyFloat_Check, + PyUnicode_Check, PyUnicode_AsASCIIString) cdef extern from "Python.h": cdef PyTypeObject *Py_TYPE(object) @@ -223,6 +224,14 @@ def _test_parse_iso8601(object ts): obj = _TSObject() + if PyUnicode_Check(ts): + ts = PyUnicode_AsASCIIString(ts) + + if ts == b'now': + return Timestamp.utcnow() + elif ts == b'today': + return Timestamp.utcnow().normalize() + _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset) obj.value = dtstruct_to_dt64(&obj.dts) check_dts_bounds(&obj.dts) @@ -603,12 +612,25 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', elif is_string_object(val): # string - try: - if len(val) == 0 or val in nat_strings: - iresult[i] = NPY_NAT - continue + if len(val) == 0 or val in nat_strings: + iresult[i] = NPY_NAT + continue + + seen_string = 1 - seen_string = 1 + if PyUnicode_Check(val): + val = PyUnicode_AsASCIIString(val) + + if val == b'now': + # Note: this is *not* the same as Timestamp('now') + iresult[i] = Timestamp.utcnow().value + continue + elif val == b'today': + # Note: this is *not* the same as Timestamp('today') + iresult[i] = Timestamp.utcnow().normalize().value + continue + + try: _string_to_dts(val, &dts, &out_local, &out_tzoffset) value = dtstruct_to_dt64(&dts) if out_local == 1: From 3c49f148703a9891b0d2ff0ae9221681970609b2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 6 Dec 2017 15:48:31 -0800 Subject: [PATCH 2/3] delay encoding to bytes --- pandas/_libs/tslib.pyx | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 3a192f47ea87f..72405630cfc03 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -10,8 +10,7 @@ import numpy as np np.import_array() -from cpython cimport (PyTypeObject, PyFloat_Check, - PyUnicode_Check, PyUnicode_AsASCIIString) +from cpython cimport PyTypeObject, PyFloat_Check cdef extern from "Python.h": cdef PyTypeObject *Py_TYPE(object) @@ -224,12 +223,9 @@ def _test_parse_iso8601(object ts): obj = _TSObject() - if PyUnicode_Check(ts): - ts = PyUnicode_AsASCIIString(ts) - - if ts == b'now': + if ts == 'now': return Timestamp.utcnow() - elif ts == b'today': + elif ts == 'today': return Timestamp.utcnow().normalize() _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset) @@ -618,14 +614,11 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', seen_string = 1 - if PyUnicode_Check(val): - val = PyUnicode_AsASCIIString(val) - - if val == b'now': + if val == 'now': # Note: this is *not* the same as Timestamp('now') iresult[i] = Timestamp.utcnow().value continue - elif val == b'today': + elif val == 'today': # Note: this is *not* the same as Timestamp('today') iresult[i] = Timestamp.utcnow().normalize().value continue From 6273f51e1a828435811c0b7551c1929d22433f6f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 7 Dec 2017 18:45:46 -0800 Subject: [PATCH 3/3] delay checks of today and now --- pandas/_libs/tslib.pyx | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 2da8280ca7293..293e10d1934fa 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -592,15 +592,6 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', seen_string = 1 - if val == 'now': - # Note: this is *not* the same as Timestamp('now') - iresult[i] = Timestamp.utcnow().value - continue - elif val == 'today': - # Note: this is *not* the same as Timestamp('today') - iresult[i] = Timestamp.utcnow().normalize().value - continue - try: _string_to_dts(val, &dts, &out_local, &out_tzoffset) value = dtstruct_to_dt64(&dts) @@ -612,6 +603,8 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', except ValueError: # if requiring iso8601 strings, skip trying other formats if require_iso8601: + if _parse_today_now(val, &iresult[i]): + continue if is_coerce: iresult[i] = NPY_NAT continue @@ -626,6 +619,8 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', py_dt = parse_datetime_string(val, dayfirst=dayfirst, yearfirst=yearfirst) except Exception: + if _parse_today_now(val, &iresult[i]): + continue if is_coerce: iresult[i] = NPY_NAT continue @@ -721,6 +716,19 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', return oresult +cdef inline bint _parse_today_now(str val, int64_t* iresult): + # We delay this check for as long as possible + # because it catches relatively rare cases + if val == 'now': + # Note: this is *not* the same as Timestamp('now') + iresult[0] = Timestamp.utcnow().value + return True + elif val == 'today': + # Note: this is *not* the same as Timestamp('today') + iresult[0] = Timestamp.utcnow().normalize().value + return True + return False + # ---------------------------------------------------------------------- # Some general helper functions