From 856b565442ae6175af90a39ed8a77ba209eed7b9 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 15 Jan 2020 17:11:34 -0800 Subject: [PATCH 1/4] Split out datetime conversion from JSON --- .../_libs/src/ujson/python/date_conversions.c | 120 +++++++++++++++++ .../_libs/src/ujson/python/date_conversions.h | 31 +++++ pandas/_libs/src/ujson/python/objToJSON.c | 127 +----------------- setup.py | 4 +- 4 files changed, 158 insertions(+), 124 deletions(-) create mode 100644 pandas/_libs/src/ujson/python/date_conversions.c create mode 100644 pandas/_libs/src/ujson/python/date_conversions.h diff --git a/pandas/_libs/src/ujson/python/date_conversions.c b/pandas/_libs/src/ujson/python/date_conversions.c new file mode 100644 index 0000000000000..8f4efdda51609 --- /dev/null +++ b/pandas/_libs/src/ujson/python/date_conversions.c @@ -0,0 +1,120 @@ +// Conversion routines that are useful for seralization, +// but which don't interact with JSON objects directly + +#include "date_conversions.h" +#include <../../../tslibs/src/datetime/np_datetime.h> +#include <../../../tslibs/src/datetime/np_datetime_strings.h> + +/* + * Function: scaleNanosecToUnit + * ----------------------------- + * + * Scales an integer value representing time in nanoseconds to provided unit. + * + * Mutates the provided value directly. Returns 0 on success, non-zero on error. 
+ */ +int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) { + switch (unit) { + case NPY_FR_ns: + break; + case NPY_FR_us: + *value /= 1000LL; + break; + case NPY_FR_ms: + *value /= 1000000LL; + break; + case NPY_FR_s: + *value /= 1000000000LL; + break; + default: + return -1; + } + + return 0; +} + +/* Converts the int64_t representation of a datetime to ISO; mutates len */ +char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) { + npy_datetimestruct dts; + int ret_code; + + pandas_datetime_to_datetimestruct(value, NPY_FR_ns, &dts); + + *len = (size_t)get_datetime_iso_8601_strlen(0, base); + char *result = PyObject_Malloc(*len); + + if (result == NULL) { + PyErr_NoMemory(); + return NULL; + } + + ret_code = make_iso_8601_datetime(&dts, result, *len, base); + if (ret_code != 0) { + PyErr_SetString(PyExc_ValueError, + "Could not convert datetime value to string"); + PyObject_Free(result); + } + + // Note that get_datetime_iso_8601_strlen just gives a generic size + // for ISO string conversion, not the actual size used + *len = strlen(result); + return result; +} + +npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) { + scaleNanosecToUnit(&dt, base); + return dt; +} + +/* Convert PyDatetime To ISO C-string. 
mutates len */ +char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base, + size_t *len) { + npy_datetimestruct dts; + int ret; + + ret = convert_pydatetime_to_datetimestruct(obj, &dts); + if (ret != 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Could not convert PyDateTime to numpy datetime"); + } + return NULL; + } + + *len = (size_t)get_datetime_iso_8601_strlen(0, base); + char *result = PyObject_Malloc(*len); + ret = make_iso_8601_datetime(&dts, result, *len, base); + + if (ret != 0) { + PyErr_SetString(PyExc_ValueError, + "Could not convert datetime value to string"); + PyObject_Free(result); + return NULL; + } + + // Note that get_datetime_iso_8601_strlen just gives a generic size + // for ISO string conversion, not the actual size used + *len = strlen(result); + return result; +} + + + +npy_datetime PyDateTimeToEpoch(PyDateTime_Date *dt, NPY_DATETIMEUNIT base) { + npy_datetimestruct dts; + int ret; + + ret = convert_pydatetime_to_datetimestruct(dt, &dts); + if (ret != 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Could not convert PyDateTime to numpy datetime"); + } + // TODO: is setting errMsg required? 
+ //((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + // return NULL; + } + + npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts); + return NpyDateTimeToEpoch(npy_dt, base); +} diff --git a/pandas/_libs/src/ujson/python/date_conversions.h b/pandas/_libs/src/ujson/python/date_conversions.h new file mode 100644 index 0000000000000..45455f4d6128b --- /dev/null +++ b/pandas/_libs/src/ujson/python/date_conversions.h @@ -0,0 +1,31 @@ +#ifndef PANDAS__LIBS_SRC_UJSON_DATE_CONVERSIONS +#define PANDAS__LIBS_SRC_UJSON_DATE_CONVERSIONS + +#define PY_SSIZE_T_CLEAN +#include <Python.h> +#include <numpy/ndarraytypes.h> +#include "datetime.h" + +// Scales value inplace from nanosecond resolution to unit resolution +int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit); + +// Converts an int64 object representing a date to ISO format +// up to precision `base` e.g. base="s" yields 2020-01-03T00:00:00Z +// while base="ns" yields "2020-01-01T00:00:00.000000000Z" +// len is mutated to save the length of the returned string +char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len); + +// TODO: this function doesn't do a lot; should augment or replace with +// scaleNanosecToUnit +npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base); + +// Converts a Python object representing a Date / Datetime to ISO format +// up to precision `base` e.g. 
base="s" yields 2020-01-03T00:00:00Z +// while base="ns" yields "2020-01-01T00:00:00.000000000Z" +// len is mutated to save the length of the returned string +char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base, size_t *len); + +// Convert a Python Date/Datetime to Unix epoch with resolution base +npy_datetime PyDateTimeToEpoch(PyDateTime_Date *dt, NPY_DATETIMEUNIT base); + +#endif diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index c413a16f8d5f0..de749ee67bbdb 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -45,8 +45,7 @@ Numeric decoder derived from from TCL library #include #include #include -#include <../../../tslibs/src/datetime/np_datetime.h> -#include <../../../tslibs/src/datetime/np_datetime_strings.h> +#include "date_conversions.h" #include "datetime.h" static PyTypeObject *type_decimal; @@ -209,34 +208,6 @@ static TypeContext *createTypeContext(void) { return pc; } -/* - * Function: scaleNanosecToUnit - * ----------------------------- - * - * Scales an integer value representing time in nanoseconds to provided unit. - * - * Mutates the provided value directly. Returns 0 on success, non-zero on error. 
- */ -static int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) { - switch (unit) { - case NPY_FR_ns: - break; - case NPY_FR_us: - *value /= 1000LL; - break; - case NPY_FR_ms: - *value /= 1000000LL; - break; - case NPY_FR_s: - *value /= 1000000000LL; - break; - default: - return -1; - } - - return 0; -} - static PyObject *get_values(PyObject *obj) { PyObject *values = NULL; @@ -379,34 +350,6 @@ static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc), return (char *)PyUnicode_AsUTF8AndSize(_obj, (Py_ssize_t *)_outLen); } -/* Converts the int64_t representation of a datetime to ISO; mutates len */ -static char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) { - npy_datetimestruct dts; - int ret_code; - - pandas_datetime_to_datetimestruct(value, NPY_FR_ns, &dts); - - *len = (size_t)get_datetime_iso_8601_strlen(0, base); - char *result = PyObject_Malloc(*len); - - if (result == NULL) { - PyErr_NoMemory(); - return NULL; - } - - ret_code = make_iso_8601_datetime(&dts, result, *len, base); - if (ret_code != 0) { - PyErr_SetString(PyExc_ValueError, - "Could not convert datetime value to string"); - PyObject_Free(result); - } - - // Note that get_datetime_iso_8601_strlen just gives a generic size - // for ISO string conversion, not the actual size used - *len = strlen(result); - return result; -} - /* JSON callback. returns a char* and mutates the pointer to *len */ static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused), JSONTypeContext *tc, size_t *len) { @@ -414,44 +357,6 @@ static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused), return int64ToIso(GET_TC(tc)->longValue, base, len); } -static npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) { - scaleNanosecToUnit(&dt, base); - return dt; -} - -/* Convert PyDatetime To ISO C-string. 
mutates len */ -static char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base, - size_t *len) { - npy_datetimestruct dts; - int ret; - - ret = convert_pydatetime_to_datetimestruct(obj, &dts); - if (ret != 0) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Could not convert PyDateTime to numpy datetime"); - } - return NULL; - } - - *len = (size_t)get_datetime_iso_8601_strlen(0, base); - char *result = PyObject_Malloc(*len); - ret = make_iso_8601_datetime(&dts, result, *len, base); - - if (ret != 0) { - PRINTMARK(); - PyErr_SetString(PyExc_ValueError, - "Could not convert datetime value to string"); - PyObject_Free(result); - return NULL; - } - - // Note that get_datetime_iso_8601_strlen just gives a generic size - // for ISO string conversion, not the actual size used - *len = strlen(result); - return result; -} - /* JSON callback */ static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc, size_t *len) { @@ -465,30 +370,6 @@ static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc, return PyDateTimeToIso(obj, base, len); } -static npy_datetime PyDateTimeToEpoch(PyObject *obj, NPY_DATETIMEUNIT base) { - npy_datetimestruct dts; - int ret; - - if (!PyDateTime_Check(obj)) { - // TODO: raise TypeError - } - PyDateTime_Date *dt = (PyDateTime_Date *)obj; - - ret = convert_pydatetime_to_datetimestruct(dt, &dts); - if (ret != 0) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Could not convert PyDateTime to numpy datetime"); - } - // TODO: is setting errMsg required? 
- //((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; - // return NULL; - } - - npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts); - return NpyDateTimeToEpoch(npy_dt, base); -} - static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) { PyObject *obj = (PyObject *)_obj; PyObject *str; @@ -1593,7 +1474,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, } else { cLabel = PyObject_Malloc(21); // 21 chars for int64 sprintf(cLabel, "%" NPY_DATETIME_FMT, - PyDateTimeToEpoch(item, base)); + PyDateTimeToEpoch((PyDateTime_Date *)item, base)); len = strlen(cLabel); } } else { // Fallback to string representation @@ -1784,7 +1665,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { PRINTMARK(); NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; - GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base); + GET_TC(tc)->longValue = PyDateTimeToEpoch((PyDateTime_Date *)obj, base); tc->type = JT_LONG; } return; @@ -1810,7 +1691,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { PRINTMARK(); NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; - GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base); + GET_TC(tc)->longValue = PyDateTimeToEpoch((PyDateTime_Date *)obj, base); tc->type = JT_LONG; } return; diff --git a/setup.py b/setup.py index c33ce063cb4d9..ee68c9cf0dba6 100755 --- a/setup.py +++ b/setup.py @@ -240,6 +240,7 @@ def initialize_options(self): pjoin(ujson_python, "ujson.c"), pjoin(ujson_python, "objToJSON.c"), pjoin(ujson_python, "JSONtoObj.c"), + pjoin(ujson_python, "date_conversions.c"), pjoin(ujson_lib, "ultrajsonenc.c"), pjoin(ujson_lib, "ultrajsondec.c"), pjoin(util, "move.c"), @@ -715,11 +716,12 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): ujson_ext = Extension( "pandas._libs.json", - depends=["pandas/_libs/src/ujson/lib/ultrajson.h"], + depends=["pandas/_libs/src/ujson/lib/ultrajson.h", 
"pandas/_libs/src/ujson/python/date_conversions.h"], sources=( [ "pandas/_libs/src/ujson/python/ujson.c", "pandas/_libs/src/ujson/python/objToJSON.c", + "pandas/_libs/src/ujson/python/date_conversions.c", "pandas/_libs/src/ujson/python/JSONtoObj.c", "pandas/_libs/src/ujson/lib/ultrajsonenc.c", "pandas/_libs/src/ujson/lib/ultrajsondec.c", From 173429445bcec62e1123877d7902c28d18624936 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 15 Jan 2020 17:12:03 -0800 Subject: [PATCH 2/4] clang format --- pandas/_libs/src/ujson/python/date_conversions.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/src/ujson/python/date_conversions.c b/pandas/_libs/src/ujson/python/date_conversions.c index 8f4efdda51609..cc76d3969822d 100644 --- a/pandas/_libs/src/ujson/python/date_conversions.c +++ b/pandas/_libs/src/ujson/python/date_conversions.c @@ -98,8 +98,6 @@ char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base, return result; } - - npy_datetime PyDateTimeToEpoch(PyDateTime_Date *dt, NPY_DATETIMEUNIT base) { npy_datetimestruct dts; int ret; From ce13042f05ae6ef6c130da256c64e3b2f821effd Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 15 Jan 2020 17:14:15 -0800 Subject: [PATCH 3/4] Black --- setup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ee68c9cf0dba6..2ea792ae352e9 100755 --- a/setup.py +++ b/setup.py @@ -716,7 +716,10 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): ujson_ext = Extension( "pandas._libs.json", - depends=["pandas/_libs/src/ujson/lib/ultrajson.h", "pandas/_libs/src/ujson/python/date_conversions.h"], + depends=[ + "pandas/_libs/src/ujson/lib/ultrajson.h", + "pandas/_libs/src/ujson/python/date_conversions.h", + ], sources=( [ "pandas/_libs/src/ujson/python/ujson.c", From ac6b2b8d68ef96981040615e8ef53f597457d81b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 16 Jan 2020 08:17:41 -0800 Subject: [PATCH 4/4] typo --- pandas/_libs/src/ujson/python/date_conversions.c | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/src/ujson/python/date_conversions.c b/pandas/_libs/src/ujson/python/date_conversions.c index cc76d3969822d..fc4bdef8463af 100644 --- a/pandas/_libs/src/ujson/python/date_conversions.c +++ b/pandas/_libs/src/ujson/python/date_conversions.c @@ -1,4 +1,4 @@ -// Conversion routines that are useful for seralization, +// Conversion routines that are useful for serialization, // but which don't interact with JSON objects directly #include "date_conversions.h"