Skip to content

Split out JSON Date Converters #31057

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 118 additions & 0 deletions pandas/_libs/src/ujson/python/date_conversions.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// Conversion routines that are useful for serialization,
// but which don't interact with JSON objects directly

#include "date_conversions.h"
#include <../../../tslibs/src/datetime/np_datetime.h>
#include <../../../tslibs/src/datetime/np_datetime_strings.h>

/*
 * Function: scaleNanosecToUnit
 * -----------------------------
 *
 * Rescales, in place, an integer holding a time in nanoseconds so that it
 * is expressed in `unit` instead.
 *
 * value: pointer to the nanosecond count; overwritten with the scaled value
 * unit:  target resolution; only NPY_FR_ns, NPY_FR_us, NPY_FR_ms and
 *        NPY_FR_s are handled
 *
 * Returns 0 on success. Returns -1 for any other unit, in which case
 * *value is left untouched.
 */
int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) {
    if (unit == NPY_FR_ns) {
        // Already in nanoseconds; nothing to scale.
        return 0;
    }

    if (unit == NPY_FR_us) {
        *value /= 1000LL;
        return 0;
    }

    if (unit == NPY_FR_ms) {
        *value /= 1000000LL;
        return 0;
    }

    if (unit == NPY_FR_s) {
        *value /= 1000000000LL;
        return 0;
    }

    // Unsupported resolution.
    return -1;
}

/*
 * Converts the int64_t representation of a datetime to an ISO string.
 *
 * value: the datetime as an integer; it is decoded using NPY_FR_ns
 * base:  unit handed to the ISO formatter to select the output precision
 * len:   out-parameter; on success it holds strlen() of the returned string
 *
 * Returns a buffer obtained from PyObject_Malloc (release it with
 * PyObject_Free), or NULL with a Python exception set when allocation or
 * formatting fails. On failure the value left in *len should not be used.
 */
char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) {
    npy_datetimestruct dts;
    int ret_code;

    pandas_datetime_to_datetimestruct(value, NPY_FR_ns, &dts);

    *len = (size_t)get_datetime_iso_8601_strlen(0, base);
    char *result = PyObject_Malloc(*len);

    if (result == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    ret_code = make_iso_8601_datetime(&dts, result, *len, base);
    if (ret_code != 0) {
        PyErr_SetString(PyExc_ValueError,
                        "Could not convert datetime value to string");
        PyObject_Free(result);
        // Bail out here: falling through would call strlen() on, and hand
        // back to the caller, a buffer that has just been freed.
        return NULL;
    }

    // Note that get_datetime_iso_8601_strlen just gives a generic size
    // for ISO string conversion, not the actual size used
    *len = strlen(result);
    return result;
}

/*
 * Returns `dt` scaled by scaleNanosecToUnit to the resolution `base`.
 *
 * The status returned by scaleNanosecToUnit is not inspected, so when the
 * scaling reports an error the value comes back exactly as scaleNanosecToUnit
 * left it.
 */
npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) {
    npy_datetime *scaled = &dt;

    scaleNanosecToUnit(scaled, base);
    return *scaled;
}

/*
 * Convert PyDatetime To ISO C-string.
 *
 * obj:  the Python date/datetime object to convert
 * base: unit handed to the ISO formatter to select the output precision
 * len:  out-parameter; on success it holds strlen() of the returned string
 *
 * Returns a buffer obtained from PyObject_Malloc (release it with
 * PyObject_Free), or NULL with a Python exception set when the object
 * cannot be converted, allocation fails, or formatting fails.
 */
char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base,
                      size_t *len) {
    npy_datetimestruct dts;
    int ret;

    ret = convert_pydatetime_to_datetimestruct(obj, &dts);
    if (ret != 0) {
        // Keep any exception the converter raised; only supply a generic
        // one when it failed without setting an error.
        if (!PyErr_Occurred()) {
            PyErr_SetString(PyExc_ValueError,
                            "Could not convert PyDateTime to numpy datetime");
        }
        return NULL;
    }

    *len = (size_t)get_datetime_iso_8601_strlen(0, base);
    char *result = PyObject_Malloc(*len);

    // The formatter writes into the buffer, so an allocation failure must
    // be caught before calling it.
    if (result == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    ret = make_iso_8601_datetime(&dts, result, *len, base);

    if (ret != 0) {
        PyErr_SetString(PyExc_ValueError,
                        "Could not convert datetime value to string");
        PyObject_Free(result);
        return NULL;
    }

    // Note that get_datetime_iso_8601_strlen just gives a generic size
    // for ISO string conversion, not the actual size used
    *len = strlen(result);
    return result;
}

/*
 * Converts a Python date/datetime object to an epoch value with
 * resolution `base`.
 *
 * dt:   the Python date/datetime object to convert
 * base: resolution of the returned value
 *
 * Returns the epoch value on success. If the object cannot be converted,
 * a Python exception is set and -1 is returned; because -1 is also a
 * legitimate epoch value, callers should consult PyErr_Occurred() to
 * detect failure.
 */
npy_datetime PyDateTimeToEpoch(PyDateTime_Date *dt, NPY_DATETIMEUNIT base) {
    npy_datetimestruct dts;
    int ret;

    ret = convert_pydatetime_to_datetimestruct(dt, &dts);
    if (ret != 0) {
        // Keep any exception the converter raised; only supply a generic
        // one when it failed without setting an error.
        if (!PyErr_Occurred()) {
            PyErr_SetString(PyExc_ValueError,
                            "Could not convert PyDateTime to numpy datetime");
        }
        // TODO: is setting errMsg on the encoder required as well?
        // Stop here: `dts` may not have been filled in, so computing an
        // epoch from it would read indeterminate data.
        return -1;
    }

    npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts);
    return NpyDateTimeToEpoch(npy_dt, base);
}
31 changes: 31 additions & 0 deletions pandas/_libs/src/ujson/python/date_conversions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#ifndef PANDAS__LIBS_SRC_UJSON_DATE_CONVERSIONS
#define PANDAS__LIBS_SRC_UJSON_DATE_CONVERSIONS

#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <numpy/ndarraytypes.h>
#include "datetime.h"

// Scales value inplace from nanosecond resolution to unit resolution
int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit);

// Converts an int64 object representing a date to ISO format
// up to precision `base` e.g. base="s" yields "2020-01-01T00:00:00Z"
// while base="ns" yields "2020-01-01T00:00:00.000000000Z"
// len is mutated to save the length of the returned string
char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len);

// TODO: this function doesn't do a lot; should augment or replace with
// scaleNanosecToUnit
npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base);

// Converts a Python object representing a Date / Datetime to ISO format
// up to precision `base` e.g. base="s" yields "2020-01-01T00:00:00Z"
// while base="ns" yields "2020-01-01T00:00:00.000000000Z"
// len is mutated to save the length of the returned string
char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base, size_t *len);

// Convert a Python Date/Datetime to Unix epoch with resolution base
npy_datetime PyDateTimeToEpoch(PyDateTime_Date *dt, NPY_DATETIMEUNIT base);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a minor detail, I had to modify the signature of this function, which previously accepted a PyObject and performed a PyDateTime_Check call. The problem with moving that into a separate file is that you need to call PyDateTime_IMPORT to set an object with static duration in this file, separate from what is already in objToJSON.c, and there's not a great place to really set that.

We didn't explicitly do anything with the check previously and this aligns better with PyDateTimeToIso so I think not a big deal


#endif
125 changes: 3 additions & 122 deletions pandas/_libs/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ Numeric decoder derived from from TCL library
#include <numpy/ndarraytypes.h>
#include <numpy/npy_math.h>
#include <ultrajson.h>
#include <../../../tslibs/src/datetime/np_datetime.h>
#include <../../../tslibs/src/datetime/np_datetime_strings.h>
#include "date_conversions.h"
#include "datetime.h"

static PyTypeObject *type_decimal;
Expand Down Expand Up @@ -209,34 +208,6 @@ static TypeContext *createTypeContext(void) {
return pc;
}

/*
 * Function: scaleNanosecToUnit
 * -----------------------------
 *
 * Rescales, in place, an integer holding a time in nanoseconds so that it
 * is expressed in `unit` instead.
 *
 * value: pointer to the nanosecond count; overwritten with the scaled value
 * unit:  target resolution; only NPY_FR_ns, NPY_FR_us, NPY_FR_ms and
 *        NPY_FR_s are handled
 *
 * Returns 0 on success. Returns -1 for any other unit, in which case
 * *value is left untouched.
 */
static int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) {
    if (unit == NPY_FR_ns) {
        // Already in nanoseconds; nothing to scale.
        return 0;
    }

    if (unit == NPY_FR_us) {
        *value /= 1000LL;
        return 0;
    }

    if (unit == NPY_FR_ms) {
        *value /= 1000000LL;
        return 0;
    }

    if (unit == NPY_FR_s) {
        *value /= 1000000000LL;
        return 0;
    }

    // Unsupported resolution.
    return -1;
}

static PyObject *get_values(PyObject *obj) {
PyObject *values = NULL;

Expand Down Expand Up @@ -379,79 +350,13 @@ static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc),
return (char *)PyUnicode_AsUTF8AndSize(_obj, (Py_ssize_t *)_outLen);
}

/*
 * Converts the int64_t representation of a datetime to an ISO string.
 *
 * value: the datetime as an integer; it is decoded using NPY_FR_ns
 * base:  unit handed to the ISO formatter to select the output precision
 * len:   out-parameter; on success it holds strlen() of the returned string
 *
 * Returns a buffer obtained from PyObject_Malloc (release it with
 * PyObject_Free), or NULL with a Python exception set when allocation or
 * formatting fails. On failure the value left in *len should not be used.
 */
static char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) {
    npy_datetimestruct dts;
    int ret_code;

    pandas_datetime_to_datetimestruct(value, NPY_FR_ns, &dts);

    *len = (size_t)get_datetime_iso_8601_strlen(0, base);
    char *result = PyObject_Malloc(*len);

    if (result == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    ret_code = make_iso_8601_datetime(&dts, result, *len, base);
    if (ret_code != 0) {
        PyErr_SetString(PyExc_ValueError,
                        "Could not convert datetime value to string");
        PyObject_Free(result);
        // Bail out here: falling through would call strlen() on, and hand
        // back to the caller, a buffer that has just been freed.
        return NULL;
    }

    // Note that get_datetime_iso_8601_strlen just gives a generic size
    // for ISO string conversion, not the actual size used
    *len = strlen(result);
    return result;
}

/*
 * JSON callback: formats the integer stored in the type context as an ISO
 * string, using the datetime unit configured on the encoder.
 *
 * Returns the char* produced by int64ToIso; *len is filled in by that call.
 */
static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused),
                                      JSONTypeContext *tc, size_t *len) {
    PyObjectEncoder *encoder = (PyObjectEncoder *)tc->encoder;
    NPY_DATETIMEUNIT unit = encoder->datetimeUnit;

    return int64ToIso(GET_TC(tc)->longValue, unit, len);
}

/*
 * Returns `dt` scaled by scaleNanosecToUnit to the resolution `base`.
 *
 * The status returned by scaleNanosecToUnit is not inspected, so when the
 * scaling reports an error the value comes back exactly as scaleNanosecToUnit
 * left it.
 */
static npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) {
    npy_datetime *scaled = &dt;

    scaleNanosecToUnit(scaled, base);
    return *scaled;
}

/*
 * Convert PyDatetime To ISO C-string.
 *
 * obj:  the Python date/datetime object to convert
 * base: unit handed to the ISO formatter to select the output precision
 * len:  out-parameter; on success it holds strlen() of the returned string
 *
 * Returns a buffer obtained from PyObject_Malloc (release it with
 * PyObject_Free), or NULL with a Python exception set when the object
 * cannot be converted, allocation fails, or formatting fails.
 */
static char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base,
                             size_t *len) {
    npy_datetimestruct dts;
    int ret;

    ret = convert_pydatetime_to_datetimestruct(obj, &dts);
    if (ret != 0) {
        // Keep any exception the converter raised; only supply a generic
        // one when it failed without setting an error.
        if (!PyErr_Occurred()) {
            PyErr_SetString(PyExc_ValueError,
                            "Could not convert PyDateTime to numpy datetime");
        }
        return NULL;
    }

    *len = (size_t)get_datetime_iso_8601_strlen(0, base);
    char *result = PyObject_Malloc(*len);

    // The formatter writes into the buffer, so an allocation failure must
    // be caught before calling it.
    if (result == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    ret = make_iso_8601_datetime(&dts, result, *len, base);

    if (ret != 0) {
        PRINTMARK();
        PyErr_SetString(PyExc_ValueError,
                        "Could not convert datetime value to string");
        PyObject_Free(result);
        return NULL;
    }

    // Note that get_datetime_iso_8601_strlen just gives a generic size
    // for ISO string conversion, not the actual size used
    *len = strlen(result);
    return result;
}

/* JSON callback */
static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
size_t *len) {
Expand All @@ -465,30 +370,6 @@ static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
return PyDateTimeToIso(obj, base, len);
}

/*
 * Converts a Python date/datetime object to an epoch value with
 * resolution `base`.
 *
 * obj:  the Python object to convert; it must pass PyDate_Check
 * base: resolution of the returned value
 *
 * Returns the epoch value on success. On failure a Python exception is set
 * (TypeError when obj fails PyDate_Check, otherwise the converter's own
 * error or a ValueError) and -1 is returned; because -1 is also a
 * legitimate epoch value, callers should consult PyErr_Occurred() to
 * detect failure.
 */
static npy_datetime PyDateTimeToEpoch(PyObject *obj, NPY_DATETIMEUNIT base) {
    npy_datetimestruct dts;
    int ret;

    if (!PyDate_Check(obj)) {
        // Reject the object before it is reinterpreted as a
        // PyDateTime_Date below.
        PyErr_SetString(PyExc_TypeError, "Expected date object");
        return -1;
    }
    PyDateTime_Date *dt = (PyDateTime_Date *)obj;

    ret = convert_pydatetime_to_datetimestruct(dt, &dts);
    if (ret != 0) {
        // Keep any exception the converter raised; only supply a generic
        // one when it failed without setting an error.
        if (!PyErr_Occurred()) {
            PyErr_SetString(PyExc_ValueError,
                            "Could not convert PyDateTime to numpy datetime");
        }
        // TODO: is setting errMsg on the encoder required as well?
        // Stop here: `dts` may not have been filled in, so computing an
        // epoch from it would read indeterminate data.
        return -1;
    }

    npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts);
    return NpyDateTimeToEpoch(npy_dt, base);
}

static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) {
PyObject *obj = (PyObject *)_obj;
PyObject *str;
Expand Down Expand Up @@ -1814,7 +1695,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
PRINTMARK();
NPY_DATETIMEUNIT base =
((PyObjectEncoder *)tc->encoder)->datetimeUnit;
GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base);
GET_TC(tc)->longValue = PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
tc->type = JT_LONG;
}
return;
Expand All @@ -1840,7 +1721,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
PRINTMARK();
NPY_DATETIMEUNIT base =
((PyObjectEncoder *)tc->encoder)->datetimeUnit;
GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base);
GET_TC(tc)->longValue = PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
tc->type = JT_LONG;
}
return;
Expand Down
7 changes: 6 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ def initialize_options(self):
pjoin(ujson_python, "ujson.c"),
pjoin(ujson_python, "objToJSON.c"),
pjoin(ujson_python, "JSONtoObj.c"),
pjoin(ujson_python, "date_conversions.c"),
pjoin(ujson_lib, "ultrajsonenc.c"),
pjoin(ujson_lib, "ultrajsondec.c"),
pjoin(util, "move.c"),
Expand Down Expand Up @@ -714,11 +715,15 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):

ujson_ext = Extension(
"pandas._libs.json",
depends=["pandas/_libs/src/ujson/lib/ultrajson.h"],
depends=[
"pandas/_libs/src/ujson/lib/ultrajson.h",
"pandas/_libs/src/ujson/python/date_conversions.h",
],
sources=(
[
"pandas/_libs/src/ujson/python/ujson.c",
"pandas/_libs/src/ujson/python/objToJSON.c",
"pandas/_libs/src/ujson/python/date_conversions.c",
"pandas/_libs/src/ujson/python/JSONtoObj.c",
"pandas/_libs/src/ujson/lib/ultrajsonenc.c",
"pandas/_libs/src/ujson/lib/ultrajsondec.c",
Expand Down