-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Replaced void pointers with Types in JSON Datetime Conversions #30283
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d4b6c72
e7d4880
d2c6d36
6b6b669
ffd07a1
9f7d9ab
3fc9b90
7471721
a5201f9
946633e
087df67
47c6ea1
04f2d16
f8ed75d
98cb432
09b00fd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -59,8 +59,8 @@ PyObject *cls_timedelta; | |||
|
||||
npy_int64 get_nat(void) { return NPY_MIN_INT64; } | ||||
|
||||
typedef void *(*PFN_PyTypeToJSON)(JSOBJ obj, JSONTypeContext *ti, | ||||
void *outValue, size_t *_outLen); | ||||
typedef char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti, | ||||
size_t *_outLen); | ||||
|
||||
typedef struct __NpyArrContext { | ||||
PyObject *array; | ||||
|
@@ -94,7 +94,7 @@ typedef struct __TypeContext { | |||
JSPFN_ITERNEXT iterNext; | ||||
JSPFN_ITERGETNAME iterGetName; | ||||
JSPFN_ITERGETVALUE iterGetValue; | ||||
PFN_PyTypeToJSON PyTypeToJSON; | ||||
PFN_PyTypeToUTF8 PyTypeToUTF8; | ||||
PyObject *newObj; | ||||
PyObject *dictObj; | ||||
Py_ssize_t index; | ||||
|
@@ -396,96 +396,116 @@ static PyObject *get_item(PyObject *obj, Py_ssize_t i) { | |||
return ret; | ||||
} | ||||
|
||||
static void *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, | ||||
size_t *_outLen) { | ||||
static char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *tc, size_t *_outLen) { | ||||
PyObject *obj = (PyObject *)_obj; | ||||
*_outLen = PyBytes_GET_SIZE(obj); | ||||
return PyBytes_AS_STRING(obj); | ||||
} | ||||
|
||||
static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, | ||||
size_t *_outLen) { | ||||
return PyUnicode_AsUTF8AndSize(_obj, _outLen); | ||||
static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, size_t *_outLen) { | ||||
return (char *)PyUnicode_AsUTF8AndSize(_obj, (Py_ssize_t *)_outLen); | ||||
} | ||||
|
||||
static void *PandasDateTimeStructToJSON(npy_datetimestruct *dts, | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The diff is a little difficult to read here, but this got rid of the |
||||
JSONTypeContext *tc, void *outValue, | ||||
size_t *_outLen) { | ||||
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; | ||||
/* returns a char* and mutates the pointer to *len */ | ||||
static char *NpyDateTimeToIso(JSOBJ unused, JSONTypeContext *tc, size_t *len) { | ||||
npy_datetimestruct dts; | ||||
int ret_code; | ||||
int64_t longVal = GET_TC(tc)->longValue; | ||||
|
||||
if (((PyObjectEncoder *)tc->encoder)->datetimeIso) { | ||||
PRINTMARK(); | ||||
*_outLen = (size_t)get_datetime_iso_8601_strlen(0, base); | ||||
GET_TC(tc)->cStr = PyObject_Malloc(sizeof(char) * (*_outLen)); | ||||
if (!GET_TC(tc)->cStr) { | ||||
PyErr_NoMemory(); | ||||
((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; | ||||
return NULL; | ||||
} | ||||
pandas_datetime_to_datetimestruct(longVal, NPY_FR_ns, &dts); | ||||
|
||||
if (!make_iso_8601_datetime(dts, GET_TC(tc)->cStr, *_outLen, base)) { | ||||
PRINTMARK(); | ||||
*_outLen = strlen(GET_TC(tc)->cStr); | ||||
return GET_TC(tc)->cStr; | ||||
} else { | ||||
PRINTMARK(); | ||||
PyErr_SetString(PyExc_ValueError, | ||||
"Could not convert datetime value to string"); | ||||
((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; | ||||
PyObject_Free(GET_TC(tc)->cStr); | ||||
return NULL; | ||||
} | ||||
} else { | ||||
PRINTMARK(); | ||||
*((JSINT64 *)outValue) = npy_datetimestruct_to_datetime(base, dts); | ||||
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; | ||||
*len = (size_t)get_datetime_iso_8601_strlen(0, base); | ||||
char *result = PyObject_Malloc(*len); | ||||
|
||||
if (result == NULL) { | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how would we get here? if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This would happen if malloc fails There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So failing to address it would mean getting something like a segfault? the PyErr_NoMemory just below raises back in py-space? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would segfault if you tried to access the memory if malloc failed to allocate it. FWIW the error handling here is copy / paste from here:
There are similar checks elsewhere throughout the module (not always consistent) so maybe worth unifying in a follow up |
||||
PyErr_NoMemory(); | ||||
((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; | ||||
return NULL; | ||||
} | ||||
} | ||||
|
||||
static void *NpyDateTimeScalarToJSON(JSOBJ _obj, JSONTypeContext *tc, | ||||
void *outValue, size_t *_outLen) { | ||||
npy_datetimestruct dts; | ||||
PyDatetimeScalarObject *obj = (PyDatetimeScalarObject *)_obj; | ||||
PRINTMARK(); | ||||
// TODO(anyone): Does not appear to be reached in tests. | ||||
ret_code = make_iso_8601_datetime(&dts, result, *len, base); | ||||
if (ret_code != 0) { | ||||
PyErr_SetString(PyExc_ValueError, | ||||
"Could not convert datetime value to string"); | ||||
((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; | ||||
PyObject_Free(result); | ||||
} | ||||
|
||||
pandas_datetime_to_datetimestruct(obj->obval, | ||||
(NPY_DATETIMEUNIT)obj->obmeta.base, &dts); | ||||
return PandasDateTimeStructToJSON(&dts, tc, outValue, _outLen); | ||||
// Note that get_datetime_iso_8601_strlen just gives a generic size | ||||
// for ISO string conversion, not the actual size used | ||||
*len = strlen(result); | ||||
return result; | ||||
} | ||||
|
||||
static void *PyDateTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, void *outValue, | ||||
size_t *_outLen) { | ||||
static npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) { | ||||
scaleNanosecToUnit(&dt, base); | ||||
return dt; | ||||
} | ||||
|
||||
static char *PyDateTimeToIso(JSOBJ obj, JSONTypeContext *tc, size_t *len) { | ||||
npy_datetimestruct dts; | ||||
PyDateTime_Date *obj = (PyDateTime_Date *)_obj; | ||||
int ret; | ||||
|
||||
PRINTMARK(); | ||||
if (!PyDateTime_Check(obj)) { | ||||
// TODO: raise TypeError | ||||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
} | ||||
|
||||
if (!convert_pydatetime_to_datetimestruct(obj, &dts)) { | ||||
PRINTMARK(); | ||||
return PandasDateTimeStructToJSON(&dts, tc, outValue, _outLen); | ||||
} else { | ||||
ret = convert_pydatetime_to_datetimestruct(obj, &dts); | ||||
if (ret != 0) { | ||||
if (!PyErr_Occurred()) { | ||||
PyErr_SetString(PyExc_ValueError, | ||||
"Could not convert datetime value to string"); | ||||
"Could not convert PyDateTime to numpy datetime"); | ||||
} | ||||
((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; | ||||
return NULL; | ||||
} | ||||
|
||||
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; | ||||
*len = (size_t)get_datetime_iso_8601_strlen(0, base); | ||||
char *result = PyObject_Malloc(*len); | ||||
ret = make_iso_8601_datetime(&dts, result, *len, base); | ||||
|
||||
if (ret != 0) { | ||||
PRINTMARK(); | ||||
PyErr_SetString(PyExc_ValueError, | ||||
"Could not convert datetime value to string"); | ||||
((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; | ||||
PyObject_Free(result); | ||||
return NULL; | ||||
} | ||||
|
||||
// Note that get_datetime_iso_8601_strlen just gives a generic size | ||||
// for ISO string conversion, not the actual size used | ||||
*len = strlen(result); | ||||
return result; | ||||
} | ||||
|
||||
static void *NpyDatetime64ToJSON(JSOBJ _obj, JSONTypeContext *tc, | ||||
void *outValue, size_t *_outLen) { | ||||
static npy_datetime PyDateTimeToEpoch(PyObject *obj, NPY_DATETIMEUNIT base) { | ||||
npy_datetimestruct dts; | ||||
PRINTMARK(); | ||||
int ret; | ||||
|
||||
if (!PyDateTime_Check(obj)) { | ||||
// TODO: raise TypeError | ||||
} | ||||
PyDateTime_Date *dt = (PyDateTime_Date *)obj; | ||||
|
||||
pandas_datetime_to_datetimestruct((npy_datetime)GET_TC(tc)->longValue, | ||||
NPY_FR_ns, &dts); | ||||
return PandasDateTimeStructToJSON(&dts, tc, outValue, _outLen); | ||||
ret = convert_pydatetime_to_datetimestruct(dt, &dts); | ||||
if (ret != 0) { | ||||
if (!PyErr_Occurred()) { | ||||
PyErr_SetString(PyExc_ValueError, | ||||
"Could not convert PyDateTime to numpy datetime"); | ||||
} | ||||
// TODO: is setting errMsg required? | ||||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
//((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; | ||||
// return NULL; | ||||
} | ||||
|
||||
npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts); | ||||
return NpyDateTimeToEpoch(npy_dt, base); | ||||
} | ||||
|
||||
static void *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, void *outValue, | ||||
size_t *outLen) { | ||||
static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) { | ||||
PyObject *obj = (PyObject *)_obj; | ||||
PyObject *str; | ||||
PyObject *tmp; | ||||
|
@@ -509,49 +529,10 @@ static void *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, void *outValue, | |||
GET_TC(tc)->newObj = str; | ||||
|
||||
*outLen = PyBytes_GET_SIZE(str); | ||||
outValue = (void *)PyBytes_AS_STRING(str); | ||||
char *outValue = PyBytes_AS_STRING(str); | ||||
return outValue; | ||||
} | ||||
|
||||
static int NpyTypeToJSONType(PyObject *obj, JSONTypeContext *tc, int npyType, | ||||
void *value) { | ||||
PyArray_VectorUnaryFunc *castfunc; | ||||
npy_int64 longVal; | ||||
|
||||
if (PyTypeNum_ISDATETIME(npyType)) { | ||||
PRINTMARK(); | ||||
castfunc = | ||||
PyArray_GetCastFunc(PyArray_DescrFromType(npyType), NPY_INT64); | ||||
if (!castfunc) { | ||||
PyErr_Format(PyExc_ValueError, "Cannot cast numpy dtype %d to long", | ||||
npyType); | ||||
} | ||||
castfunc(value, &longVal, 1, NULL, NULL); | ||||
if (longVal == get_nat()) { | ||||
PRINTMARK(); | ||||
return JT_NULL; | ||||
} | ||||
|
||||
if (((PyObjectEncoder *)tc->encoder)->datetimeIso) { | ||||
GET_TC(tc)->longValue = (JSINT64)longVal; | ||||
GET_TC(tc)->PyTypeToJSON = NpyDatetime64ToJSON; | ||||
return JT_UTF8; | ||||
} else { | ||||
NPY_DATETIMEUNIT unit = | ||||
((PyObjectEncoder *)tc->encoder)->datetimeUnit; | ||||
if (!scaleNanosecToUnit(&longVal, unit)) { | ||||
GET_TC(tc)->longValue = longVal; | ||||
return JT_LONG; | ||||
} else { | ||||
// TODO: some kind of error handling | ||||
} | ||||
} | ||||
} | ||||
|
||||
PRINTMARK(); | ||||
return JT_INVALID; | ||||
} | ||||
|
||||
//============================================================================= | ||||
// Numpy array iteration functions | ||||
//============================================================================= | ||||
|
@@ -1705,29 +1686,6 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { | |||
obj = (PyObject *)_obj; | ||||
enc = (PyObjectEncoder *)tc->encoder; | ||||
|
||||
if (enc->npyType >= 0) { | ||||
PRINTMARK(); | ||||
tc->prv = &(enc->basicTypeContext); | ||||
tc->type = NpyTypeToJSONType(obj, tc, enc->npyType, enc->npyValue); | ||||
|
||||
if (tc->type == JT_INVALID) { | ||||
if (enc->defaultHandler) { | ||||
enc->npyType = -1; | ||||
PRINTMARK(); | ||||
Object_invokeDefaultHandler( | ||||
enc->npyCtxtPassthru->getitem(enc->npyValue, | ||||
enc->npyCtxtPassthru->array), | ||||
enc); | ||||
} else { | ||||
PyErr_Format(PyExc_RuntimeError, "Unhandled numpy dtype %d", | ||||
enc->npyType); | ||||
} | ||||
} | ||||
enc->npyCtxtPassthru = NULL; | ||||
enc->npyType = -1; | ||||
return; | ||||
} | ||||
|
||||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
if (PyBool_Check(obj)) { | ||||
PRINTMARK(); | ||||
tc->type = (obj == Py_True) ? JT_TRUE : JT_FALSE; | ||||
|
@@ -1745,6 +1703,44 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { | |||
} | ||||
tc->prv = pc; | ||||
|
||||
if (PyTypeNum_ISDATETIME(enc->npyType)) { | ||||
PRINTMARK(); | ||||
int64_t longVal; | ||||
PyArray_VectorUnaryFunc *castfunc = | ||||
PyArray_GetCastFunc(PyArray_DescrFromType(enc->npyType), NPY_INT64); | ||||
if (!castfunc) { | ||||
PyErr_Format(PyExc_ValueError, "Cannot cast numpy dtype %d to long", | ||||
enc->npyType); | ||||
} | ||||
castfunc(enc->npyValue, &longVal, 1, NULL, NULL); | ||||
if (longVal == get_nat()) { | ||||
PRINTMARK(); | ||||
tc->type = JT_NULL; | ||||
} else { | ||||
|
||||
if (enc->datetimeIso) { | ||||
PRINTMARK(); | ||||
pc->PyTypeToUTF8 = NpyDateTimeToIso; | ||||
// Currently no way to pass longVal to iso function, so use | ||||
// state management | ||||
GET_TC(tc)->longValue = longVal; | ||||
tc->type = JT_UTF8; | ||||
} else { | ||||
PRINTMARK(); | ||||
NPY_DATETIMEUNIT base = | ||||
((PyObjectEncoder *)tc->encoder)->datetimeUnit; | ||||
GET_TC(tc)->longValue = NpyDateTimeToEpoch(longVal, base); | ||||
tc->type = JT_LONG; | ||||
} | ||||
} | ||||
|
||||
// TODO: this prevents infinite loop with mixed-type DataFrames; | ||||
// refactor | ||||
enc->npyCtxtPassthru = NULL; | ||||
enc->npyType = -1; | ||||
return; | ||||
} | ||||
|
||||
if (PyIter_Check(obj) || | ||||
(PyArray_Check(obj) && !PyArray_CheckScalar(obj))) { | ||||
PRINTMARK(); | ||||
|
@@ -1776,12 +1772,12 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { | |||
return; | ||||
} else if (PyBytes_Check(obj)) { | ||||
PRINTMARK(); | ||||
pc->PyTypeToJSON = PyBytesToUTF8; | ||||
pc->PyTypeToUTF8 = PyBytesToUTF8; | ||||
tc->type = JT_UTF8; | ||||
return; | ||||
} else if (PyUnicode_Check(obj)) { | ||||
PRINTMARK(); | ||||
pc->PyTypeToJSON = PyUnicodeToUTF8; | ||||
pc->PyTypeToUTF8 = PyUnicodeToUTF8; | ||||
tc->type = JT_UTF8; | ||||
return; | ||||
} else if (PyObject_TypeCheck(obj, type_decimal)) { | ||||
|
@@ -1799,19 +1795,19 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { | |||
PRINTMARK(); | ||||
if (enc->datetimeIso) { | ||||
PRINTMARK(); | ||||
pc->PyTypeToJSON = PyDateTimeToJSON; | ||||
pc->PyTypeToUTF8 = PyDateTimeToIso; | ||||
tc->type = JT_UTF8; | ||||
} else { | ||||
PRINTMARK(); | ||||
// TODO: last argument here is unused; should decouple string | ||||
// from long datetimelike conversion routines | ||||
PyDateTimeToJSON(obj, tc, &(GET_TC(tc)->longValue), 0); | ||||
NPY_DATETIMEUNIT base = | ||||
((PyObjectEncoder *)tc->encoder)->datetimeUnit; | ||||
GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base); | ||||
tc->type = JT_LONG; | ||||
} | ||||
return; | ||||
} else if (PyTime_Check(obj)) { | ||||
PRINTMARK(); | ||||
pc->PyTypeToJSON = PyTimeToJSON; | ||||
pc->PyTypeToUTF8 = PyTimeToJSON; | ||||
tc->type = JT_UTF8; | ||||
return; | ||||
} else if (PyArray_IsScalar(obj, Datetime)) { | ||||
|
@@ -1823,8 +1819,17 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { | |||
} | ||||
|
||||
PRINTMARK(); | ||||
pc->PyTypeToJSON = NpyDateTimeScalarToJSON; | ||||
tc->type = enc->datetimeIso ? JT_UTF8 : JT_LONG; | ||||
if (enc->datetimeIso) { | ||||
PRINTMARK(); | ||||
pc->PyTypeToUTF8 = PyDateTimeToIso; | ||||
tc->type = JT_UTF8; | ||||
} else { | ||||
PRINTMARK(); | ||||
NPY_DATETIMEUNIT base = | ||||
((PyObjectEncoder *)tc->encoder)->datetimeUnit; | ||||
GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base); | ||||
tc->type = JT_LONG; | ||||
} | ||||
return; | ||||
} else if (PyDelta_Check(obj)) { | ||||
if (PyObject_HasAttrString(obj, "value")) { | ||||
|
@@ -2226,7 +2231,7 @@ void Object_endTypeContext(JSOBJ obj, JSONTypeContext *tc) { | |||
|
||||
const char *Object_getStringValue(JSOBJ obj, JSONTypeContext *tc, | ||||
size_t *_outLen) { | ||||
return GET_TC(tc)->PyTypeToJSON(obj, tc, NULL, _outLen); | ||||
return GET_TC(tc)->PyTypeToUTF8(obj, tc, _outLen); | ||||
} | ||||
|
||||
JSINT64 Object_getLongValue(JSOBJ obj, JSONTypeContext *tc) { | ||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
PyUnicode_AsUTF8AndSize came up in the thread about unicode surrogates. do we need to worry about those here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@WillAyd i think this is the same function where passing a surrogate unicode character can cause a segfault. can that be ruled out here?