Skip to content

Commit 2a6c2d7

Browse files
WillAyd authored and jreback committed
Replaced void pointers with Types in JSON Datetime Conversions (#30283)
1 parent c74de79 commit 2a6c2d7

File tree

1 file changed

+141
-136
lines changed

1 file changed

+141
-136
lines changed

pandas/_libs/src/ujson/python/objToJSON.c

+141 -136
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ PyObject *cls_timedelta;
5959

6060
npy_int64 get_nat(void) { return NPY_MIN_INT64; }
6161

62-
typedef void *(*PFN_PyTypeToJSON)(JSOBJ obj, JSONTypeContext *ti,
63-
void *outValue, size_t *_outLen);
62+
typedef char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti,
63+
size_t *_outLen);
6464

6565
typedef struct __NpyArrContext {
6666
PyObject *array;
@@ -94,7 +94,7 @@ typedef struct __TypeContext {
9494
JSPFN_ITERNEXT iterNext;
9595
JSPFN_ITERGETNAME iterGetName;
9696
JSPFN_ITERGETVALUE iterGetValue;
97-
PFN_PyTypeToJSON PyTypeToJSON;
97+
PFN_PyTypeToUTF8 PyTypeToUTF8;
9898
PyObject *newObj;
9999
PyObject *dictObj;
100100
Py_ssize_t index;
@@ -396,96 +396,116 @@ static PyObject *get_item(PyObject *obj, Py_ssize_t i) {
396396
return ret;
397397
}
398398

399-
static void *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue,
400-
size_t *_outLen) {
399+
static char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *tc, size_t *_outLen) {
401400
PyObject *obj = (PyObject *)_obj;
402401
*_outLen = PyBytes_GET_SIZE(obj);
403402
return PyBytes_AS_STRING(obj);
404403
}
405404

406-
static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue,
407-
size_t *_outLen) {
408-
return PyUnicode_AsUTF8AndSize(_obj, _outLen);
405+
static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, size_t *_outLen) {
406+
return (char *)PyUnicode_AsUTF8AndSize(_obj, (Py_ssize_t *)_outLen);
409407
}
410408

411-
static void *PandasDateTimeStructToJSON(npy_datetimestruct *dts,
412-
JSONTypeContext *tc, void *outValue,
413-
size_t *_outLen) {
414-
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
409+
/* returns a char* and mutates the pointer to *len */
410+
static char *NpyDateTimeToIso(JSOBJ unused, JSONTypeContext *tc, size_t *len) {
411+
npy_datetimestruct dts;
412+
int ret_code;
413+
int64_t longVal = GET_TC(tc)->longValue;
415414

416-
if (((PyObjectEncoder *)tc->encoder)->datetimeIso) {
417-
PRINTMARK();
418-
*_outLen = (size_t)get_datetime_iso_8601_strlen(0, base);
419-
GET_TC(tc)->cStr = PyObject_Malloc(sizeof(char) * (*_outLen));
420-
if (!GET_TC(tc)->cStr) {
421-
PyErr_NoMemory();
422-
((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
423-
return NULL;
424-
}
415+
pandas_datetime_to_datetimestruct(longVal, NPY_FR_ns, &dts);
425416

426-
if (!make_iso_8601_datetime(dts, GET_TC(tc)->cStr, *_outLen, base)) {
427-
PRINTMARK();
428-
*_outLen = strlen(GET_TC(tc)->cStr);
429-
return GET_TC(tc)->cStr;
430-
} else {
431-
PRINTMARK();
432-
PyErr_SetString(PyExc_ValueError,
433-
"Could not convert datetime value to string");
434-
((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
435-
PyObject_Free(GET_TC(tc)->cStr);
436-
return NULL;
437-
}
438-
} else {
439-
PRINTMARK();
440-
*((JSINT64 *)outValue) = npy_datetimestruct_to_datetime(base, dts);
417+
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
418+
*len = (size_t)get_datetime_iso_8601_strlen(0, base);
419+
char *result = PyObject_Malloc(*len);
420+
421+
if (result == NULL) {
422+
PyErr_NoMemory();
423+
((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
441424
return NULL;
442425
}
443-
}
444426

445-
static void *NpyDateTimeScalarToJSON(JSOBJ _obj, JSONTypeContext *tc,
446-
void *outValue, size_t *_outLen) {
447-
npy_datetimestruct dts;
448-
PyDatetimeScalarObject *obj = (PyDatetimeScalarObject *)_obj;
449-
PRINTMARK();
450-
// TODO(anyone): Does not appear to be reached in tests.
427+
ret_code = make_iso_8601_datetime(&dts, result, *len, base);
428+
if (ret_code != 0) {
429+
PyErr_SetString(PyExc_ValueError,
430+
"Could not convert datetime value to string");
431+
((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
432+
PyObject_Free(result);
433+
}
451434

452-
pandas_datetime_to_datetimestruct(obj->obval,
453-
(NPY_DATETIMEUNIT)obj->obmeta.base, &dts);
454-
return PandasDateTimeStructToJSON(&dts, tc, outValue, _outLen);
435+
// Note that get_datetime_iso_8601_strlen just gives a generic size
436+
// for ISO string conversion, not the actual size used
437+
*len = strlen(result);
438+
return result;
455439
}
456440

457-
static void *PyDateTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, void *outValue,
458-
size_t *_outLen) {
441+
static npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) {
442+
scaleNanosecToUnit(&dt, base);
443+
return dt;
444+
}
445+
446+
static char *PyDateTimeToIso(JSOBJ obj, JSONTypeContext *tc, size_t *len) {
459447
npy_datetimestruct dts;
460-
PyDateTime_Date *obj = (PyDateTime_Date *)_obj;
448+
int ret;
461449

462-
PRINTMARK();
450+
if (!PyDateTime_Check(obj)) {
451+
// TODO: raise TypeError
452+
}
463453

464-
if (!convert_pydatetime_to_datetimestruct(obj, &dts)) {
465-
PRINTMARK();
466-
return PandasDateTimeStructToJSON(&dts, tc, outValue, _outLen);
467-
} else {
454+
ret = convert_pydatetime_to_datetimestruct(obj, &dts);
455+
if (ret != 0) {
468456
if (!PyErr_Occurred()) {
469457
PyErr_SetString(PyExc_ValueError,
470-
"Could not convert datetime value to string");
458+
"Could not convert PyDateTime to numpy datetime");
471459
}
472460
((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
473461
return NULL;
474462
}
463+
464+
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
465+
*len = (size_t)get_datetime_iso_8601_strlen(0, base);
466+
char *result = PyObject_Malloc(*len);
467+
ret = make_iso_8601_datetime(&dts, result, *len, base);
468+
469+
if (ret != 0) {
470+
PRINTMARK();
471+
PyErr_SetString(PyExc_ValueError,
472+
"Could not convert datetime value to string");
473+
((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
474+
PyObject_Free(result);
475+
return NULL;
476+
}
477+
478+
// Note that get_datetime_iso_8601_strlen just gives a generic size
479+
// for ISO string conversion, not the actual size used
480+
*len = strlen(result);
481+
return result;
475482
}
476483

477-
static void *NpyDatetime64ToJSON(JSOBJ _obj, JSONTypeContext *tc,
478-
void *outValue, size_t *_outLen) {
484+
static npy_datetime PyDateTimeToEpoch(PyObject *obj, NPY_DATETIMEUNIT base) {
479485
npy_datetimestruct dts;
480-
PRINTMARK();
486+
int ret;
487+
488+
if (!PyDateTime_Check(obj)) {
489+
// TODO: raise TypeError
490+
}
491+
PyDateTime_Date *dt = (PyDateTime_Date *)obj;
481492

482-
pandas_datetime_to_datetimestruct((npy_datetime)GET_TC(tc)->longValue,
483-
NPY_FR_ns, &dts);
484-
return PandasDateTimeStructToJSON(&dts, tc, outValue, _outLen);
493+
ret = convert_pydatetime_to_datetimestruct(dt, &dts);
494+
if (ret != 0) {
495+
if (!PyErr_Occurred()) {
496+
PyErr_SetString(PyExc_ValueError,
497+
"Could not convert PyDateTime to numpy datetime");
498+
}
499+
// TODO: is setting errMsg required?
500+
//((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
501+
// return NULL;
502+
}
503+
504+
npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts);
505+
return NpyDateTimeToEpoch(npy_dt, base);
485506
}
486507

487-
static void *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, void *outValue,
488-
size_t *outLen) {
508+
static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) {
489509
PyObject *obj = (PyObject *)_obj;
490510
PyObject *str;
491511
PyObject *tmp;
@@ -509,49 +529,10 @@ static void *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, void *outValue,
509529
GET_TC(tc)->newObj = str;
510530

511531
*outLen = PyBytes_GET_SIZE(str);
512-
outValue = (void *)PyBytes_AS_STRING(str);
532+
char *outValue = PyBytes_AS_STRING(str);
513533
return outValue;
514534
}
515535

516-
static int NpyTypeToJSONType(PyObject *obj, JSONTypeContext *tc, int npyType,
517-
void *value) {
518-
PyArray_VectorUnaryFunc *castfunc;
519-
npy_int64 longVal;
520-
521-
if (PyTypeNum_ISDATETIME(npyType)) {
522-
PRINTMARK();
523-
castfunc =
524-
PyArray_GetCastFunc(PyArray_DescrFromType(npyType), NPY_INT64);
525-
if (!castfunc) {
526-
PyErr_Format(PyExc_ValueError, "Cannot cast numpy dtype %d to long",
527-
npyType);
528-
}
529-
castfunc(value, &longVal, 1, NULL, NULL);
530-
if (longVal == get_nat()) {
531-
PRINTMARK();
532-
return JT_NULL;
533-
}
534-
535-
if (((PyObjectEncoder *)tc->encoder)->datetimeIso) {
536-
GET_TC(tc)->longValue = (JSINT64)longVal;
537-
GET_TC(tc)->PyTypeToJSON = NpyDatetime64ToJSON;
538-
return JT_UTF8;
539-
} else {
540-
NPY_DATETIMEUNIT unit =
541-
((PyObjectEncoder *)tc->encoder)->datetimeUnit;
542-
if (!scaleNanosecToUnit(&longVal, unit)) {
543-
GET_TC(tc)->longValue = longVal;
544-
return JT_LONG;
545-
} else {
546-
// TODO: some kind of error handling
547-
}
548-
}
549-
}
550-
551-
PRINTMARK();
552-
return JT_INVALID;
553-
}
554-
555536
//=============================================================================
556537
// Numpy array iteration functions
557538
//=============================================================================
@@ -1705,29 +1686,6 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
17051686
obj = (PyObject *)_obj;
17061687
enc = (PyObjectEncoder *)tc->encoder;
17071688

1708-
if (enc->npyType >= 0) {
1709-
PRINTMARK();
1710-
tc->prv = &(enc->basicTypeContext);
1711-
tc->type = NpyTypeToJSONType(obj, tc, enc->npyType, enc->npyValue);
1712-
1713-
if (tc->type == JT_INVALID) {
1714-
if (enc->defaultHandler) {
1715-
enc->npyType = -1;
1716-
PRINTMARK();
1717-
Object_invokeDefaultHandler(
1718-
enc->npyCtxtPassthru->getitem(enc->npyValue,
1719-
enc->npyCtxtPassthru->array),
1720-
enc);
1721-
} else {
1722-
PyErr_Format(PyExc_RuntimeError, "Unhandled numpy dtype %d",
1723-
enc->npyType);
1724-
}
1725-
}
1726-
enc->npyCtxtPassthru = NULL;
1727-
enc->npyType = -1;
1728-
return;
1729-
}
1730-
17311689
if (PyBool_Check(obj)) {
17321690
PRINTMARK();
17331691
tc->type = (obj == Py_True) ? JT_TRUE : JT_FALSE;
@@ -1745,6 +1703,44 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
17451703
}
17461704
tc->prv = pc;
17471705

1706+
if (PyTypeNum_ISDATETIME(enc->npyType)) {
1707+
PRINTMARK();
1708+
int64_t longVal;
1709+
PyArray_VectorUnaryFunc *castfunc =
1710+
PyArray_GetCastFunc(PyArray_DescrFromType(enc->npyType), NPY_INT64);
1711+
if (!castfunc) {
1712+
PyErr_Format(PyExc_ValueError, "Cannot cast numpy dtype %d to long",
1713+
enc->npyType);
1714+
}
1715+
castfunc(enc->npyValue, &longVal, 1, NULL, NULL);
1716+
if (longVal == get_nat()) {
1717+
PRINTMARK();
1718+
tc->type = JT_NULL;
1719+
} else {
1720+
1721+
if (enc->datetimeIso) {
1722+
PRINTMARK();
1723+
pc->PyTypeToUTF8 = NpyDateTimeToIso;
1724+
// Currently no way to pass longVal to iso function, so use
1725+
// state management
1726+
GET_TC(tc)->longValue = longVal;
1727+
tc->type = JT_UTF8;
1728+
} else {
1729+
PRINTMARK();
1730+
NPY_DATETIMEUNIT base =
1731+
((PyObjectEncoder *)tc->encoder)->datetimeUnit;
1732+
GET_TC(tc)->longValue = NpyDateTimeToEpoch(longVal, base);
1733+
tc->type = JT_LONG;
1734+
}
1735+
}
1736+
1737+
// TODO: this prevents infinite loop with mixed-type DataFrames;
1738+
// refactor
1739+
enc->npyCtxtPassthru = NULL;
1740+
enc->npyType = -1;
1741+
return;
1742+
}
1743+
17481744
if (PyIter_Check(obj) ||
17491745
(PyArray_Check(obj) && !PyArray_CheckScalar(obj))) {
17501746
PRINTMARK();
@@ -1776,12 +1772,12 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
17761772
return;
17771773
} else if (PyBytes_Check(obj)) {
17781774
PRINTMARK();
1779-
pc->PyTypeToJSON = PyBytesToUTF8;
1775+
pc->PyTypeToUTF8 = PyBytesToUTF8;
17801776
tc->type = JT_UTF8;
17811777
return;
17821778
} else if (PyUnicode_Check(obj)) {
17831779
PRINTMARK();
1784-
pc->PyTypeToJSON = PyUnicodeToUTF8;
1780+
pc->PyTypeToUTF8 = PyUnicodeToUTF8;
17851781
tc->type = JT_UTF8;
17861782
return;
17871783
} else if (PyObject_TypeCheck(obj, type_decimal)) {
@@ -1799,19 +1795,19 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
17991795
PRINTMARK();
18001796
if (enc->datetimeIso) {
18011797
PRINTMARK();
1802-
pc->PyTypeToJSON = PyDateTimeToJSON;
1798+
pc->PyTypeToUTF8 = PyDateTimeToIso;
18031799
tc->type = JT_UTF8;
18041800
} else {
18051801
PRINTMARK();
1806-
// TODO: last argument here is unused; should decouple string
1807-
// from long datetimelike conversion routines
1808-
PyDateTimeToJSON(obj, tc, &(GET_TC(tc)->longValue), 0);
1802+
NPY_DATETIMEUNIT base =
1803+
((PyObjectEncoder *)tc->encoder)->datetimeUnit;
1804+
GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base);
18091805
tc->type = JT_LONG;
18101806
}
18111807
return;
18121808
} else if (PyTime_Check(obj)) {
18131809
PRINTMARK();
1814-
pc->PyTypeToJSON = PyTimeToJSON;
1810+
pc->PyTypeToUTF8 = PyTimeToJSON;
18151811
tc->type = JT_UTF8;
18161812
return;
18171813
} else if (PyArray_IsScalar(obj, Datetime)) {
@@ -1823,8 +1819,17 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
18231819
}
18241820

18251821
PRINTMARK();
1826-
pc->PyTypeToJSON = NpyDateTimeScalarToJSON;
1827-
tc->type = enc->datetimeIso ? JT_UTF8 : JT_LONG;
1822+
if (enc->datetimeIso) {
1823+
PRINTMARK();
1824+
pc->PyTypeToUTF8 = PyDateTimeToIso;
1825+
tc->type = JT_UTF8;
1826+
} else {
1827+
PRINTMARK();
1828+
NPY_DATETIMEUNIT base =
1829+
((PyObjectEncoder *)tc->encoder)->datetimeUnit;
1830+
GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base);
1831+
tc->type = JT_LONG;
1832+
}
18281833
return;
18291834
} else if (PyDelta_Check(obj)) {
18301835
if (PyObject_HasAttrString(obj, "value")) {
@@ -2226,7 +2231,7 @@ void Object_endTypeContext(JSOBJ obj, JSONTypeContext *tc) {
22262231

22272232
const char *Object_getStringValue(JSOBJ obj, JSONTypeContext *tc,
22282233
size_t *_outLen) {
2229-
return GET_TC(tc)->PyTypeToJSON(obj, tc, NULL, _outLen);
2234+
return GET_TC(tc)->PyTypeToUTF8(obj, tc, _outLen);
22302235
}
22312236

22322237
JSINT64 Object_getLongValue(JSOBJ obj, JSONTypeContext *tc) {

0 commit comments

Comments
 (0)