Skip to content

Commit 9c33464

Browse files
WillAyd authored and jreback committed
JSON Date Handling 1.0 Regressions (#30977)
1 parent f873fb9 commit 9c33464

File tree

2 files changed

+102
-46
lines changed

2 files changed

+102
-46
lines changed

pandas/_libs/src/ujson/python/objToJSON.c

+76 -46
Original file line numberDiff line numberDiff line change
@@ -456,8 +456,8 @@ static char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base,
456456
static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
457457
size_t *len) {
458458

459-
if (!PyDateTime_Check(obj)) {
460-
PyErr_SetString(PyExc_TypeError, "Expected datetime object");
459+
if (!PyDate_Check(obj)) {
460+
PyErr_SetString(PyExc_TypeError, "Expected date object");
461461
return NULL;
462462
}
463463

@@ -469,7 +469,7 @@ static npy_datetime PyDateTimeToEpoch(PyObject *obj, NPY_DATETIMEUNIT base) {
469469
npy_datetimestruct dts;
470470
int ret;
471471

472-
if (!PyDateTime_Check(obj)) {
472+
if (!PyDate_Check(obj)) {
473473
// TODO: raise TypeError
474474
}
475475
PyDateTime_Date *dt = (PyDateTime_Date *)obj;
@@ -1504,6 +1504,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
15041504
char **ret;
15051505
char *dataptr, *cLabel;
15061506
int type_num;
1507+
NPY_DATETIMEUNIT base = enc->datetimeUnit;
15071508
PRINTMARK();
15081509

15091510
if (!labels) {
@@ -1541,60 +1542,85 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
15411542
break;
15421543
}
15431544

1544-
// TODO: vectorized timedelta solution
1545-
if (enc->datetimeIso &&
1546-
(type_num == NPY_TIMEDELTA || PyDelta_Check(item))) {
1547-
PyObject *td = PyObject_CallFunction(cls_timedelta, "(O)", item);
1548-
if (td == NULL) {
1549-
Py_DECREF(item);
1550-
NpyArr_freeLabels(ret, num);
1551-
ret = 0;
1552-
break;
1553-
}
1554-
1555-
PyObject *iso = PyObject_CallMethod(td, "isoformat", NULL);
1556-
Py_DECREF(td);
1557-
if (iso == NULL) {
1558-
Py_DECREF(item);
1559-
NpyArr_freeLabels(ret, num);
1560-
ret = 0;
1561-
break;
1562-
}
1563-
1564-
cLabel = (char *)PyUnicode_AsUTF8(iso);
1565-
Py_DECREF(iso);
1566-
len = strlen(cLabel);
1567-
} else if (PyTypeNum_ISDATETIME(type_num)) {
1568-
NPY_DATETIMEUNIT base = enc->datetimeUnit;
1569-
npy_int64 longVal;
1545+
int is_datetimelike = 0;
1546+
npy_int64 nanosecVal;
1547+
if (PyTypeNum_ISDATETIME(type_num)) {
1548+
is_datetimelike = 1;
15701549
PyArray_VectorUnaryFunc *castfunc =
15711550
PyArray_GetCastFunc(PyArray_DescrFromType(type_num), NPY_INT64);
15721551
if (!castfunc) {
15731552
PyErr_Format(PyExc_ValueError,
15741553
"Cannot cast numpy dtype %d to long",
15751554
enc->npyType);
15761555
}
1577-
castfunc(dataptr, &longVal, 1, NULL, NULL);
1578-
if (enc->datetimeIso) {
1579-
cLabel = int64ToIso(longVal, base, &len);
1556+
castfunc(dataptr, &nanosecVal, 1, NULL, NULL);
1557+
} else if (PyDate_Check(item) || PyDelta_Check(item)) {
1558+
is_datetimelike = 1;
1559+
if (PyObject_HasAttrString(item, "value")) {
1560+
nanosecVal = get_long_attr(item, "value");
15801561
} else {
1581-
if (!scaleNanosecToUnit(&longVal, base)) {
1582-
// TODO: This gets hit but somehow doesn't cause errors
1583-
// need to clean up (elsewhere in module as well)
1562+
if (PyDelta_Check(item)) {
1563+
nanosecVal = total_seconds(item) *
1564+
1000000000LL; // nanoseconds per second
1565+
} else {
1566+
// datetime.* objects don't follow above rules
1567+
nanosecVal = PyDateTimeToEpoch(item, NPY_FR_ns);
15841568
}
1585-
cLabel = PyObject_Malloc(21); // 21 chars for int64
1586-
sprintf(cLabel, "%" NPY_INT64_FMT, longVal);
1587-
len = strlen(cLabel);
15881569
}
1589-
} else if (PyDateTime_Check(item) || PyDate_Check(item)) {
1590-
NPY_DATETIMEUNIT base = enc->datetimeUnit;
1591-
if (enc->datetimeIso) {
1592-
cLabel = PyDateTimeToIso((PyDateTime_Date *)item, base, &len);
1570+
}
1571+
1572+
if (is_datetimelike) {
1573+
if (nanosecVal == get_nat()) {
1574+
len = 5; // TODO: shouldn't require extra space for terminator
1575+
cLabel = PyObject_Malloc(len);
1576+
strncpy(cLabel, "null", len);
15931577
} else {
1594-
cLabel = PyObject_Malloc(21); // 21 chars for int64
1595-
sprintf(cLabel, "%" NPY_DATETIME_FMT,
1596-
PyDateTimeToEpoch(item, base));
1597-
len = strlen(cLabel);
1578+
if (enc->datetimeIso) {
1579+
// TODO: Vectorized Timedelta function
1580+
if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) {
1581+
PyObject *td =
1582+
PyObject_CallFunction(cls_timedelta, "(O)", item);
1583+
if (td == NULL) {
1584+
Py_DECREF(item);
1585+
NpyArr_freeLabels(ret, num);
1586+
ret = 0;
1587+
break;
1588+
}
1589+
1590+
PyObject *iso =
1591+
PyObject_CallMethod(td, "isoformat", NULL);
1592+
Py_DECREF(td);
1593+
if (iso == NULL) {
1594+
Py_DECREF(item);
1595+
NpyArr_freeLabels(ret, num);
1596+
ret = 0;
1597+
break;
1598+
}
1599+
1600+
len = strlen(PyUnicode_AsUTF8(iso));
1601+
cLabel = PyObject_Malloc(len + 1);
1602+
memcpy(cLabel, PyUnicode_AsUTF8(iso), len + 1);
1603+
Py_DECREF(iso);
1604+
} else {
1605+
if (type_num == NPY_DATETIME) {
1606+
cLabel = int64ToIso(nanosecVal, base, &len);
1607+
} else {
1608+
cLabel = PyDateTimeToIso((PyDateTime_Date *)item,
1609+
base, &len);
1610+
}
1611+
}
1612+
if (cLabel == NULL) {
1613+
Py_DECREF(item);
1614+
NpyArr_freeLabels(ret, num);
1615+
ret = 0;
1616+
break;
1617+
}
1618+
} else {
1619+
cLabel = PyObject_Malloc(21); // 21 chars for int64
1620+
sprintf(cLabel, "%" NPY_DATETIME_FMT,
1621+
NpyDateTimeToEpoch(nanosecVal, base));
1622+
len = strlen(cLabel);
1623+
}
15981624
}
15991625
} else { // Fallback to string representation
16001626
PyObject *str = PyObject_Str(item);
@@ -1615,6 +1641,10 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
16151641
ret[i] = PyObject_Malloc(len + 1);
16161642
memcpy(ret[i], cLabel, len + 1);
16171643

1644+
if (is_datetimelike) {
1645+
PyObject_Free(cLabel);
1646+
}
1647+
16181648
if (PyErr_Occurred()) {
16191649
NpyArr_freeLabels(ret, num);
16201650
ret = 0;

pandas/tests/io/json/test_pandas.py

+26
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from collections import OrderedDict
2+
import datetime
23
from datetime import timedelta
34
from io import StringIO
45
import json
@@ -810,6 +811,31 @@ def test_convert_dates(self):
810811
result = read_json(json, typ="series")
811812
tm.assert_series_equal(result, ts)
812813

814+
@pytest.mark.parametrize("date_format", ["epoch", "iso"])
815+
@pytest.mark.parametrize("as_object", [True, False])
816+
@pytest.mark.parametrize(
817+
"date_typ", [datetime.date, datetime.datetime, pd.Timestamp]
818+
)
819+
def test_date_index_and_values(self, date_format, as_object, date_typ):
820+
data = [date_typ(year=2020, month=1, day=1), pd.NaT]
821+
if as_object:
822+
data.append("a")
823+
824+
ser = pd.Series(data, index=data)
825+
result = ser.to_json(date_format=date_format)
826+
827+
if date_format == "epoch":
828+
expected = '{"1577836800000":1577836800000,"null":null}'
829+
else:
830+
expected = (
831+
'{"2020-01-01T00:00:00.000Z":"2020-01-01T00:00:00.000Z","null":null}'
832+
)
833+
834+
if as_object:
835+
expected = expected.replace("}", ',"a":"a"}')
836+
837+
assert result == expected
838+
813839
@pytest.mark.parametrize(
814840
"infer_word",
815841
[

0 commit comments

Comments
 (0)