Skip to content

Commit 80078ac

Browse files
authored
Implement C Level Timedelta ISO Function; fix JSON usage (#30903)
1 parent c4aa1a2 commit 80078ac

File tree

9 files changed

+155
-48
lines changed

9 files changed

+155
-48
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,7 @@ I/O
343343
timestamps with ``version="2.0"`` (:issue:`31652`).
344344
- Bug in :meth:`read_csv` was raising `TypeError` when `sep=None` was used in combination with `comment` keyword (:issue:`31396`)
345345
- Bug in :class:`HDFStore` that caused it to set to ``int64`` the dtype of a ``datetime64`` column when reading a DataFrame in Python 3 from fixed format written in Python 2 (:issue:`31750`)
346+
- Bug in :meth:`DataFrame.to_json` where ``Timedelta`` objects would not be serialized correctly with ``date_format="iso"`` (:issue:`28256`)
346347
- :func:`read_csv` will raise a ``ValueError`` when the column names passed in `parse_dates` are missing in the DataFrame (:issue:`31251`)
347348
- Bug in :meth:`read_excel` where a UTF-8 string with a high surrogate would cause a segmentation violation (:issue:`23809`)
348349
- Bug in :meth:`read_csv` was causing a file descriptor leak on an empty file (:issue:`31488`)

pandas/_libs/src/ujson/python/date_conversions.c

+26
Original file line numberDiff line numberDiff line change
@@ -116,3 +116,29 @@ npy_datetime PyDateTimeToEpoch(PyDateTime_Date *dt, NPY_DATETIMEUNIT base) {
116116
npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts);
117117
return NpyDateTimeToEpoch(npy_dt, base);
118118
}
119+
120+
/* Converts the int64_t nanosecond representation of a duration to an ISO 8601 duration string; writes the string length to *len */
121+
char *int64ToIsoDuration(int64_t value, size_t *len) {
122+
pandas_timedeltastruct tds;
123+
int ret_code;
124+
125+
pandas_timedelta_to_timedeltastruct(value, NPY_FR_ns, &tds);
126+
127+
// Max theoretical length of ISO Duration with 64 bit day
128+
// as the largest unit is 70 characters + 1 for a null terminator
129+
char *result = PyObject_Malloc(71);
130+
if (result == NULL) {
131+
PyErr_NoMemory();
132+
return NULL;
133+
}
134+
135+
ret_code = make_iso_8601_timedelta(&tds, result, len);
136+
if (ret_code == -1) {
137+
PyErr_SetString(PyExc_ValueError,
138+
"Could not convert timedelta value to string");
139+
PyObject_Free(result);
140+
return NULL;
141+
}
142+
143+
return result;
144+
}

pandas/_libs/src/ujson/python/date_conversions.h

+2
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,6 @@ char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base, size_t *len);
2828
// Convert a Python Date/Datetime to Unix epoch with resolution base
2929
npy_datetime PyDateTimeToEpoch(PyDateTime_Date *dt, NPY_DATETIMEUNIT base);
3030

31+
char *int64ToIsoDuration(int64_t value, size_t *len);
32+
3133
#endif

pandas/_libs/src/ujson/python/objToJSON.c

+36-45
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,6 @@ void *initObjToJSON(void) {
165165
cls_index = (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Index");
166166
cls_series =
167167
(PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Series");
168-
cls_timedelta = PyObject_GetAttrString(mod_pandas, "Timedelta");
169168
Py_DECREF(mod_pandas);
170169
}
171170

@@ -357,6 +356,12 @@ static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused),
357356
return int64ToIso(GET_TC(tc)->longValue, base, len);
358357
}
359358

359+
/* JSON callback. Returns a char* and stores the string length in *len */
360+
static char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused),
361+
JSONTypeContext *tc, size_t *len) {
362+
return int64ToIsoDuration(GET_TC(tc)->longValue, len);
363+
}
364+
360365
/* JSON callback */
361366
static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
362367
size_t *len) {
@@ -1445,7 +1450,8 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
14451450
1000000000LL; // nanoseconds per second
14461451
} else {
14471452
// datetime.* objects don't follow above rules
1448-
nanosecVal = PyDateTimeToEpoch(item, NPY_FR_ns);
1453+
nanosecVal =
1454+
PyDateTimeToEpoch((PyDateTime_Date *)item, NPY_FR_ns);
14491455
}
14501456
}
14511457
}
@@ -1457,31 +1463,8 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
14571463
strncpy(cLabel, "null", len);
14581464
} else {
14591465
if (enc->datetimeIso) {
1460-
// TODO: Vectorized Timedelta function
14611466
if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) {
1462-
PyObject *td =
1463-
PyObject_CallFunction(cls_timedelta, "(O)", item);
1464-
if (td == NULL) {
1465-
Py_DECREF(item);
1466-
NpyArr_freeLabels(ret, num);
1467-
ret = 0;
1468-
break;
1469-
}
1470-
1471-
PyObject *iso =
1472-
PyObject_CallMethod(td, "isoformat", NULL);
1473-
Py_DECREF(td);
1474-
if (iso == NULL) {
1475-
Py_DECREF(item);
1476-
NpyArr_freeLabels(ret, num);
1477-
ret = 0;
1478-
break;
1479-
}
1480-
1481-
len = strlen(PyUnicode_AsUTF8(iso));
1482-
cLabel = PyObject_Malloc(len + 1);
1483-
memcpy(cLabel, PyUnicode_AsUTF8(iso), len + 1);
1484-
Py_DECREF(iso);
1467+
cLabel = int64ToIsoDuration(nanosecVal, &len);
14851468
} else {
14861469
if (type_num == NPY_DATETIME) {
14871470
cLabel = int64ToIso(nanosecVal, base, &len);
@@ -1614,7 +1597,11 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
16141597

16151598
if (enc->datetimeIso) {
16161599
PRINTMARK();
1617-
pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback;
1600+
if (enc->npyType == NPY_TIMEDELTA) {
1601+
pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback;
1602+
} else {
1603+
pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback;
1604+
}
16181605
// Currently no way to pass longVal to iso function, so use
16191606
// state management
16201607
GET_TC(tc)->longValue = longVal;
@@ -1695,7 +1682,8 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
16951682
PRINTMARK();
16961683
NPY_DATETIMEUNIT base =
16971684
((PyObjectEncoder *)tc->encoder)->datetimeUnit;
1698-
GET_TC(tc)->longValue = PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
1685+
GET_TC(tc)->longValue =
1686+
PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
16991687
tc->type = JT_LONG;
17001688
}
17011689
return;
@@ -1721,7 +1709,8 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
17211709
PRINTMARK();
17221710
NPY_DATETIMEUNIT base =
17231711
((PyObjectEncoder *)tc->encoder)->datetimeUnit;
1724-
GET_TC(tc)->longValue = PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
1712+
GET_TC(tc)->longValue =
1713+
PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
17251714
tc->type = JT_LONG;
17261715
}
17271716
return;
@@ -1734,28 +1723,30 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
17341723
value = total_seconds(obj) * 1000000000LL; // nanoseconds per second
17351724
}
17361725

1737-
unit = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
1738-
if (scaleNanosecToUnit(&value, unit) != 0) {
1739-
// TODO: Add some kind of error handling here
1740-
}
1741-
1742-
exc = PyErr_Occurred();
1743-
1744-
if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) {
1745-
PRINTMARK();
1746-
goto INVALID;
1747-
}
1748-
1726+
PRINTMARK();
17491727
if (value == get_nat()) {
17501728
PRINTMARK();
17511729
tc->type = JT_NULL;
17521730
return;
1753-
}
1731+
} else if (enc->datetimeIso) {
1732+
pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback;
1733+
tc->type = JT_UTF8;
1734+
} else {
1735+
unit = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
1736+
if (scaleNanosecToUnit(&value, unit) != 0) {
1737+
// TODO: Add some kind of error handling here
1738+
}
17541739

1755-
GET_TC(tc)->longValue = value;
1740+
exc = PyErr_Occurred();
17561741

1757-
PRINTMARK();
1758-
tc->type = JT_LONG;
1742+
if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) {
1743+
PRINTMARK();
1744+
goto INVALID;
1745+
}
1746+
1747+
tc->type = JT_LONG;
1748+
}
1749+
GET_TC(tc)->longValue = value;
17591750
return;
17601751
} else if (PyArray_IsScalar(obj, Integer)) {
17611752
PRINTMARK();

pandas/_libs/tslibs/src/datetime/np_datetime_strings.c

+34
Original file line numberDiff line numberDiff line change
@@ -905,3 +905,37 @@ int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
905905
outlen);
906906
return -1;
907907
}
908+
909+
910+
int make_iso_8601_timedelta(pandas_timedeltastruct *tds,
911+
char *outstr, size_t *outlen) {
912+
*outlen = 0;
913+
*outlen += snprintf(outstr, 60, // NOLINT
914+
"P%" NPY_INT64_FMT
915+
"DT%" NPY_INT32_FMT
916+
"H%" NPY_INT32_FMT
917+
"M%" NPY_INT32_FMT,
918+
tds->days, tds->hrs, tds->min, tds->sec);
919+
outstr += *outlen;
920+
921+
if (tds->ns != 0) {
922+
*outlen += snprintf(outstr, 12, // NOLINT
923+
".%03" NPY_INT32_FMT
924+
"%03" NPY_INT32_FMT
925+
"%03" NPY_INT32_FMT
926+
"S", tds->ms, tds->us, tds->ns);
927+
} else if (tds->us != 0) {
928+
*outlen += snprintf(outstr, 9, // NOLINT
929+
".%03" NPY_INT32_FMT
930+
"%03" NPY_INT32_FMT
931+
"S", tds->ms, tds->us);
932+
} else if (tds->ms != 0) {
933+
*outlen += snprintf(outstr, 6, // NOLINT
934+
".%03" NPY_INT32_FMT "S", tds->ms);
935+
} else {
936+
*outlen += snprintf(outstr, 2, // NOLINT
937+
"%s", "S");
938+
}
939+
940+
return 0;
941+
}

pandas/_libs/tslibs/src/datetime/np_datetime_strings.h

+10
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,14 @@ get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base);
7979
int
8080
make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
8181
NPY_DATETIMEUNIT base);
82+
83+
/*
84+
* Converts a pandas_timedeltastruct to an ISO 8601 string.
85+
*
86+
* Mutates outlen to provide size of (non-NULL terminated) string.
87+
*
88+
* Currently has no error handling
89+
*/
90+
int make_iso_8601_timedelta(pandas_timedeltastruct *tds, char *outstr,
91+
size_t *outlen);
8292
#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_

pandas/tests/io/json/test_json_table_schema.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -603,8 +603,7 @@ def test_timestamp_in_columns(self):
603603
result = df.to_json(orient="table")
604604
js = json.loads(result)
605605
assert js["schema"]["fields"][1]["name"] == "2016-01-01T00:00:00.000Z"
606-
# TODO - below expectation is not correct; see GH 28256
607-
assert js["schema"]["fields"][2]["name"] == 10000
606+
assert js["schema"]["fields"][2]["name"] == "P0DT0H0M10S"
608607

609608
@pytest.mark.parametrize(
610609
"case",

pandas/tests/io/json/test_pandas.py

+23
Original file line numberDiff line numberDiff line change
@@ -1057,6 +1057,29 @@ def test_mixed_timedelta_datetime(self):
10571057
result = pd.read_json(frame.to_json(date_unit="ns"), dtype={"a": "int64"})
10581058
tm.assert_frame_equal(result, expected, check_index_type=False)
10591059

1060+
@pytest.mark.parametrize("as_object", [True, False])
1061+
@pytest.mark.parametrize("date_format", ["iso", "epoch"])
1062+
@pytest.mark.parametrize("timedelta_typ", [pd.Timedelta, timedelta])
1063+
def test_timedelta_to_json(self, as_object, date_format, timedelta_typ):
1064+
# GH28256: to_json not correctly formatting Timedelta
1065+
data = [timedelta_typ(days=1), timedelta_typ(days=2), pd.NaT]
1066+
if as_object:
1067+
data.append("a")
1068+
1069+
ser = pd.Series(data, index=data)
1070+
if date_format == "iso":
1071+
expected = (
1072+
'{"P1DT0H0M0S":"P1DT0H0M0S","P2DT0H0M0S":"P2DT0H0M0S","null":null}'
1073+
)
1074+
else:
1075+
expected = '{"86400000":86400000,"172800000":172800000,"null":null}'
1076+
1077+
if as_object:
1078+
expected = expected.replace("}", ',"a":"a"}')
1079+
1080+
result = ser.to_json(date_format=date_format)
1081+
assert result == expected
1082+
10601083
def test_default_handler(self):
10611084
value = object()
10621085
frame = DataFrame({"a": [7, value]})

pandas/tests/io/json/test_ujson.py

+22-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from pandas._libs.tslib import Timestamp
1717
import pandas.compat as compat
1818

19-
from pandas import DataFrame, DatetimeIndex, Index, NaT, Series, date_range
19+
from pandas import DataFrame, DatetimeIndex, Index, NaT, Series, Timedelta, date_range
2020
import pandas._testing as tm
2121

2222

@@ -1103,3 +1103,24 @@ def test_encode_set(self):
11031103

11041104
for v in dec:
11051105
assert v in s
1106+
1107+
@pytest.mark.parametrize(
1108+
"td",
1109+
[
1110+
Timedelta(days=366),
1111+
Timedelta(days=-1),
1112+
Timedelta(hours=13, minutes=5, seconds=5),
1113+
Timedelta(hours=13, minutes=20, seconds=30),
1114+
Timedelta(days=-1, nanoseconds=5),
1115+
Timedelta(nanoseconds=1),
1116+
Timedelta(microseconds=1, nanoseconds=1),
1117+
Timedelta(milliseconds=1, microseconds=1, nanoseconds=1),
1118+
Timedelta(milliseconds=999, microseconds=999, nanoseconds=999),
1119+
],
1120+
)
1121+
def test_encode_timedelta_iso(self, td):
1122+
# GH 28256
1123+
result = ujson.encode(td, iso_dates=True)
1124+
expected = f'"{td.isoformat()}"'
1125+
1126+
assert result == expected

0 commit comments

Comments
 (0)