diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index e540626dc1e14..24d14e49b3db0 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -341,6 +341,7 @@ I/O
 timestamps with ``version="2.0"`` (:issue:`31652`).
 - Bug in :meth:`read_csv` was raising `TypeError` when `sep=None` was used in combination with `comment` keyword (:issue:`31396`)
 - Bug in :class:`HDFStore` that caused it to set to ``int64`` the dtype of a ``datetime64`` column when reading a DataFrame in Python 3 from fixed format written in Python 2 (:issue:`31750`)
+- Bug in :meth:`DataFrame.to_json` where ``Timedelta`` objects would not be serialized correctly with ``date_format="iso"`` (:issue:`28256`)
 - :func:`read_csv` will raise a ``ValueError`` when the column names passed in `parse_dates` are missing in the Dataframe (:issue:`31251`)
 - Bug in :meth:`read_excel` where a UTF-8 string with a high surrogate would cause a segmentation violation (:issue:`23809`)
 - Bug in :meth:`read_csv` was causing a file descriptor leak on an empty file (:issue:`31488`)
diff --git a/pandas/_libs/src/ujson/python/date_conversions.c b/pandas/_libs/src/ujson/python/date_conversions.c
index fc4bdef8463af..bcb1334d978ef 100644
--- a/pandas/_libs/src/ujson/python/date_conversions.c
+++ b/pandas/_libs/src/ujson/python/date_conversions.c
@@ -116,3 +116,29 @@ npy_datetime PyDateTimeToEpoch(PyDateTime_Date *dt, NPY_DATETIMEUNIT base) {
     npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts);
     return NpyDateTimeToEpoch(npy_dt, base);
 }
+
+/* Converts the int64_t representation of a duration to ISO; mutates len */
+char *int64ToIsoDuration(int64_t value, size_t *len) {
+    pandas_timedeltastruct tds;
+    int ret_code;
+
+    pandas_timedelta_to_timedeltastruct(value, NPY_FR_ns, &tds);
+
+    // Max theoretical length of ISO Duration with 64 bit day
+    // as the largest unit is 70 characters + 1 for a null terminator
+    char *result = PyObject_Malloc(71);
+    if (result == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    ret_code = make_iso_8601_timedelta(&tds, result, len);
+    if (ret_code == -1) {
+        PyErr_SetString(PyExc_ValueError,
+                        "Could not convert timedelta value to string");
+        PyObject_Free(result);
+        return NULL;
+    }
+
+    return result;
+}
diff --git a/pandas/_libs/src/ujson/python/date_conversions.h b/pandas/_libs/src/ujson/python/date_conversions.h
index 45455f4d6128b..1b5cbf2a7e307 100644
--- a/pandas/_libs/src/ujson/python/date_conversions.h
+++ b/pandas/_libs/src/ujson/python/date_conversions.h
@@ -28,4 +28,6 @@ char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base, size_t *len);
 // Convert a Python Date/Datetime to Unix epoch with resolution base
 npy_datetime PyDateTimeToEpoch(PyDateTime_Date *dt, NPY_DATETIMEUNIT base);
 
+char *int64ToIsoDuration(int64_t value, size_t *len);
+
 #endif
diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c
index db1feccb4d0c6..95e98779c2368 100644
--- a/pandas/_libs/src/ujson/python/objToJSON.c
+++ b/pandas/_libs/src/ujson/python/objToJSON.c
@@ -165,7 +165,6 @@ void *initObjToJSON(void) {
         cls_index = (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Index");
         cls_series =
             (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Series");
-        cls_timedelta = PyObject_GetAttrString(mod_pandas, "Timedelta");
         Py_DECREF(mod_pandas);
     }
 
@@ -357,6 +356,12 @@ static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused),
     return int64ToIso(GET_TC(tc)->longValue, base, len);
 }
 
+/* JSON callback. returns a char* and mutates the pointer to *len */
+static char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused),
+                                       JSONTypeContext *tc, size_t *len) {
+    return int64ToIsoDuration(GET_TC(tc)->longValue, len);
+}
+
 /* JSON callback */
 static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
                                      size_t *len) {
@@ -1445,7 +1450,8 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
                                  1000000000LL;  // nanoseconds per second
                 } else {
                     // datetime.* objects don't follow above rules
-                    nanosecVal = PyDateTimeToEpoch(item, NPY_FR_ns);
+                    nanosecVal =
+                        PyDateTimeToEpoch((PyDateTime_Date *)item, NPY_FR_ns);
                 }
             }
         }
@@ -1457,31 +1463,8 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
                 strncpy(cLabel, "null", len);
             } else {
                 if (enc->datetimeIso) {
-                    // TODO: Vectorized Timedelta function
                     if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) {
-                        PyObject *td =
-                            PyObject_CallFunction(cls_timedelta, "(O)", item);
-                        if (td == NULL) {
-                            Py_DECREF(item);
-                            NpyArr_freeLabels(ret, num);
-                            ret = 0;
-                            break;
-                        }
-
-                        PyObject *iso =
-                            PyObject_CallMethod(td, "isoformat", NULL);
-                        Py_DECREF(td);
-                        if (iso == NULL) {
-                            Py_DECREF(item);
-                            NpyArr_freeLabels(ret, num);
-                            ret = 0;
-                            break;
-                        }
-
-                        len = strlen(PyUnicode_AsUTF8(iso));
-                        cLabel = PyObject_Malloc(len + 1);
-                        memcpy(cLabel, PyUnicode_AsUTF8(iso), len + 1);
-                        Py_DECREF(iso);
+                        cLabel = int64ToIsoDuration(nanosecVal, &len);
                     } else {
                         if (type_num == NPY_DATETIME) {
                             cLabel = int64ToIso(nanosecVal, base, &len);
@@ -1614,7 +1597,11 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
 
         if (enc->datetimeIso) {
             PRINTMARK();
-            pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback;
+            if (enc->npyType == NPY_TIMEDELTA) {
+                pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback;
+            } else {
+                pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback;
+            }
             // Currently no way to pass longVal to iso function, so use
             // state management
             GET_TC(tc)->longValue = longVal;
@@ -1695,7 +1682,8 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
             PRINTMARK();
             NPY_DATETIMEUNIT base =
                 ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-            GET_TC(tc)->longValue = PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
+            GET_TC(tc)->longValue =
+                PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
             tc->type = JT_LONG;
         }
         return;
@@ -1721,7 +1709,8 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
             PRINTMARK();
             NPY_DATETIMEUNIT base =
                 ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-            GET_TC(tc)->longValue = PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
+            GET_TC(tc)->longValue =
+                PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
             tc->type = JT_LONG;
         }
         return;
@@ -1734,28 +1723,30 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
             value = total_seconds(obj) *
                     1000000000LL;  // nanoseconds per second
         }
-        unit = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-        if (scaleNanosecToUnit(&value, unit) != 0) {
-            // TODO: Add some kind of error handling here
-        }
-
-        exc = PyErr_Occurred();
-
-        if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) {
-            PRINTMARK();
-            goto INVALID;
-        }
-
+        PRINTMARK();
         if (value == get_nat()) {
             PRINTMARK();
             tc->type = JT_NULL;
             return;
-        }
+        } else if (enc->datetimeIso) {
+            pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback;
+            tc->type = JT_UTF8;
+        } else {
+            unit = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
+            if (scaleNanosecToUnit(&value, unit) != 0) {
+                // TODO: Add some kind of error handling here
+            }
 
-        GET_TC(tc)->longValue = value;
+            exc = PyErr_Occurred();
 
-        PRINTMARK();
-        tc->type = JT_LONG;
+            if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) {
+                PRINTMARK();
+                goto INVALID;
+            }
+
+            tc->type = JT_LONG;
+        }
+        GET_TC(tc)->longValue = value;
         return;
     } else if (PyArray_IsScalar(obj, Integer)) {
         PRINTMARK();
diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
index 54ed6ecff21e2..b245ae5880ecb 100644
--- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
+++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
@@ -905,3 +905,37 @@ int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
                 outlen);
     return -1;
 }
+
+
+int make_iso_8601_timedelta(pandas_timedeltastruct *tds,
+                            char *outstr, size_t *outlen) {
+    *outlen = 0;
+    *outlen += snprintf(outstr, 60,  // NOLINT
+                        "P%" NPY_INT64_FMT
+                        "DT%" NPY_INT32_FMT
+                        "H%" NPY_INT32_FMT
+                        "M%" NPY_INT32_FMT,
+                        tds->days, tds->hrs, tds->min, tds->sec);
+    outstr += *outlen;
+
+    if (tds->ns != 0) {
+        *outlen += snprintf(outstr, 12,  // NOLINT
+                            ".%03" NPY_INT32_FMT
+                            "%03" NPY_INT32_FMT
+                            "%03" NPY_INT32_FMT
+                            "S", tds->ms, tds->us, tds->ns);
+    } else if (tds->us != 0) {
+        *outlen += snprintf(outstr, 9,  // NOLINT
+                            ".%03" NPY_INT32_FMT
+                            "%03" NPY_INT32_FMT
+                            "S", tds->ms, tds->us);
+    } else if (tds->ms != 0) {
+        *outlen += snprintf(outstr, 6,  // NOLINT
+                            ".%03" NPY_INT32_FMT "S", tds->ms);
+    } else {
+        *outlen += snprintf(outstr, 2,  // NOLINT
+                            "%s", "S");
+    }
+
+    return 0;
+}
diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h
index 880c34ea77638..200a71ff0c2b7 100644
--- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h
+++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h
@@ -79,4 +79,14 @@ get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base);
 int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
                            NPY_DATETIMEUNIT base);
 
+
+/*
+ * Converts a pandas_timedeltastruct to an ISO 8601 string.
+ *
+ * Mutates outlen to provide size of (non-NULL terminated) string.
+ *
+ * Currently has no error handling
+ */
+int make_iso_8601_timedelta(pandas_timedeltastruct *tds, char *outstr,
+                            size_t *outlen);
 #endif  // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_
diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py
index 2ac2acc6748d1..c0d40048a72fe 100644
--- a/pandas/tests/io/json/test_json_table_schema.py
+++ b/pandas/tests/io/json/test_json_table_schema.py
@@ -603,8 +603,7 @@ def test_timestamp_in_columns(self):
         result = df.to_json(orient="table")
         js = json.loads(result)
         assert js["schema"]["fields"][1]["name"] == "2016-01-01T00:00:00.000Z"
-        # TODO - below expectation is not correct; see GH 28256
-        assert js["schema"]["fields"][2]["name"] == 10000
+        assert js["schema"]["fields"][2]["name"] == "P0DT0H0M10S"
 
     @pytest.mark.parametrize(
         "case",
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 276dfd666c5d0..d56ddb98fa4fa 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1057,6 +1057,29 @@ def test_mixed_timedelta_datetime(self):
         result = pd.read_json(frame.to_json(date_unit="ns"), dtype={"a": "int64"})
         tm.assert_frame_equal(result, expected, check_index_type=False)
 
+    @pytest.mark.parametrize("as_object", [True, False])
+    @pytest.mark.parametrize("date_format", ["iso", "epoch"])
+    @pytest.mark.parametrize("timedelta_typ", [pd.Timedelta, timedelta])
+    def test_timedelta_to_json(self, as_object, date_format, timedelta_typ):
+        # GH28256: to_json not correctly formatting Timedelta
+        data = [timedelta_typ(days=1), timedelta_typ(days=2), pd.NaT]
+        if as_object:
+            data.append("a")
+
+        ser = pd.Series(data, index=data)
+        if date_format == "iso":
+            expected = (
+                '{"P1DT0H0M0S":"P1DT0H0M0S","P2DT0H0M0S":"P2DT0H0M0S","null":null}'
+            )
+        else:
+            expected = '{"86400000":86400000,"172800000":172800000,"null":null}'
+
+        if as_object:
+            expected = expected.replace("}", ',"a":"a"}')
+
+        result = ser.to_json(date_format=date_format)
+        assert result == expected
+
     def test_default_handler(self):
         value = object()
         frame = DataFrame({"a": [7, value]})
diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py
index e86667626deda..34dd9ba9bc7b6 100644
--- a/pandas/tests/io/json/test_ujson.py
+++ b/pandas/tests/io/json/test_ujson.py
@@ -16,7 +16,7 @@
 from pandas._libs.tslib import Timestamp
 import pandas.compat as compat
 
-from pandas import DataFrame, DatetimeIndex, Index, NaT, Series, date_range
+from pandas import DataFrame, DatetimeIndex, Index, NaT, Series, Timedelta, date_range
 import pandas._testing as tm
 
 
@@ -1103,3 +1103,24 @@ def test_encode_set(self):
 
         for v in dec:
             assert v in s
+
+    @pytest.mark.parametrize(
+        "td",
+        [
+            Timedelta(days=366),
+            Timedelta(days=-1),
+            Timedelta(hours=13, minutes=5, seconds=5),
+            Timedelta(hours=13, minutes=20, seconds=30),
+            Timedelta(days=-1, nanoseconds=5),
+            Timedelta(nanoseconds=1),
+            Timedelta(microseconds=1, nanoseconds=1),
+            Timedelta(milliseconds=1, microseconds=1, nanoseconds=1),
+            Timedelta(milliseconds=999, microseconds=999, nanoseconds=999),
+        ],
+    )
+    def test_encode_timedelta_iso(self, td):
+        # GH 28256
+        result = ujson.encode(td, iso_dates=True)
+        expected = f'"{td.isoformat()}"'
+
+        assert result == expected
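
Note for reviewers (not part of the patch): a minimal usage sketch of the behaviour the new tests pin down. The expected output strings are copied verbatim from test_timedelta_to_json and test_encode_timedelta_iso above; the pandas._libs.json import mirrors the one already used by test_ujson.py, and everything else here is illustrative only.

    import pandas as pd
    import pandas._libs.json as ujson  # same low-level handle test_ujson.py uses

    # Timedelta values used as both index and values, mirroring test_timedelta_to_json
    data = [pd.Timedelta(days=1), pd.Timedelta(days=2), pd.NaT]
    ser = pd.Series(data, index=data)

    # With this patch, date_format="iso" writes ISO 8601 durations (NaT -> null)
    print(ser.to_json(date_format="iso"))
    # {"P1DT0H0M0S":"P1DT0H0M0S","P2DT0H0M0S":"P2DT0H0M0S","null":null}

    # Epoch output (millisecond integers) is unchanged
    print(ser.to_json(date_format="epoch"))
    # {"86400000":86400000,"172800000":172800000,"null":null}

    # At the encoder level the output now matches Timedelta.isoformat()
    assert ujson.encode(pd.Timedelta(days=1), iso_dates=True) == '"P1DT0H0M0S"'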