diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py
index 5c1d39776b91c..e7baf60b9f65b 100644
--- a/asv_bench/benchmarks/io/json.py
+++ b/asv_bench/benchmarks/io/json.py
@@ -66,7 +66,8 @@ class ToJSON(BaseIO):
     fname = "__test__.json"
     params = [
         ["split", "columns", "index", "values", "records"],
-        ["df", "df_date_idx", "df_td_int_ts", "df_int_floats", "df_int_float_str"],
+        ["df", "df_date_idx", "df_td", "df_td_int_ts", "df_int_floats",
+         "df_int_float_str"],
     ]
     param_names = ["orient", "frame"]

@@ -81,6 +82,13 @@ def setup(self, orient, frame):
         strings = tm.makeStringIndex(N)
         self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N))
         self.df_date_idx = DataFrame(np.random.randn(N, ncols), index=index)
+        self.df_td = DataFrame(
+            {
+                "td_1": timedeltas,
+                "td_2": timedeltas
+            },
+            index=index,
+        )
         self.df_td_int_ts = DataFrame(
             {
                 "td_1": timedeltas,
@@ -118,6 +126,10 @@ def setup(self, orient, frame):
     def time_to_json(self, orient, frame):
         getattr(self, frame).to_json(self.fname, orient=orient)

+    def time_to_json_iso(self, orient, frame):
+        getattr(self, frame).to_json(self.fname, orient=orient,
+                                     date_format="iso")
+
     def peakmem_to_json(self, orient, frame):
         getattr(self, frame).to_json(self.fname, orient=orient)

diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst
index 14682b706f924..99b476d6e0ea9 100644
--- a/doc/source/whatsnew/v0.25.2.rst
+++ b/doc/source/whatsnew/v0.25.2.rst
@@ -63,8 +63,9 @@ I/O

 - Fix regression in notebook display where ``<th>`` tags not used for :attr:`DataFrame.index` (:issue:`28204`).
 - Regression in :meth:`~DataFrame.to_csv` where writing a :class:`Series` or :class:`DataFrame` indexed by an :class:`IntervalIndex` would incorrectly raise a ``TypeError`` (:issue:`28210`)
+- Bug in :meth:`DataFrame.to_json` and :meth:`Series.to_json` where :class:`Timedelta` was not correctly formatted when ``date_format="iso"`` (:issue:`28256`).
 -
--
+

 Plotting
 ^^^^^^^^

diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c
index 22c42acea0150..5889e635127de 100644
--- a/pandas/_libs/src/ujson/python/objToJSON.c
+++ b/pandas/_libs/src/ujson/python/objToJSON.c
@@ -735,12 +735,20 @@ int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) {
     NpyArr_freeItemValue(obj, tc);

     if (PyArray_ISDATETIME(npyarr->array)) {
-        PRINTMARK();
-        GET_TC(tc)->itemValue = obj;
-        Py_INCREF(obj);
-        ((PyObjectEncoder *)tc->encoder)->npyType = PyArray_TYPE(npyarr->array);
-        ((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr;
-        ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr;
+        if (PyArray_TYPE(npyarr->array) == NPY_TIMEDELTA) {
+            PRINTMARK();
+            PyObject *item = npyarr->getitem(npyarr->dataptr, npyarr->array);
+            PyObject *td = PyObject_CallFunction(cls_timedelta, "(O)", item);
+            GET_TC(tc)->itemValue = td;
+            Py_DECREF(item);
+        } else {
+            PRINTMARK();
+            GET_TC(tc)->itemValue = obj;
+            Py_INCREF(obj);
+            ((PyObjectEncoder *)tc->encoder)->npyType = PyArray_TYPE(npyarr->array);
+            ((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr;
+            ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr;
+        }
     } else {
         PRINTMARK();
         GET_TC(tc)->itemValue = npyarr->getitem(npyarr->dataptr, npyarr->array);
@@ -1917,47 +1925,54 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
         tc->type = enc->datetimeIso ? JT_UTF8 : JT_LONG;
         return;
     } else if (PyDelta_Check(obj)) {
-        if (PyObject_HasAttrString(obj, "value")) {
+        if (enc->datetimeIso) {
             PRINTMARK();
-            value = get_long_attr(obj, "value");
+            pc->PyTypeToJSON = PyTimeToJSON;
+            tc->type = JT_UTF8;
         } else {
-            PRINTMARK();
-            value = total_seconds(obj) * 1000000000LL; // nanoseconds per second
-        }
+            if (PyObject_HasAttrString(obj, "value")) {
+                PRINTMARK();
+                value = get_long_attr(obj, "value");
+            } else {
+                PRINTMARK();
+                value = total_seconds(obj) * 1000000000LL; // nanoseconds per second
+            }

-        base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-        switch (base) {
-            case NPY_FR_ns:
-                break;
-            case NPY_FR_us:
-                value /= 1000LL;
-                break;
-            case NPY_FR_ms:
-                value /= 1000000LL;
-                break;
-            case NPY_FR_s:
-                value /= 1000000000LL;
-                break;
-        }
+            base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
+            switch (base) {
+                case NPY_FR_ns:
+                    break;
+                case NPY_FR_us:
+                    value /= 1000LL;
+                    break;
+                case NPY_FR_ms:
+                    value /= 1000000LL;
+                    break;
+                case NPY_FR_s:
+                    value /= 1000000000LL;
+                    break;
+            }

-        exc = PyErr_Occurred();
+            exc = PyErr_Occurred();

-        if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) {
-            PRINTMARK();
-            goto INVALID;
-        }
+            if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) {
+                PRINTMARK();
+                goto INVALID;
+            }

-        if (value == get_nat()) {
-            PRINTMARK();
-            tc->type = JT_NULL;
-            return;
-        }
+            if (value == get_nat()) {
+                PRINTMARK();
+                tc->type = JT_NULL;
+                return;
+            }

-        GET_TC(tc)->longValue = value;
+            GET_TC(tc)->longValue = value;

-        PRINTMARK();
-        pc->PyTypeToJSON = PyLongToINT64;
-        tc->type = JT_LONG;
+            PRINTMARK();
+            pc->PyTypeToJSON = PyLongToINT64;
+            tc->type = JT_LONG;
+        }

         return;
     } else if (PyArray_IsScalar(obj, Integer)) {
         PRINTMARK();
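For context, a minimal sketch of the behaviour the encoder change above is intended to produce; the expected output strings are copied from the new tests added at the end of this patch:

```python
import pandas as pd

# A Series of timedeltas: 1 day and 2 days.
s = pd.Series(pd.timedelta_range(start="1D", periods=2))

# With the fix, date_format="iso" routes Timedelta through PyTimeToJSON
# and emits ISO 8601 duration strings instead of epoch integers.
print(s.to_json(date_format="iso"))
# {"0":"P1DT0H0M0S","1":"P2DT0H0M0S"}

# The epoch path is unchanged: milliseconds with the default date_unit="ms".
print(s.to_json(date_format="epoch"))
# {"0":86400000,"1":172800000}
```
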
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 73f4985e201f1..3e441a928b94f 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -10,7 +10,7 @@
 from pandas._libs.tslibs import iNaT
 from pandas.errors import AbstractMethodError

-from pandas.core.dtypes.common import ensure_str, is_period_dtype
+from pandas.core.dtypes.common import ensure_str, is_period_dtype, is_timedelta64_dtype

 from pandas import DataFrame, MultiIndex, Series, isna, to_datetime
 from pandas._typing import Scalar
@@ -170,6 +170,34 @@ def _write(
 class SeriesWriter(Writer):
     _default_orient = "index"
+    #
+    # def __init__(
+    #     self,
+    #     obj,
+    #     orient: Optional[str],
+    #     date_format: str,
+    #     double_precision: int,
+    #     ensure_ascii: bool,
+    #     date_unit: str,
+    #     index: bool,
+    #     default_handler: Optional[Callable[[Any], Serializable]] = None,
+    #     indent: int = 0,
+    # ):
+    #     super().__init__(
+    #         obj,
+    #         orient,
+    #         date_format,
+    #         double_precision,
+    #         ensure_ascii,
+    #         date_unit,
+    #         index,
+    #         default_handler=default_handler,
+    #         indent=indent,
+    #     )
+    #
+    #     if is_timedelta64_dtype(obj.dtype) and self.date_format == "iso":
+    #         obj = obj.copy()
+    #         self.obj = obj.apply(lambda x: x.isoformat())

     def _format_axes(self):
         if not self.obj.index.is_unique and self.orient == "index":
@@ -206,6 +234,37 @@ def _write(
 class FrameWriter(Writer):
     _default_orient = "columns"

+    # def __init__(
+    #     self,
+    #     obj,
+    #     orient: Optional[str],
+    #     date_format: str,
+    #     double_precision: int,
+    #     ensure_ascii: bool,
+    #     date_unit: str,
+    #     index: bool,
+    #     default_handler: Optional[Callable[[Any], Serializable]] = None,
+    #     indent: int = 0,
+    # ):
+    #     super().__init__(
+    #         obj,
+    #         orient,
+    #         date_format,
+    #         double_precision,
+    #         ensure_ascii,
+    #         date_unit,
+    #         index,
+    #         default_handler=default_handler,
+    #         indent=indent,
+    #     )
+    #
+    #     obj = obj.copy()
+    #     timedeltas = obj.select_dtypes(include=["timedelta"]).columns
+    #
+    #     if len(timedeltas) and self.date_format == "iso":
+    #         obj[timedeltas] = obj[timedeltas].applymap(lambda x: x.isoformat())
+    #     self.obj = obj
+
     def _format_axes(self):
         """
         Try to format axes if they are datelike.
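The commented-out `__init__` overrides above sketch a pure-Python alternative that pre-converts timedeltas before serialization; the fix ultimately landed in the C encoder instead. A standalone, user-level sketch of that idea (the helper name is hypothetical, not part of this patch):

```python
import pandas as pd

def frame_timedeltas_to_iso(df: pd.DataFrame) -> pd.DataFrame:
    # Hypothetical helper mirroring the commented-out FrameWriter.__init__:
    # replace timedelta64 columns with ISO 8601 duration strings so the
    # default JSON encoder writes them verbatim.
    out = df.copy()
    td_cols = out.select_dtypes(include=["timedelta"]).columns
    if len(td_cols):
        out[td_cols] = out[td_cols].applymap(lambda x: x.isoformat())
    return out

df = pd.DataFrame({"td": pd.timedelta_range(start="1D", periods=2)})
print(frame_timedeltas_to_iso(df).to_json())
# {"td":{"0":"P1DT0H0M0S","1":"P2DT0H0M0S"}}
```
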
diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py
index 569e299860614..5892c88484175 100644
--- a/pandas/tests/io/json/test_json_table_schema.py
+++ b/pandas/tests/io/json/test_json_table_schema.py
@@ -613,8 +613,7 @@ def test_timestamp_in_columns(self):
         result = df.to_json(orient="table")
         js = json.loads(result)
         assert js["schema"]["fields"][1]["name"] == "2016-01-01T00:00:00.000Z"
-        # TODO - below expectation is not correct; see GH 28256
-        assert js["schema"]["fields"][2]["name"] == 10000
+        assert js["schema"]["fields"][2]["name"] == "P0DT0H0M10S"

     @pytest.mark.parametrize(
         "case",
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 415b1d81eb3e4..f29e62af9114d 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -813,6 +813,40 @@ def test_reconstruction_index(self):
         result = read_json(df.to_json())
         assert_frame_equal(result, df)

+    @pytest.mark.parametrize(
+        "date_format,expected",
+        [
+            ("iso", '{"0":"P1DT0H0M0S","1":"P2DT0H0M0S"}'),
+            ("epoch", '{"0":86400000,"1":172800000}'),
+        ],
+    )
+    def test_series_timedelta_to_json(self, date_format, expected):
+        # GH28156: to_json not correctly formatting Timedelta
+        s = Series(pd.timedelta_range(start="1D", periods=2))
+
+        result = s.to_json(date_format=date_format)
+        assert result == expected
+
+        result = s.astype(object).to_json(date_format=date_format)
+        assert result == expected
+
+    @pytest.mark.parametrize(
+        "date_format,expected",
+        [
+            ("iso", '{"0":{"0":"P1DT0H0M0S","1":"P2DT0H0M0S"}}'),
+            ("epoch", '{"0":{"0":86400000,"1":172800000}}'),
+        ],
+    )
+    def test_dataframe_timedelta_to_json(self, date_format, expected):
+        # GH28156: to_json not correctly formatting Timedelta
+        df = DataFrame(pd.timedelta_range(start="1D", periods=2))
+
+        result = df.to_json(date_format=date_format)
+        assert result == expected
+
+        result = df.astype(object).to_json(date_format=date_format)
+        assert result == expected
+
     def test_path(self):
         with ensure_clean("test.json") as path:
             for df in [
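A usage note, inferred from the `datetimeUnit` switch in the objToJSON.c hunk above (the ns/us/ms/s divisors): with `date_format="epoch"` the emitted integer scales with `date_unit`, defaulting to milliseconds. A quick sketch of the assumed behaviour:

```python
import pandas as pd

s = pd.Series([pd.Timedelta(days=1)])  # 86400 seconds

# The encoder starts from the nanosecond value and divides per date_unit,
# matching the NPY_FR_* switch above.
print(s.to_json(date_format="epoch", date_unit="ns"))  # {"0":86400000000000}
print(s.to_json(date_format="epoch", date_unit="ms"))  # {"0":86400000} (default)
print(s.to_json(date_format="epoch", date_unit="s"))   # {"0":86400}
```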