From 72207d4337220221175fb4120e9ad2f60bb995f1 Mon Sep 17 00:00:00 2001 From: Chris Bertinato Date: Thu, 26 Sep 2019 08:58:03 -0400 Subject: [PATCH 1/2] BUG: Timedelta not formatted correctly in to_json --- asv_bench/benchmarks/io/json.py | 14 +++- doc/source/whatsnew/v0.25.2.rst | 3 +- pandas/_libs/src/ujson/python/objToJSON.c | 73 ++++++++++--------- pandas/io/json/_json.py | 61 +++++++++++++++- .../tests/io/json/test_json_table_schema.py | 3 +- pandas/tests/io/json/test_pandas.py | 34 +++++++++ 6 files changed, 150 insertions(+), 38 deletions(-) diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py index 5c1d39776b91c..e7baf60b9f65b 100644 --- a/asv_bench/benchmarks/io/json.py +++ b/asv_bench/benchmarks/io/json.py @@ -66,7 +66,8 @@ class ToJSON(BaseIO): fname = "__test__.json" params = [ ["split", "columns", "index", "values", "records"], - ["df", "df_date_idx", "df_td_int_ts", "df_int_floats", "df_int_float_str"], + ["df", "df_date_idx", "df_td", "df_td_int_ts", "df_int_floats", + "df_int_float_str"], ] param_names = ["orient", "frame"] @@ -81,6 +82,13 @@ def setup(self, orient, frame): strings = tm.makeStringIndex(N) self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N)) self.df_date_idx = DataFrame(np.random.randn(N, ncols), index=index) + self.df_td = DataFrame( + { + "td_1": timedeltas, + "td_2": timedeltas + }, + index=index, + ) self.df_td_int_ts = DataFrame( { "td_1": timedeltas, @@ -118,6 +126,10 @@ def setup(self, orient, frame): def time_to_json(self, orient, frame): getattr(self, frame).to_json(self.fname, orient=orient) + def time_to_json_iso(self, orient, frame): + getattr(self, frame).to_json(self.fname, orient=orient, + date_format="iso") + def peakmem_to_json(self, orient, frame): getattr(self, frame).to_json(self.fname, orient=orient) diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 14682b706f924..99b476d6e0ea9 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ 
b/doc/source/whatsnew/v0.25.2.rst @@ -63,8 +63,9 @@ I/O - Fix regression in notebook display where tags not used for :attr:`DataFrame.index` (:issue:`28204`). - Regression in :meth:`~DataFrame.to_csv` where writing a :class:`Series` or :class:`DataFrame` indexed by an :class:`IntervalIndex` would incorrectly raise a ``TypeError`` (:issue:`28210`) +- Bug in :meth:`DataFrame.to_json` and :meth:`Series.to_json` where :class:`Timedelta` was not correctly formatted when ``date_format="iso"`` (:issue:`28256`). - -- + Plotting ^^^^^^^^ diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 22c42acea0150..baf83b732f2f8 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -1917,47 +1917,54 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { tc->type = enc->datetimeIso ? JT_UTF8 : JT_LONG; return; } else if (PyDelta_Check(obj)) { - if (PyObject_HasAttrString(obj, "value")) { + if (enc->datetimeIso) { PRINTMARK(); - value = get_long_attr(obj, "value"); + pc->PyTypeToJSON = PyTimeToJSON; + tc->type = JT_UTF8; + } else { - PRINTMARK(); - value = total_seconds(obj) * 1000000000LL; // nanoseconds per second - } + if (PyObject_HasAttrString(obj, "value")) { + PRINTMARK(); + value = get_long_attr(obj, "value"); + } else { + PRINTMARK(); + value = total_seconds(obj) * 1000000000LL; // nanoseconds per second + } - base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; - switch (base) { - case NPY_FR_ns: - break; - case NPY_FR_us: - value /= 1000LL; - break; - case NPY_FR_ms: - value /= 1000000LL; - break; - case NPY_FR_s: - value /= 1000000000LL; - break; - } + base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + switch (base) { + case NPY_FR_ns: + break; + case NPY_FR_us: + value /= 1000LL; + break; + case NPY_FR_ms: + value /= 1000000LL; + break; + case NPY_FR_s: + value /= 1000000000LL; + break; + } - exc = PyErr_Occurred(); + exc = PyErr_Occurred(); - if (exc && 
PyErr_ExceptionMatches(PyExc_OverflowError)) { - PRINTMARK(); - goto INVALID; - } + if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) { + PRINTMARK(); + goto INVALID; + } - if (value == get_nat()) { - PRINTMARK(); - tc->type = JT_NULL; - return; - } + if (value == get_nat()) { + PRINTMARK(); + tc->type = JT_NULL; + return; + } - GET_TC(tc)->longValue = value; + GET_TC(tc)->longValue = value; - PRINTMARK(); - pc->PyTypeToJSON = PyLongToINT64; - tc->type = JT_LONG; + PRINTMARK(); + pc->PyTypeToJSON = PyLongToINT64; + tc->type = JT_LONG; + } return; } else if (PyArray_IsScalar(obj, Integer)) { PRINTMARK(); diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 73f4985e201f1..ee29a4b2b441e 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -10,7 +10,7 @@ from pandas._libs.tslibs import iNaT from pandas.errors import AbstractMethodError -from pandas.core.dtypes.common import ensure_str, is_period_dtype +from pandas.core.dtypes.common import ensure_str, is_period_dtype, is_timedelta64_dtype from pandas import DataFrame, MultiIndex, Series, isna, to_datetime from pandas._typing import Scalar @@ -171,6 +171,34 @@ def _write( class SeriesWriter(Writer): _default_orient = "index" + def __init__( + self, + obj, + orient: Optional[str], + date_format: str, + double_precision: int, + ensure_ascii: bool, + date_unit: str, + index: bool, + default_handler: Optional[Callable[[Any], Serializable]] = None, + indent: int = 0, + ): + super().__init__( + obj, + orient, + date_format, + double_precision, + ensure_ascii, + date_unit, + index, + default_handler=default_handler, + indent=indent, + ) + + if is_timedelta64_dtype(obj.dtype) and self.date_format == "iso": + obj = obj.copy() + self.obj = obj.apply(lambda x: x.isoformat()) + def _format_axes(self): if not self.obj.index.is_unique and self.orient == "index": raise ValueError( @@ -206,6 +234,37 @@ def _write( class FrameWriter(Writer): _default_orient = "columns" + def __init__( + self, + 
obj, + orient: Optional[str], + date_format: str, + double_precision: int, + ensure_ascii: bool, + date_unit: str, + index: bool, + default_handler: Optional[Callable[[Any], Serializable]] = None, + indent: int = 0, + ): + super().__init__( + obj, + orient, + date_format, + double_precision, + ensure_ascii, + date_unit, + index, + default_handler=default_handler, + indent=indent, + ) + + obj = obj.copy() + timedeltas = obj.select_dtypes(include=["timedelta"]).columns + + if len(timedeltas) and self.date_format == "iso": + obj[timedeltas] = obj[timedeltas].applymap(lambda x: x.isoformat()) + self.obj = obj + def _format_axes(self): """ Try to format axes if they are datelike. diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 569e299860614..5892c88484175 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -613,8 +613,7 @@ def test_timestamp_in_columns(self): result = df.to_json(orient="table") js = json.loads(result) assert js["schema"]["fields"][1]["name"] == "2016-01-01T00:00:00.000Z" - # TODO - below expectation is not correct; see GH 28256 - assert js["schema"]["fields"][2]["name"] == 10000 + assert js["schema"]["fields"][2]["name"] == "P0DT0H0M10S" @pytest.mark.parametrize( "case", diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 415b1d81eb3e4..f29e62af9114d 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -813,6 +813,40 @@ def test_reconstruction_index(self): result = read_json(df.to_json()) assert_frame_equal(result, df) + @pytest.mark.parametrize( + "date_format,expected", + [ + ("iso", '{"0":"P1DT0H0M0S","1":"P2DT0H0M0S"}'), + ("epoch", '{"0":86400000,"1":172800000}'), + ], + ) + def test_series_timedelta_to_json(self, date_format, expected): + # GH28156: to_json not correctly formatting Timedelta + s = Series(pd.timedelta_range(start="1D", 
periods=2)) + + result = s.to_json(date_format=date_format) + assert result == expected + + result = s.astype(object).to_json(date_format=date_format) + assert result == expected + + @pytest.mark.parametrize( + "date_format,expected", + [ + ("iso", '{"0":{"0":"P1DT0H0M0S","1":"P2DT0H0M0S"}}'), + ("epoch", '{"0":{"0":86400000,"1":172800000}}'), + ], + ) + def test_dataframe_timedelta_to_json(self, date_format, expected): + # GH28156: to_json not correctly formatting Timedelta + df = DataFrame(pd.timedelta_range(start="1D", periods=2)) + + result = df.to_json(date_format=date_format) + assert result == expected + + result = df.astype(object).to_json(date_format=date_format) + assert result == expected + def test_path(self): with ensure_clean("test.json") as path: for df in [ From 928697e49dff903e92e23d9dccdb6b95247d02ae Mon Sep 17 00:00:00 2001 From: Chris Bertinato Date: Sat, 5 Oct 2019 12:30:12 -0400 Subject: [PATCH 2/2] WIP --- pandas/_libs/src/ujson/python/objToJSON.c | 20 ++-- pandas/io/json/_json.py | 116 +++++++++++----------- 2 files changed, 72 insertions(+), 64 deletions(-) diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index baf83b732f2f8..5889e635127de 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -735,12 +735,20 @@ int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { NpyArr_freeItemValue(obj, tc); if (PyArray_ISDATETIME(npyarr->array)) { - PRINTMARK(); - GET_TC(tc)->itemValue = obj; - Py_INCREF(obj); - ((PyObjectEncoder *)tc->encoder)->npyType = PyArray_TYPE(npyarr->array); - ((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr; - ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; + if (PyArray_TYPE(npyarr->array) == NPY_TIMEDELTA) { + PRINTMARK(); + PyObject *item = npyarr->getitem(npyarr->dataptr, npyarr->array); + PyObject *td = PyObject_CallFunction(cls_timedelta, "(O)", item); + GET_TC(tc)->itemValue = td; + 
Py_DECREF(item); + } else { + PRINTMARK(); + GET_TC(tc)->itemValue = obj; + Py_INCREF(obj); + ((PyObjectEncoder *)tc->encoder)->npyType = PyArray_TYPE(npyarr->array); + ((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr; + ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; + } } else { PRINTMARK(); GET_TC(tc)->itemValue = npyarr->getitem(npyarr->dataptr, npyarr->array); diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index ee29a4b2b441e..3e441a928b94f 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -170,34 +170,34 @@ def _write( class SeriesWriter(Writer): _default_orient = "index" - - def __init__( - self, - obj, - orient: Optional[str], - date_format: str, - double_precision: int, - ensure_ascii: bool, - date_unit: str, - index: bool, - default_handler: Optional[Callable[[Any], Serializable]] = None, - indent: int = 0, - ): - super().__init__( - obj, - orient, - date_format, - double_precision, - ensure_ascii, - date_unit, - index, - default_handler=default_handler, - indent=indent, - ) - - if is_timedelta64_dtype(obj.dtype) and self.date_format == "iso": - obj = obj.copy() - self.obj = obj.apply(lambda x: x.isoformat()) + # + # def __init__( + # self, + # obj, + # orient: Optional[str], + # date_format: str, + # double_precision: int, + # ensure_ascii: bool, + # date_unit: str, + # index: bool, + # default_handler: Optional[Callable[[Any], Serializable]] = None, + # indent: int = 0, + # ): + # super().__init__( + # obj, + # orient, + # date_format, + # double_precision, + # ensure_ascii, + # date_unit, + # index, + # default_handler=default_handler, + # indent=indent, + # ) + # + # if is_timedelta64_dtype(obj.dtype) and self.date_format == "iso": + # obj = obj.copy() + # self.obj = obj.apply(lambda x: x.isoformat()) def _format_axes(self): if not self.obj.index.is_unique and self.orient == "index": @@ -234,36 +234,36 @@ def _write( class FrameWriter(Writer): _default_orient = "columns" - def __init__( - 
self, - obj, - orient: Optional[str], - date_format: str, - double_precision: int, - ensure_ascii: bool, - date_unit: str, - index: bool, - default_handler: Optional[Callable[[Any], Serializable]] = None, - indent: int = 0, - ): - super().__init__( - obj, - orient, - date_format, - double_precision, - ensure_ascii, - date_unit, - index, - default_handler=default_handler, - indent=indent, - ) - - obj = obj.copy() - timedeltas = obj.select_dtypes(include=["timedelta"]).columns - - if len(timedeltas) and self.date_format == "iso": - obj[timedeltas] = obj[timedeltas].applymap(lambda x: x.isoformat()) - self.obj = obj + # def __init__( + # self, + # obj, + # orient: Optional[str], + # date_format: str, + # double_precision: int, + # ensure_ascii: bool, + # date_unit: str, + # index: bool, + # default_handler: Optional[Callable[[Any], Serializable]] = None, + # indent: int = 0, + # ): + # super().__init__( + # obj, + # orient, + # date_format, + # double_precision, + # ensure_ascii, + # date_unit, + # index, + # default_handler=default_handler, + # indent=indent, + # ) + # + # obj = obj.copy() + # timedeltas = obj.select_dtypes(include=["timedelta"]).columns + # + # if len(timedeltas) and self.date_format == "iso": + # obj[timedeltas] = obj[timedeltas].applymap(lambda x: x.isoformat()) + # self.obj = obj def _format_axes(self): """