Skip to content

Commit 72207d4

Browse files
author
Chris Bertinato
committed
BUG: Timedelta not formatted correctly in to_json
1 parent 4fb853f commit 72207d4

File tree

6 files changed

+150
-38
lines changed

6 files changed

+150
-38
lines changed

asv_bench/benchmarks/io/json.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ class ToJSON(BaseIO):
6666
fname = "__test__.json"
6767
params = [
6868
["split", "columns", "index", "values", "records"],
69-
["df", "df_date_idx", "df_td_int_ts", "df_int_floats", "df_int_float_str"],
69+
["df", "df_date_idx", "df_td", "df_td_int_ts", "df_int_floats",
70+
"df_int_float_str"],
7071
]
7172
param_names = ["orient", "frame"]
7273

@@ -81,6 +82,13 @@ def setup(self, orient, frame):
8182
strings = tm.makeStringIndex(N)
8283
self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N))
8384
self.df_date_idx = DataFrame(np.random.randn(N, ncols), index=index)
85+
self.df_td = DataFrame(
86+
{
87+
"td_1": timedeltas,
88+
"td_2": timedeltas
89+
},
90+
index=index,
91+
)
8492
self.df_td_int_ts = DataFrame(
8593
{
8694
"td_1": timedeltas,
@@ -118,6 +126,10 @@ def setup(self, orient, frame):
118126
def time_to_json(self, orient, frame):
119127
getattr(self, frame).to_json(self.fname, orient=orient)
120128

129+
def time_to_json_iso(self, orient, frame):
130+
getattr(self, frame).to_json(self.fname, orient=orient,
131+
date_format="iso")
132+
121133
def peakmem_to_json(self, orient, frame):
122134
getattr(self, frame).to_json(self.fname, orient=orient)
123135

doc/source/whatsnew/v0.25.2.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,9 @@ I/O
6363

6464
- Fix regression in notebook display where <th> tags were not used for :attr:`DataFrame.index` (:issue:`28204`).
6565
- Regression in :meth:`~DataFrame.to_csv` where writing a :class:`Series` or :class:`DataFrame` indexed by an :class:`IntervalIndex` would incorrectly raise a ``TypeError`` (:issue:`28210`)
66+
- Bug in :meth:`DataFrame.to_json` and :meth:`Series.to_json` where :class:`Timedelta` was not correctly formatted when ``date_format="iso"`` (:issue:`28256`).
6667
-
67-
-
68+
6869

6970
Plotting
7071
^^^^^^^^

pandas/_libs/src/ujson/python/objToJSON.c

+40-33
Original file line numberDiff line numberDiff line change
@@ -1917,47 +1917,54 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
19171917
tc->type = enc->datetimeIso ? JT_UTF8 : JT_LONG;
19181918
return;
19191919
} else if (PyDelta_Check(obj)) {
1920-
if (PyObject_HasAttrString(obj, "value")) {
1920+
if (enc->datetimeIso) {
19211921
PRINTMARK();
1922-
value = get_long_attr(obj, "value");
1922+
pc->PyTypeToJSON = PyTimeToJSON;
1923+
tc->type = JT_UTF8;
1924+
19231925
} else {
1924-
PRINTMARK();
1925-
value = total_seconds(obj) * 1000000000LL; // nanoseconds per second
1926-
}
1926+
if (PyObject_HasAttrString(obj, "value")) {
1927+
PRINTMARK();
1928+
value = get_long_attr(obj, "value");
1929+
} else {
1930+
PRINTMARK();
1931+
value = total_seconds(obj) * 1000000000LL; // nanoseconds per second
1932+
}
19271933

1928-
base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
1929-
switch (base) {
1930-
case NPY_FR_ns:
1931-
break;
1932-
case NPY_FR_us:
1933-
value /= 1000LL;
1934-
break;
1935-
case NPY_FR_ms:
1936-
value /= 1000000LL;
1937-
break;
1938-
case NPY_FR_s:
1939-
value /= 1000000000LL;
1940-
break;
1941-
}
1934+
base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
1935+
switch (base) {
1936+
case NPY_FR_ns:
1937+
break;
1938+
case NPY_FR_us:
1939+
value /= 1000LL;
1940+
break;
1941+
case NPY_FR_ms:
1942+
value /= 1000000LL;
1943+
break;
1944+
case NPY_FR_s:
1945+
value /= 1000000000LL;
1946+
break;
1947+
}
19421948

1943-
exc = PyErr_Occurred();
1949+
exc = PyErr_Occurred();
19441950

1945-
if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) {
1946-
PRINTMARK();
1947-
goto INVALID;
1948-
}
1951+
if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) {
1952+
PRINTMARK();
1953+
goto INVALID;
1954+
}
19491955

1950-
if (value == get_nat()) {
1951-
PRINTMARK();
1952-
tc->type = JT_NULL;
1953-
return;
1954-
}
1956+
if (value == get_nat()) {
1957+
PRINTMARK();
1958+
tc->type = JT_NULL;
1959+
return;
1960+
}
19551961

1956-
GET_TC(tc)->longValue = value;
1962+
GET_TC(tc)->longValue = value;
19571963

1958-
PRINTMARK();
1959-
pc->PyTypeToJSON = PyLongToINT64;
1960-
tc->type = JT_LONG;
1964+
PRINTMARK();
1965+
pc->PyTypeToJSON = PyLongToINT64;
1966+
tc->type = JT_LONG;
1967+
}
19611968
return;
19621969
} else if (PyArray_IsScalar(obj, Integer)) {
19631970
PRINTMARK();

pandas/io/json/_json.py

+60-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from pandas._libs.tslibs import iNaT
1111
from pandas.errors import AbstractMethodError
1212

13-
from pandas.core.dtypes.common import ensure_str, is_period_dtype
13+
from pandas.core.dtypes.common import ensure_str, is_period_dtype, is_timedelta64_dtype
1414

1515
from pandas import DataFrame, MultiIndex, Series, isna, to_datetime
1616
from pandas._typing import Scalar
@@ -171,6 +171,34 @@ def _write(
171171
class SeriesWriter(Writer):
172172
_default_orient = "index"
173173

174+
def __init__(
175+
self,
176+
obj,
177+
orient: Optional[str],
178+
date_format: str,
179+
double_precision: int,
180+
ensure_ascii: bool,
181+
date_unit: str,
182+
index: bool,
183+
default_handler: Optional[Callable[[Any], Serializable]] = None,
184+
indent: int = 0,
185+
):
186+
super().__init__(
187+
obj,
188+
orient,
189+
date_format,
190+
double_precision,
191+
ensure_ascii,
192+
date_unit,
193+
index,
194+
default_handler=default_handler,
195+
indent=indent,
196+
)
197+
198+
if is_timedelta64_dtype(obj.dtype) and self.date_format == "iso":
199+
obj = obj.copy()
200+
self.obj = obj.apply(lambda x: x.isoformat())
201+
174202
def _format_axes(self):
175203
if not self.obj.index.is_unique and self.orient == "index":
176204
raise ValueError(
@@ -206,6 +234,37 @@ def _write(
206234
class FrameWriter(Writer):
207235
_default_orient = "columns"
208236

237+
def __init__(
238+
self,
239+
obj,
240+
orient: Optional[str],
241+
date_format: str,
242+
double_precision: int,
243+
ensure_ascii: bool,
244+
date_unit: str,
245+
index: bool,
246+
default_handler: Optional[Callable[[Any], Serializable]] = None,
247+
indent: int = 0,
248+
):
249+
super().__init__(
250+
obj,
251+
orient,
252+
date_format,
253+
double_precision,
254+
ensure_ascii,
255+
date_unit,
256+
index,
257+
default_handler=default_handler,
258+
indent=indent,
259+
)
260+
261+
obj = obj.copy()
262+
timedeltas = obj.select_dtypes(include=["timedelta"]).columns
263+
264+
if len(timedeltas) and self.date_format == "iso":
265+
obj[timedeltas] = obj[timedeltas].applymap(lambda x: x.isoformat())
266+
self.obj = obj
267+
209268
def _format_axes(self):
210269
"""
211270
Try to format axes if they are datelike.

pandas/tests/io/json/test_json_table_schema.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -613,8 +613,7 @@ def test_timestamp_in_columns(self):
613613
result = df.to_json(orient="table")
614614
js = json.loads(result)
615615
assert js["schema"]["fields"][1]["name"] == "2016-01-01T00:00:00.000Z"
616-
# TODO - below expectation is not correct; see GH 28256
617-
assert js["schema"]["fields"][2]["name"] == 10000
616+
assert js["schema"]["fields"][2]["name"] == "P0DT0H0M10S"
618617

619618
@pytest.mark.parametrize(
620619
"case",

pandas/tests/io/json/test_pandas.py

+34
Original file line numberDiff line numberDiff line change
@@ -813,6 +813,40 @@ def test_reconstruction_index(self):
813813
result = read_json(df.to_json())
814814
assert_frame_equal(result, df)
815815

816+
@pytest.mark.parametrize(
817+
"date_format,expected",
818+
[
819+
("iso", '{"0":"P1DT0H0M0S","1":"P2DT0H0M0S"}'),
820+
("epoch", '{"0":86400000,"1":172800000}'),
821+
],
822+
)
823+
def test_series_timedelta_to_json(self, date_format, expected):
824+
# GH28256: to_json not correctly formatting Timedelta
825+
s = Series(pd.timedelta_range(start="1D", periods=2))
826+
827+
result = s.to_json(date_format=date_format)
828+
assert result == expected
829+
830+
result = s.astype(object).to_json(date_format=date_format)
831+
assert result == expected
832+
833+
@pytest.mark.parametrize(
834+
"date_format,expected",
835+
[
836+
("iso", '{"0":{"0":"P1DT0H0M0S","1":"P2DT0H0M0S"}}'),
837+
("epoch", '{"0":{"0":86400000,"1":172800000}}'),
838+
],
839+
)
840+
def test_dataframe_timedelta_to_json(self, date_format, expected):
841+
# GH28256: to_json not correctly formatting Timedelta
842+
df = DataFrame(pd.timedelta_range(start="1D", periods=2))
843+
844+
result = df.to_json(date_format=date_format)
845+
assert result == expected
846+
847+
result = df.astype(object).to_json(date_format=date_format)
848+
assert result == expected
849+
816850
def test_path(self):
817851
with ensure_clean("test.json") as path:
818852
for df in [

0 commit comments

Comments
 (0)