Skip to content

Commit 15d8178

Browse files
albertvillanova authored and jreback committed
BUG: Fix type coercion in read_json orient='table' (#21345) (#25219)
1 parent 7408c9b commit 15d8178

File tree

4 files changed

+34
-6
lines changed

4 files changed

+34
-6
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ I/O
160160
^^^
161161

162162
- Fixed bug in missing text when using :meth:`to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`)
163+
- Bug in :func:`read_json` with ``orient='table'`` where dtypes were inferred by default, even though inference is not applicable because the dtypes are already defined in the JSON schema (:issue:`21345`)
163164
-
164165
-
165166
-

pandas/io/json/json.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ def _write(self, obj, orient, double_precision, ensure_ascii,
226226
return serialized
227227

228228

229-
def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
229+
def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None,
230230
convert_axes=True, convert_dates=True, keep_default_dates=True,
231231
numpy=False, precise_float=False, date_unit=None, encoding=None,
232232
lines=False, chunksize=None, compression='infer'):
@@ -278,8 +278,15 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
278278
279279
typ : type of object to recover (series or frame), default 'frame'
280280
dtype : boolean or dict, default None (treated as True, except with ``orient='table'`` where it is treated as False)
281-
If True, infer dtypes, if a dict of column to dtype, then use those,
281+
If True, infer dtypes; if a dict of column to dtype, then use those;
282282
if False, then don't infer dtypes at all, applies only to the data.
283+
284+
Not applicable with ``orient='table'``.
285+
286+
.. versionchanged:: 0.25
287+
288+
Not applicable with ``orient='table'``.
289+
283290
convert_axes : boolean, default True
284291
Try to convert the axes to the proper dtypes.
285292
convert_dates : boolean, default True
@@ -408,6 +415,11 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
408415
{"index": "row 2", "col 1": "c", "col 2": "d"}]}'
409416
"""
410417

418+
if orient == 'table' and dtype:
419+
raise ValueError("cannot pass both dtype and orient='table'")
420+
421+
dtype = orient != 'table' if dtype is None else dtype
422+
411423
compression = _infer_compression(path_or_buf, compression)
412424
filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
413425
path_or_buf, encoding=encoding, compression=compression,
@@ -600,15 +612,15 @@ class Parser(object):
600612
'us': long(31536000000000),
601613
'ns': long(31536000000000000)}
602614

603-
def __init__(self, json, orient, dtype=True, convert_axes=True,
615+
def __init__(self, json, orient, dtype=None, convert_axes=True,
604616
convert_dates=True, keep_default_dates=False, numpy=False,
605617
precise_float=False, date_unit=None):
606618
self.json = json
607619

608620
if orient is None:
609621
orient = self._default_orient
610-
611622
self.orient = orient
623+
612624
self.dtype = dtype
613625

614626
if orient == "split":

pandas/tests/io/json/test_json_table_schema.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -502,12 +502,12 @@ class TestTableOrientReader(object):
502502
@pytest.mark.parametrize("vals", [
503503
{'ints': [1, 2, 3, 4]},
504504
{'objects': ['a', 'b', 'c', 'd']},
505+
{'objects': ['1', '2', '3', '4']},
505506
{'date_ranges': pd.date_range('2016-01-01', freq='d', periods=4)},
506507
{'categoricals': pd.Series(pd.Categorical(['a', 'b', 'c', 'c']))},
507508
{'ordered_cats': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'],
508509
ordered=True))},
509-
pytest.param({'floats': [1., 2., 3., 4.]},
510-
marks=pytest.mark.xfail),
510+
{'floats': [1., 2., 3., 4.]},
511511
{'floats': [1.1, 2.2, 3.3, 4.4]},
512512
{'bools': [True, False, False, True]}])
513513
def test_read_json_table_orient(self, index_nm, vals, recwarn):

pandas/tests/io/json/test_pandas.py

+15
Original file line numberDiff line numberDiff line change
@@ -1202,6 +1202,21 @@ def test_data_frame_size_after_to_json(self):
12021202

12031203
assert size_before == size_after
12041204

1205+
def test_from_json_to_json_table_dtypes(self):
1206+
# GH21345
1207+
expected = pd.DataFrame({'a': [1, 2], 'b': [3., 4.], 'c': ['5', '6']})
1208+
dfjson = expected.to_json(orient='table')
1209+
result = pd.read_json(dfjson, orient='table')
1210+
assert_frame_equal(result, expected)
1211+
1212+
@pytest.mark.parametrize('dtype', [True, {'b': int, 'c': int}])
1213+
def test_read_json_table_dtype_raises(self, dtype):
1214+
# GH21345
1215+
df = pd.DataFrame({'a': [1, 2], 'b': [3., 4.], 'c': ['5', '6']})
1216+
dfjson = df.to_json(orient='table')
1217+
with pytest.raises(ValueError):
1218+
pd.read_json(dfjson, orient='table', dtype=dtype)
1219+
12051220
@pytest.mark.parametrize('data, expected', [
12061221
(DataFrame([[1, 2], [4, 5]], columns=['a', 'b']),
12071222
{'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),

0 commit comments

Comments
 (0)