From f5360469ac4c0cd63a94d69a6c984dea07a1e701 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Sun, 24 Feb 2019 21:37:54 +0100 Subject: [PATCH 1/7] Fix bug when index type casting in read_json table (#25433) --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/io/json/json.py | 13 ++++++++++++- pandas/tests/io/json/test_pandas.py | 8 ++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 170e7f14da397..60b0d828ac4d2 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -194,6 +194,7 @@ I/O - Fixed bug in missing text when using :meth:`to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`) - Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`) +- Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`) - - - diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 725e2d28ffd67..0836ba71d158f 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -227,7 +227,7 @@ def _write(self, obj, orient, double_precision, ensure_ascii, def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None, - convert_axes=True, convert_dates=True, keep_default_dates=True, + convert_axes=None, convert_dates=True, keep_default_dates=True, numpy=False, precise_float=False, date_unit=None, encoding=None, lines=False, chunksize=None, compression='infer'): """ @@ -289,6 +289,13 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None, convert_axes : boolean, default True Try to convert the axes to the proper dtypes. + + Not applicable with ``orient='table'``. + + .. versionchanged:: 0.25 + + Not applicable with ``orient='table'``. + convert_dates : boolean, default True List of columns to parse for dates; If True, then try to parse datelike columns default is True; a column label is datelike if @@ -417,8 +424,12 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None, if orient == 'table' and dtype: raise ValueError("cannot pass both dtype and orient='table'") + if orient == 'table' and convert_axes: + raise ValueError("cannot pass both convert_axes and orient='table'") dtype = orient != 'table' if dtype is None else dtype + if convert_axes is None: + convert_axes = orient != 'table' compression = _infer_compression(path_or_buf, compression) filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index fecd0f0572757..80f7e6ff09f82 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1202,6 +1202,14 @@ def test_data_frame_size_after_to_json(self): assert size_before == size_after + @pytest.mark.parametrize('index', [None, [1., 2.], ['1', '2'], + ['1.', '2.']]) + def test_from_json_to_json_table_index(self, index): + expected = DataFrame({'a': [1, 2]}, index=index) + dfjson = expected.to_json(orient='table') + result = pd.read_json(dfjson, orient='table') + assert_frame_equal(result, expected) + def test_from_json_to_json_table_dtypes(self): # GH21345 expected = pd.DataFrame({'a': [1, 2], 'b': [3., 4.], 'c': ['5', '6']}) From 98333dadc688f57c3f78133fdafb118104ad34af Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Sun, 24 Feb 2019 21:40:59 +0100 Subject: [PATCH 2/7] Fix test --- pandas/tests/io/json/test_json_table_schema.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 3002d1dfb5f8a..04fadabce9fbe 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -564,17 +564,11 @@ def test_multiindex(self, index_names): result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) - @pytest.mark.parametrize("strict_check", [ - pytest.param(True, marks=pytest.mark.xfail), - False - ]) - def test_empty_frame_roundtrip(self, strict_check): + def test_empty_frame_roundtrip(self): # GH 21287 df = pd.DataFrame([], columns=['a', 'b', 'c']) expected = df.copy() out = df.to_json(orient='table') result = pd.read_json(out, orient='table') # TODO: When DF coercion issue (#21345) is resolved tighten type checks - tm.assert_frame_equal(expected, result, - check_dtype=strict_check, - check_index_type=strict_check) + tm.assert_frame_equal(expected, result) From 7cf9716530f728e2b58bee7d223755337c0c250a Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Sun, 24 Feb 2019 22:40:32 +0100 Subject: [PATCH 3/7] Add tests (#25435) --- pandas/tests/io/json/test_pandas.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 80f7e6ff09f82..ce6de319bb5c5 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1202,10 +1202,12 @@ def test_data_frame_size_after_to_json(self): assert size_before == size_after - @pytest.mark.parametrize('index', [None, [1., 2.], ['1', '2'], - ['1.', '2.']]) - def test_from_json_to_json_table_index(self, index): - expected = DataFrame({'a': [1, 2]}, index=index) + @pytest.mark.parametrize('index', [None, [1, 2], [1., 2.], ['a', 'b'], + ['1', '2'], ['1.', '2.']]) + @pytest.mark.parametrize('columns', [['a', 'b'], ['1', '2'], ['1.', '2.']]) + def test_from_json_to_json_table_index_and_columns(self, index, columns): + # GH25433 GH25435 + expected = DataFrame([[1, 2], [3, 4]], index=index, columns=columns) dfjson = expected.to_json(orient='table') result = pd.read_json(dfjson, orient='table') assert_frame_equal(result, expected) From c0bce78177ebd486d0bd209d0fd65d3b2e6883af Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Mon, 25 Feb 2019 19:57:44 +0100 Subject: [PATCH 4/7] Address requested changes --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/io/json/json.py | 8 ++------ pandas/tests/io/json/test_pandas.py | 7 +++++++ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 60b0d828ac4d2..642093db3ded6 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -195,6 +195,7 @@ I/O - Fixed bug in missing text when using :meth:`to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`) - Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`) - Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`) +- Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to Timestamp, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`) - - - diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 0836ba71d158f..4973d5b5a6353 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -281,18 +281,14 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None, If True, infer dtypes; if a dict of column to dtype, then use those; if False, then don't infer dtypes at all, applies only to the data. - Not applicable with ``orient='table'``. - - .. versionchanged:: 0.25 + .. versionchanged:: 0.25.0 Not applicable with ``orient='table'``. convert_axes : boolean, default True Try to convert the axes to the proper dtypes. - Not applicable with ``orient='table'``. - - .. versionchanged:: 0.25 + .. versionchanged:: 0.25.0 Not applicable with ``orient='table'``. diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index ce6de319bb5c5..0ab20924ae8cc 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1227,6 +1227,13 @@ def test_read_json_table_dtype_raises(self, dtype): with pytest.raises(ValueError): pd.read_json(dfjson, orient='table', dtype=dtype) + def test_read_json_table_convert_axes_raises(self): + # GH25433 GH25435 + df = DataFrame([[1, 2], [3, 4]], index=[1., 2.], columns=['1.', '2.']) + dfjson = df.to_json(orient='table') + with pytest.raises(ValueError): + pd.read_json(dfjson, orient='table', convert_axes=True) + @pytest.mark.parametrize('data, expected', [ (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']), {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}), From 85fddb084aaf392908b16d35cac64ab718d9a9de Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Mon, 25 Feb 2019 23:52:52 +0100 Subject: [PATCH 5/7] Test error message matches --- pandas/tests/io/json/test_pandas.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 0ab20924ae8cc..39cb9ebc2fee1 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1224,14 +1224,16 @@ def test_read_json_table_dtype_raises(self, dtype): # GH21345 df = pd.DataFrame({'a': [1, 2], 'b': [3., 4.], 'c': ['5', '6']}) dfjson = df.to_json(orient='table') - with pytest.raises(ValueError): + msg = "cannot pass both dtype and orient='table'" + with pytest.raises(ValueError, match=msg): pd.read_json(dfjson, orient='table', dtype=dtype) def test_read_json_table_convert_axes_raises(self): # GH25433 GH25435 df = DataFrame([[1, 2], [3, 4]], index=[1., 2.], columns=['1.', '2.']) dfjson = df.to_json(orient='table') - with pytest.raises(ValueError): + msg = "cannot pass both convert_axes and orient='table'" + with pytest.raises(ValueError, match=msg): pd.read_json(dfjson, orient='table', convert_axes=True) @pytest.mark.parametrize('data, expected', [ From fd0822b77015edd5b31c72f60ee6c2a153072c0a Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Tue, 26 Feb 2019 06:58:19 +0100 Subject: [PATCH 6/7] Address requested changes --- pandas/io/json/json.py | 21 ++++++++++++------- .../tests/io/json/test_json_table_schema.py | 1 - 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 4973d5b5a6353..4bae067ee5196 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -277,20 +277,24 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None, 'table' as an allowed value for the ``orient`` argument typ : type of object to recover (series or frame), default 'frame' - dtype : boolean or dict, default True + dtype : boolean or dict, default None If True, infer dtypes; if a dict of column to dtype, then use those; if False, then don't infer dtypes at all, applies only to the data. + For all ``orient`` values except ``'table'``, default is True. + .. versionchanged:: 0.25.0 - Not applicable with ``orient='table'``. + Not applicable for ``orient='table'``. - convert_axes : boolean, default True + convert_axes : boolean, default None Try to convert the axes to the proper dtypes. + For all ``orient`` values except ``'table'``, default is True. + .. versionchanged:: 0.25.0 - Not applicable with ``orient='table'``. + Not applicable for ``orient='table'``. convert_dates : boolean, default True List of columns to parse for dates; If True, then try to parse @@ -423,9 +427,10 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None, if orient == 'table' and convert_axes: raise ValueError("cannot pass both convert_axes and orient='table'") - dtype = orient != 'table' if dtype is None else dtype - if convert_axes is None: - convert_axes = orient != 'table' + if dtype is None and orient != 'table': + dtype = True + if convert_axes is None and orient != 'table': + convert_axes = True compression = _infer_compression(path_or_buf, compression) filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( @@ -699,7 +704,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, # don't try to coerce, unless a force conversion if use_dtypes: - if self.dtype is False: + if not self.dtype: return data, False elif self.dtype is True: pass diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 04fadabce9fbe..351b495e5d8fc 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -570,5 +570,4 @@ def test_empty_frame_roundtrip(self): expected = df.copy() out = df.to_json(orient='table') result = pd.read_json(out, orient='table') - # TODO: When DF coercion issue (#21345) is resolved tighten type checks tm.assert_frame_equal(expected, result) From 5c5f3d699e28bbc95f32d7cc247b4992ae0901d8 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Tue, 26 Feb 2019 08:10:10 +0100 Subject: [PATCH 7/7] Fix test --- pandas/tests/io/json/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 39cb9ebc2fee1..ed598b730d960 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -194,7 +194,7 @@ def _check_orient(df, orient, dtype=None, numpy=False, else: unser = unser.sort_index() - if dtype is False: + if not dtype: check_dtype = False if not convert_axes and df.index.dtype.type == np.datetime64: