From 1e1f3014f24f029268e0e1454380a663c82e49df Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sat, 14 Nov 2020 16:45:29 +0100 Subject: [PATCH 1/2] BUG: Parse missing values using read_json with dtype=False to NaN instead of None (GH28501) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/io/json/_json.py | 7 +++++-- pandas/tests/io/json/test_pandas.py | 14 ++++++++++---- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 2db090e377beb..2151a9027dc85 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -566,6 +566,7 @@ I/O - Bug in :meth:`DataFrame.to_hdf` was not dropping missing rows with ``dropna=True`` (:issue:`35719`) - Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`) - :meth:`to_excel` and :meth:`to_markdown` support writing to fsspec URLs such as S3 and Google Cloud Storage (:issue:`33987`) +- Parse missing values using :func:`read_json` with ``dtype=False`` to NaN instead of None (:issue:`28501`) Plotting ^^^^^^^^ diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index f30007f6ed907..4bc6d997496d4 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -20,7 +20,7 @@ from pandas.core.dtypes.common import ensure_str, is_period_dtype -from pandas import DataFrame, MultiIndex, Series, isna, to_datetime +from pandas import DataFrame, MultiIndex, Series, isna, notna, to_datetime from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.generic import NDFrame from pandas.core.reshape.concat import concat @@ -861,7 +861,10 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True): # don't try to coerce, unless a force conversion if use_dtypes: if not self.dtype: - return data, False + if all(notna(data)): + return data, False + return data.fillna(np.nan), True + elif self.dtype is True: pass else: diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 3e5f9d481ce48..fdf2caa804def 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -345,10 +345,16 @@ def test_frame_from_json_missing_data(self, orient, convert_axes, numpy, dtype): convert_axes=convert_axes, dtype=dtype, ) - if not dtype: # TODO: Special case for object data; maybe a bug? - assert result.iloc[0, 2] is None - else: - assert np.isnan(result.iloc[0, 2]) + assert np.isnan(result.iloc[0, 2]) + + @pytest.mark.parametrize("dtype", [True, False]) + def test_frame_read_json_dtype_missing_value(self, orient, dtype): + # GH28501 Parse missing values using read_json with dtype=False + # to NaN instead of None + result = read_json("[null]", dtype=dtype) + expected = DataFrame([np.nan]) + + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("inf", [np.inf, np.NINF]) @pytest.mark.parametrize("dtype", [True, False]) From 5370b833cb0212408c9218fa893413e55500d0e8 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sun, 15 Nov 2020 18:18:46 +0100 Subject: [PATCH 2/2] Update whatsnew entry --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 0ae01e836fbe8..4945bc2b153a6 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -624,7 +624,7 @@ I/O - Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`) - :meth:`to_excel` and :meth:`to_markdown` support writing to fsspec URLs such as S3 and Google Cloud Storage (:issue:`33987`) - Bug in :meth:`read_fw` was not skipping blank lines (even with ``skip_blank_lines=True``) (:issue:`37758`) -- Parse missing values using :func:`read_json` with ``dtype=False`` to NaN instead of None (:issue:`28501`) +- Parse missing values using :func:`read_json` with ``dtype=False`` to ``NaN`` instead of ``None`` (:issue:`28501`) Plotting ^^^^^^^^