diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index cea42cbffa906..41964f7fd3997 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -653,6 +653,7 @@ I/O - Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`) - :meth:`to_excel` and :meth:`to_markdown` support writing to fsspec URLs such as S3 and Google Cloud Storage (:issue:`33987`) - Bug in :meth:`read_fw` was not skipping blank lines (even with ``skip_blank_lines=True``) (:issue:`37758`) +- :func:`read_json` with ``dtype=False`` now parses missing values as ``NaN`` instead of ``None`` (:issue:`28501`) - :meth:`read_fwf` was inferring compression with ``compression=None`` which was not consistent with the other :meth:``read_*`` functions (:issue:`37909`) Period diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 8129d58d5cb34..e1feb1aa3fada 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -20,7 +20,7 @@ from pandas.core.dtypes.common import ensure_str, is_period_dtype -from pandas import DataFrame, MultiIndex, Series, isna, to_datetime +from pandas import DataFrame, MultiIndex, Series, isna, notna, to_datetime from pandas.core import generic from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.generic import NDFrame @@ -858,7 +858,10 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True): # don't try to coerce, unless a force conversion if use_dtypes: if not self.dtype: - return data, False + if notna(data).all(): + return data, False + return data.fillna(np.nan), True + elif self.dtype is True: pass else: diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 3e5f9d481ce48..fdf2caa804def 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -345,10 +345,16 @@ def 
test_frame_from_json_missing_data(self, orient, convert_axes, numpy, dtype): convert_axes=convert_axes, dtype=dtype, ) - if not dtype: # TODO: Special case for object data; maybe a bug? - assert result.iloc[0, 2] is None - else: - assert np.isnan(result.iloc[0, 2]) + assert np.isnan(result.iloc[0, 2]) + + @pytest.mark.parametrize("dtype", [True, False]) + def test_frame_read_json_dtype_missing_value(self, dtype): + # GH 28501: read_json must parse a missing value (JSON null) as NaN, + # not None, for dtype=False as well as dtype=True + result = read_json("[null]", dtype=dtype) + expected = DataFrame([np.nan]) + + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("inf", [np.inf, np.NINF]) @pytest.mark.parametrize("dtype", [True, False])