Skip to content

Commit 5c35871

Browse files
BUG: Parse missing values using read_json with dtype=False to NaN instead of None (GH28501) (#37834)
1 parent 793b635 commit 5c35871

File tree

3 files changed

+16
-6
lines changed

3 files changed

+16
-6
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -653,6 +653,7 @@ I/O
653653
- Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`)
654654
- :meth:`to_excel` and :meth:`to_markdown` support writing to fsspec URLs such as S3 and Google Cloud Storage (:issue:`33987`)
655655
- Bug in :meth:`read_fwf` was not skipping blank lines (even with ``skip_blank_lines=True``) (:issue:`37758`)
656+
- Bug in :func:`read_json` with ``dtype=False`` was parsing missing values as ``None`` instead of ``NaN`` (:issue:`28501`)
656657
- :meth:`read_fwf` was inferring compression with ``compression=None`` which was not consistent with the other ``read_*`` functions (:issue:`37909`)
657658

658659
Period

pandas/io/json/_json.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
from pandas.core.dtypes.common import ensure_str, is_period_dtype
2222

23-
from pandas import DataFrame, MultiIndex, Series, isna, to_datetime
23+
from pandas import DataFrame, MultiIndex, Series, isna, notna, to_datetime
2424
from pandas.core import generic
2525
from pandas.core.construction import create_series_with_explicit_dtype
2626
from pandas.core.generic import NDFrame
@@ -858,7 +858,10 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
858858
# don't try to coerce, unless a force conversion
859859
if use_dtypes:
860860
if not self.dtype:
861-
return data, False
861+
if all(notna(data)):
862+
return data, False
863+
return data.fillna(np.nan), True
864+
862865
elif self.dtype is True:
863866
pass
864867
else:

pandas/tests/io/json/test_pandas.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -345,10 +345,16 @@ def test_frame_from_json_missing_data(self, orient, convert_axes, numpy, dtype):
345345
convert_axes=convert_axes,
346346
dtype=dtype,
347347
)
348-
if not dtype: # TODO: Special case for object data; maybe a bug?
349-
assert result.iloc[0, 2] is None
350-
else:
351-
assert np.isnan(result.iloc[0, 2])
348+
assert np.isnan(result.iloc[0, 2])
349+
350+
@pytest.mark.parametrize("dtype", [True, False])
351+
def test_frame_read_json_dtype_missing_value(self, orient, dtype):
352+
# GH28501 Parse missing values using read_json with dtype=False
353+
# to NaN instead of None
354+
result = read_json("[null]", dtype=dtype)
355+
expected = DataFrame([np.nan])
356+
357+
tm.assert_frame_equal(result, expected)
352358

353359
@pytest.mark.parametrize("inf", [np.inf, np.NINF])
354360
@pytest.mark.parametrize("dtype", [True, False])

0 commit comments

Comments
 (0)