Skip to content

Commit 6a1c6b4

Browse files
BUG: Fix dtypes for read_json (#42819)
* Fix dtypes for read_json
* Address comments
* Add whatsnew entry
* Update doc/source/whatsnew/v1.4.0.rst
  Co-authored-by: Matthew Zeitlin <[email protected]>
* Linting
  Co-authored-by: Matthew Zeitlin <[email protected]>
1 parent 0eeda64 commit 6a1c6b4

File tree

3 files changed

+32
-8
lines changed

3 files changed

+32
-8
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,7 @@ I/O
465465
- Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`)
466466
- Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raised an uncontrolled ``IndexError`` (:issue:`43102`)
467467
- Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`)
468+
- Bug in :func:`read_json` not handling non-numpy dtypes correctly (especially ``category``) (:issue:`21892`, :issue:`33205`)
468469
- Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`)
469470
- Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`)
470471
-

pandas/io/json/_json.py

+1-8
Original file line numberDiff line numberDiff line change
@@ -876,11 +876,8 @@ def check_keys_split(self, decoded):
876876

877877
def parse(self):
878878

879-
# try numpy
880-
numpy = self.numpy
881-
if numpy:
879+
if self.numpy:
882880
self._parse_numpy()
883-
884881
else:
885882
self._parse_no_numpy()
886883

@@ -941,10 +938,6 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
941938
)
942939
if dtype is not None:
943940
try:
944-
# error: Argument 1 to "dtype" has incompatible type
945-
# "Union[ExtensionDtype, str, dtype[Any], Type[object]]";
946-
# expected "Type[Any]"
947-
dtype = np.dtype(dtype) # type: ignore[arg-type]
948941
return data.astype(dtype), True
949942
except (TypeError, ValueError):
950943
return data, False

pandas/tests/io/json/test_pandas.py

+30
Original file line numberDiff line numberDiff line change
@@ -1387,6 +1387,36 @@ def test_from_json_to_json_table_dtypes(self):
13871387
result = read_json(dfjson, orient="table")
13881388
tm.assert_frame_equal(result, expected)
13891389

1390+
@pytest.mark.parametrize("orient", ["split", "records", "index", "columns"])
1391+
def test_to_json_from_json_columns_dtypes(self, orient):
1392+
# GH21892 GH33205
1393+
expected = DataFrame.from_dict(
1394+
{
1395+
"Integer": Series([1, 2, 3], dtype="int64"),
1396+
"Float": Series([None, 2.0, 3.0], dtype="float64"),
1397+
"Object": Series([None, "", "c"], dtype="object"),
1398+
"Bool": Series([True, False, True], dtype="bool"),
1399+
"Category": Series(["a", "b", None], dtype="category"),
1400+
"Datetime": Series(
1401+
["2020-01-01", None, "2020-01-03"], dtype="datetime64[ns]"
1402+
),
1403+
}
1404+
)
1405+
dfjson = expected.to_json(orient=orient)
1406+
result = read_json(
1407+
dfjson,
1408+
orient=orient,
1409+
dtype={
1410+
"Integer": "int64",
1411+
"Float": "float64",
1412+
"Object": "object",
1413+
"Bool": "bool",
1414+
"Category": "category",
1415+
"Datetime": "datetime64[ns]",
1416+
},
1417+
)
1418+
tm.assert_frame_equal(result, expected)
1419+
13901420
@pytest.mark.parametrize("dtype", [True, {"b": int, "c": int}])
13911421
def test_read_json_table_dtype_raises(self, dtype):
13921422
# GH21345

0 commit comments

Comments
 (0)