diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 7f2aab569ab71..8d44a54fbd988 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -12,7 +12,7 @@
 from pandas._typing import JSONSerializable
 from pandas.errors import AbstractMethodError
 
-from pandas.core.dtypes.common import ensure_str, is_period_dtype
+from pandas.core.dtypes.common import ensure_str, is_period_dtype, pandas_dtype
 
 from pandas import DataFrame, MultiIndex, Series, isna, to_datetime
 from pandas.core.construction import create_series_with_explicit_dtype
@@ -892,7 +892,9 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
                 )
                 if dtype is not None:
                     try:
-                        dtype = np.dtype(dtype)
+                        # pandas_dtype also resolves extension dtypes such as
+                        # "category", which np.dtype rejects with a TypeError
+                        dtype = pandas_dtype(dtype)
                         return data.astype(dtype), True
                     except (TypeError, ValueError):
                         return data, False
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 09d8a1d3f10ea..686a0ce99f9aa 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1197,6 +1197,20 @@ def test_read_local_jsonl(self):
             expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
             tm.assert_frame_equal(result, expected)
 
+    def test_read_json_category_dtype(self):
+        # dtype={"b": "category"} must produce a categorical column
+        data = StringIO(
+            '{"a": 0, "b": "A"}\n'
+            '{"a": 1, "b": "B"}\n'
+            '{"a": 2, "b": "A"}\n'
+            '{"a": 3, "b": "B"}\n'
+        )
+        result = read_json(data, lines=True, dtype={"b": "category"})
+        expected = DataFrame(
+            {"a": [0, 1, 2, 3], "b": pd.Categorical(["A", "B", "A", "B"])}
+        )
+        tm.assert_frame_equal(result, expected)
+
     def test_read_jsonl_unicode_chars(self):
         # GH15132: non-ascii unicode characters
         # \u201d == RIGHT DOUBLE QUOTATION MARK