Skip to content

Commit f6eee83

Browse files
authored
BUG: read_json not handling string dtype when converting to dates (#56195)
1 parent a29e4f6 commit f6eee83

File tree

3 files changed

+27
-15
lines changed

3 files changed

+27
-15
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,7 @@ I/O
531531
- Bug in :func:`read_csv` where ``on_bad_lines="warn"`` would write to ``stderr`` instead of raise a Python warning. This now yields a :class:`.errors.ParserWarning` (:issue:`54296`)
532532
- Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a csv with no headers (:issue:`54459`)
533533
- Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
534+
- Bug in :func:`read_json` not handling dtype conversion properly if ``infer_string`` is set (:issue:`56195`)
534535
- Bug in :func:`to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string values (:issue:`54994`)
535536
- Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` with ``datetime64`` dtypes with non-nanosecond resolution failing to round-trip correctly (:issue:`55622`)
536537
- Bug in :meth:`pandas.read_excel` with ``engine="odf"`` (``ods`` files) when a string contains an annotation (:issue:`55200`)

pandas/io/json/_json.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@
3232
from pandas.util._exceptions import find_stack_level
3333
from pandas.util._validators import check_dtype_backend
3434

35-
from pandas.core.dtypes.common import ensure_str
35+
from pandas.core.dtypes.common import (
36+
ensure_str,
37+
is_string_dtype,
38+
)
3639
from pandas.core.dtypes.dtypes import PeriodDtype
3740
from pandas.core.dtypes.generic import ABCIndex
3841

@@ -1249,7 +1252,7 @@ def _try_convert_data(
12491252
if self.dtype_backend is not lib.no_default and not isinstance(data, ABCIndex):
12501253
# Fall through for conversion later on
12511254
return data, True
1252-
elif data.dtype == "object":
1255+
elif is_string_dtype(data.dtype):
12531256
# try float
12541257
try:
12551258
data = data.astype("float64")
@@ -1301,6 +1304,10 @@ def _try_convert_to_date(self, data):
13011304
return data, False
13021305

13031306
new_data = data
1307+
1308+
if new_data.dtype == "string":
1309+
new_data = new_data.astype(object)
1310+
13041311
if new_data.dtype == "object":
13051312
try:
13061313
new_data = data.astype("int64")

pandas/tests/io/json/test_compression.py

+17-13
Original file line numberDiff line numberDiff line change
@@ -93,27 +93,31 @@ def test_read_unsupported_compression_type():
9393
pd.read_json(path, compression="unsupported")
9494

9595

96+
@pytest.mark.parametrize(
97+
"infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
98+
)
9699
@pytest.mark.parametrize("to_infer", [True, False])
97100
@pytest.mark.parametrize("read_infer", [True, False])
98101
def test_to_json_compression(
99-
compression_only, read_infer, to_infer, compression_to_extension
102+
compression_only, read_infer, to_infer, compression_to_extension, infer_string
100103
):
101-
# see gh-15008
102-
compression = compression_only
104+
with pd.option_context("future.infer_string", infer_string):
105+
# see gh-15008
106+
compression = compression_only
103107

104-
# We'll complete file extension subsequently.
105-
filename = "test."
106-
filename += compression_to_extension[compression]
108+
# We'll complete file extension subsequently.
109+
filename = "test."
110+
filename += compression_to_extension[compression]
107111

108-
df = pd.DataFrame({"A": [1]})
112+
df = pd.DataFrame({"A": [1]})
109113

110-
to_compression = "infer" if to_infer else compression
111-
read_compression = "infer" if read_infer else compression
114+
to_compression = "infer" if to_infer else compression
115+
read_compression = "infer" if read_infer else compression
112116

113-
with tm.ensure_clean(filename) as path:
114-
df.to_json(path, compression=to_compression)
115-
result = pd.read_json(path, compression=read_compression)
116-
tm.assert_frame_equal(result, df)
117+
with tm.ensure_clean(filename) as path:
118+
df.to_json(path, compression=to_compression)
119+
result = pd.read_json(path, compression=read_compression)
120+
tm.assert_frame_equal(result, df)
117121

118122

119123
def test_to_json_compression_mode(compression):

0 commit comments

Comments
 (0)