Skip to content

Commit b53523f

Browse files
ryanking13 authored and Mateusz Górski committed
BUG: GH29595 fix read_json() to use utf-8 for a default encoding (pandas-dev#29566)
1 parent 58ab4fd commit b53523f

File tree

3 files changed

+15
-0
lines changed

3 files changed

+15
-0
lines changed

doc/source/whatsnew/v1.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,8 @@ I/O
400400
- Bug in :meth:`DataFrame.to_html` when using ``formatters=<list>`` and ``max_cols`` together. (:issue:`25955`)
401401
- Bug in :meth:`Styler.background_gradient` not able to work with dtype ``Int64`` (:issue:`28869`)
402402
- Bug in :meth:`DataFrame.to_clipboard` which did not work reliably in ipython (:issue:`22707`)
403+
- Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
404+
-
403405

404406
Plotting
405407
^^^^^^^^

pandas/io/json/_json.py

+2
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,8 @@ def read_json(
577577
dtype = True
578578
if convert_axes is None and orient != "table":
579579
convert_axes = True
580+
if encoding is None:
581+
encoding = "utf-8"
580582

581583
compression = _infer_compression(path_or_buf, compression)
582584
filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(

pandas/tests/io/json/test_readlines.py

+11
Original file line numberDiff line numberDiff line change
@@ -173,3 +173,14 @@ def test_readjson_chunks_multiple_empty_lines(chunksize):
173173
tm.assert_frame_equal(
174174
orig, test, obj="chunksize: {chunksize}".format(chunksize=chunksize)
175175
)
176+
177+
178+
def test_readjson_unicode(monkeypatch):
    """Check that ``read_json`` reads a UTF-8 file under a non-UTF-8 locale.

    The locale's preferred encoding is patched to report ``cp949``, a JSON
    file containing non-ASCII keys and values is written explicitly as
    UTF-8, and ``read_json`` is then called without an ``encoding``
    argument. The parsed frame must equal the frame built directly from the
    same text.
    """
    with tm.ensure_clean("test.json") as path:
        # Make the interpreter report a non-UTF-8 preferred encoding so the
        # read below cannot succeed merely because the platform default
        # happens to be UTF-8.
        monkeypatch.setattr(
            "_bootlocale.getpreferredencoding", lambda do_setlocale: "cp949"
        )
        with open(path, "w", encoding="utf-8") as f:
            f.write('{"£©µÀÆÖÞßéöÿ":["АБВГДабвгд가"]}')

        # Read back while the temporary file still exists and after the
        # writer has been closed, so the content is fully on disk.
        result = read_json(path)
        expected = pd.DataFrame({"£©µÀÆÖÞßéöÿ": ["АБВГДабвгд가"]})
        tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)