From 86d76fa18c0d3c5548def3a0fd7d85faf32fa86b Mon Sep 17 00:00:00 2001 From: ryanking13 Date: Tue, 12 Nov 2019 16:13:22 +0900 Subject: [PATCH 1/2] BUG: GH29565 fix read_json() to use utf-8 for a default encoding --- pandas/io/json/_json.py | 2 ++ pandas/tests/io/json/test_readlines.py | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 8615355996031..0a8f275cf54a9 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -577,6 +577,8 @@ def read_json( dtype = True if convert_axes is None and orient != "table": convert_axes = True + if encoding is None: + encoding = "utf-8" compression = _infer_compression(path_or_buf, compression) filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 05f97a1769205..c4e03e24a7495 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -173,3 +173,14 @@ def test_readjson_chunks_multiple_empty_lines(chunksize): tm.assert_frame_equal( orig, test, obj="chunksize: {chunksize}".format(chunksize=chunksize) ) + + +def test_readjson_unicode(monkeypatch): + with tm.ensure_clean("test.json") as path: + monkeypatch.setattr("_bootlocale.getpreferredencoding", lambda l: "cp949") + with open(path, "w", encoding="utf-8") as f: + f.write('{"£©µÀÆÖÞßéöÿ":["АБВГДабвгд가"]}') + + result = read_json(path) + expected = pd.DataFrame({"£©µÀÆÖÞßéöÿ": ["АБВГДабвгд가"]}) + tm.assert_frame_equal(result, expected) From e6bc2d2446737347c6cb7613fcda1712b831363c Mon Sep 17 00:00:00 2001 From: ryanking13 Date: Tue, 12 Nov 2019 16:58:46 +0900 Subject: [PATCH 2/2] update whatsnew --- doc/source/whatsnew/v1.0.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index cd012fe755337..e5831812619a9 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ 
b/doc/source/whatsnew/v1.0.0.rst @@ -400,6 +400,8 @@ I/O - Bug in :meth:`DataFrame.to_html` when using ``formatters=`` and ``max_cols`` together. (:issue:`25955`) - Bug in :meth:`Styler.background_gradient` not able to work with dtype ``Int64`` (:issue:`28869`) - Bug in :meth:`DataFrame.to_clipboard` which did not work reliably in ipython (:issue:`22707`) +- Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`) +- Plotting ^^^^^^^^