diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index cd012fe755337..e5831812619a9 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -400,6 +400,8 @@ I/O
 - Bug in :meth:`DataFrame.to_html` when using ``formatters=`` and ``max_cols`` together. (:issue:`25955`)
 - Bug in :meth:`Styler.background_gradient` not able to work with dtype ``Int64`` (:issue:`28869`)
 - Bug in :meth:`DataFrame.to_clipboard` which did not work reliably in ipython (:issue:`22707`)
+- Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
+-
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 8615355996031..0a8f275cf54a9 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -577,6 +577,9 @@ def read_json(
         dtype = True
     if convert_axes is None and orient != "table":
         convert_axes = True
+    # Fall back to UTF-8 when the caller does not specify an encoding.
+    if encoding is None:
+        encoding = "utf-8"
 
     compression = _infer_compression(path_or_buf, compression)
     filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py
index 05f97a1769205..c4e03e24a7495 100644
--- a/pandas/tests/io/json/test_readlines.py
+++ b/pandas/tests/io/json/test_readlines.py
@@ -173,3 +173,17 @@ def test_readjson_chunks_multiple_empty_lines(chunksize):
     tm.assert_frame_equal(
         orig, test, obj="chunksize: {chunksize}".format(chunksize=chunksize)
     )
+
+
+def test_readjson_unicode(monkeypatch):
+    """read_json decodes a UTF-8 file when no encoding is passed (GH 29565)."""
+    with tm.ensure_clean("test.json") as path:
+        # Report cp949 as the preferred encoding for the duration of the test;
+        # the file itself is written as UTF-8 and must still round-trip.
+        monkeypatch.setattr("_bootlocale.getpreferredencoding", lambda l: "cp949")
+        with open(path, "w", encoding="utf-8") as f:
+            f.write('{"£©µÀÆÖÞßéöÿ":["АБВГДабвгд가"]}')
+
+        result = read_json(path)
+        expected = pd.DataFrame({"£©µÀÆÖÞßéöÿ": ["АБВГДабвгд가"]})
+        tm.assert_frame_equal(result, expected)