From 58186ccbae0ffd509f5b407792485fffe702fcf8 Mon Sep 17 00:00:00 2001 From: Rohith295 Date: Mon, 5 Oct 2020 21:19:44 +0200 Subject: [PATCH 1/4] Added extra new line character while calling convert `convert_json_to_lines` method. Also reformatted and merged the code with latest master branch --- pandas/_libs/writers.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index f6823c3cb0d3f..3d724d1f21d22 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -108,7 +108,7 @@ def convert_json_to_lines(arr: object) -> str: if not in_quotes: num_open_brackets_seen -= 1 - return narr.tobytes().decode('utf-8') + return narr.tobytes().decode('utf-8') + '\n' # stata, pytables From 4d5be9bada2dc5ee7d95f37ea8b0a44a83be046a Mon Sep 17 00:00:00 2001 From: Rohith295 Date: Mon, 5 Oct 2020 21:44:03 +0200 Subject: [PATCH 2/4] Merged with master --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index a269580bc4453..aad7bab38745d 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -388,6 +388,7 @@ I/O - Bug in :meth:`read_csv` with ``engine='python'`` truncating data if multiple items present in first row and first element started with BOM (:issue:`36343`) - Removed ``private_key`` and ``verbose`` from :func:`read_gbq` as they are no longer supported in ``pandas-gbq`` (:issue:`34654`, :issue:`30200`) - Bumped minimum pytables version to 3.5.1 to avoid a ``ValueError`` in :meth:`read_hdf` (:issue:`24839`) +- Bug in :meth:`to_json` with ``lines=True`` and ``orient=`'records'` the last line of the record is not appended with 'new line character' (:issue:`36888`) Plotting ^^^^^^^^ From fbdfe929f29d4b04e555688fde916889f60c6d84 Mon Sep 17 00:00:00 2001 From: Rohith295 Date: Mon, 5 Oct 2020 21:46:35 +0200 Subject: [PATCH 3/4] Fixed formatting issues --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index aad7bab38745d..52cfa8005f672 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -388,7 +388,7 @@ I/O - Bug in :meth:`read_csv` with ``engine='python'`` truncating data if multiple items present in first row and first element started with BOM (:issue:`36343`) - Removed ``private_key`` and ``verbose`` from :func:`read_gbq` as they are no longer supported in ``pandas-gbq`` (:issue:`34654`, :issue:`30200`) - Bumped minimum pytables version to 3.5.1 to avoid a ``ValueError`` in :meth:`read_hdf` (:issue:`24839`) -- Bug in :meth:`to_json` with ``lines=True`` and ``orient=`'records'` the last line of the record is not appended with 'new line character' (:issue:`36888`) +- Bug in :meth:`to_json` with ``lines=True`` and ``orient='records'`` the last line of the record is not appended with 'new line character' (:issue:`36888`) Plotting ^^^^^^^^ From d943a02335cdb1af45c5e771900166df393041a6 Mon Sep 17 00:00:00 2001 From: Rohith295 Date: Tue, 6 Oct 2020 11:46:15 +0200 Subject: [PATCH 4/4] Fixed breaking tests, Also included a new testcase for GH:36888. Merged master changes into current branch --- pandas/_libs/writers.pyx | 2 +- pandas/tests/io/json/test_pandas.py | 6 +++--- pandas/tests/io/json/test_readlines.py | 14 +++++++++++--- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index 3d724d1f21d22..06f180eef0c65 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -108,7 +108,7 @@ def convert_json_to_lines(arr: object) -> str: if not in_quotes: num_open_brackets_seen -= 1 - return narr.tobytes().decode('utf-8') + '\n' + return narr.tobytes().decode('utf-8') + '\n' # GH:36888 # stata, pytables diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 822342113f62a..20a7581c32d69 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1291,19 +1291,19 @@ def test_to_jsonl(self): # GH9180 df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) result = df.to_json(orient="records", lines=True) - expected = '{"a":1,"b":2}\n{"a":1,"b":2}' + expected = '{"a":1,"b":2}\n{"a":1,"b":2}\n' assert result == expected df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"]) result = df.to_json(orient="records", lines=True) - expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}' + expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n' assert result == expected tm.assert_frame_equal(pd.read_json(result, lines=True), df) # GH15096: escaped characters in columns and data df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"]) result = df.to_json(orient="records", lines=True) - expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}' + expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n' assert result == expected tm.assert_frame_equal(pd.read_json(result, lines=True), df) diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index b475fa2c514ff..a6ffa7e97d375 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -45,23 +45,31 @@ def test_to_jsonl(): # GH9180 df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) result = df.to_json(orient="records", lines=True) - expected = '{"a":1,"b":2}\n{"a":1,"b":2}' + expected = '{"a":1,"b":2}\n{"a":1,"b":2}\n' assert result == expected df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"]) result = df.to_json(orient="records", lines=True) - expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}' + expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n' assert result == expected tm.assert_frame_equal(read_json(result, lines=True), df) # GH15096: escaped characters in columns and data df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"]) result = df.to_json(orient="records", lines=True) - expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}' + expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n' assert result == expected tm.assert_frame_equal(read_json(result, lines=True), df) +def test_to_jsonl_count_new_lines(): + # GH36888 + df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + actual_new_lines_count = df.to_json(orient="records", lines=True).count("\n") + expected_new_lines_count = 2 + assert actual_new_lines_count == expected_new_lines_count + + @pytest.mark.parametrize("chunksize", [1, 1.0]) def test_readjson_chunks(lines_json_df, chunksize): # Basic test that read_json(chunks=True) gives the same result as