BUG: Add trailing newline in to_json (pandas-dev#36898)

Rohith295 · Kevin D Smith · commit 10199ee3c091 · 2020-11-02T08:51:46.000-06:00
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -420,6 +420,7 @@ I/O
 - Removed ``private_key`` and ``verbose`` from :func:`read_gbq` as they are no longer supported in ``pandas-gbq`` (:issue:`34654`, :issue:`30200`)
 - Bumped minimum pytables version to 3.5.1 to avoid a ``ValueError`` in :meth:`read_hdf` (:issue:`24839`)
 - Bug in :func:`read_table` and :func:`read_csv` when ``delim_whitespace=True`` and ``sep=default`` (:issue:`36583`)
+- Bug in :meth:`to_json` with ``lines=True`` and ``orient='records'`` where the last record was not followed by a newline character (:issue:`36888`)
 - Bug in :meth:`read_parquet` with fixed offset timezones. String representation of timezones was not recognized (:issue:`35997`, :issue:`36004`)
 
 Plotting
diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx
@@ -108,7 +108,7 @@ def convert_json_to_lines(arr: object) -> str:
             if not in_quotes:
                 num_open_brackets_seen -= 1
 
-    return narr.tobytes().decode('utf-8')
+    return narr.tobytes().decode('utf-8') + '\n'  # GH:36888
 
 
 # stata, pytables
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -1291,19 +1291,19 @@ def test_to_jsonl(self):
         # GH9180
         df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
         result = df.to_json(orient="records", lines=True)
-        expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
+        expected = '{"a":1,"b":2}\n{"a":1,"b":2}\n'
         assert result == expected
 
         df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"])
         result = df.to_json(orient="records", lines=True)
-        expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
+        expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n'
         assert result == expected
         tm.assert_frame_equal(pd.read_json(result, lines=True), df)
 
         # GH15096: escaped characters in columns and data
         df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])
         result = df.to_json(orient="records", lines=True)
-        expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}'
+        expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n'
         assert result == expected
         tm.assert_frame_equal(pd.read_json(result, lines=True), df)
 
diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py
@@ -45,23 +45,31 @@ def test_to_jsonl():
     # GH9180
     df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
     result = df.to_json(orient="records", lines=True)
-    expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
+    expected = '{"a":1,"b":2}\n{"a":1,"b":2}\n'
     assert result == expected
 
     df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"])
     result = df.to_json(orient="records", lines=True)
-    expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
+    expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n'
     assert result == expected
     tm.assert_frame_equal(read_json(result, lines=True), df)
 
     # GH15096: escaped characters in columns and data
     df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])
     result = df.to_json(orient="records", lines=True)
-    expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}'
+    expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n'
     assert result == expected
     tm.assert_frame_equal(read_json(result, lines=True), df)
 
 
+def test_to_jsonl_count_new_lines():
+    # GH36888
+    df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
+    actual_new_lines_count = df.to_json(orient="records", lines=True).count("\n")
+    expected_new_lines_count = 2
+    assert actual_new_lines_count == expected_new_lines_count
+
+
 @pytest.mark.parametrize("chunksize", [1, 1.0])
 def test_readjson_chunks(lines_json_df, chunksize):
     # Basic test that read_json(chunks=True) gives the same result as