Skip to content

Commit 10199ee

Browse files
Rohith295Kevin D Smith
authored and
Kevin D Smith
committed
BUG: Add trailing newline in to_json (pandas-dev#36898)
1 parent d4f7d2a commit 10199ee

File tree

4 files changed

+16
-7
lines changed

4 files changed

+16
-7
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,7 @@ I/O
420420
- Removed ``private_key`` and ``verbose`` from :func:`read_gbq` as they are no longer supported in ``pandas-gbq`` (:issue:`34654`, :issue:`30200`)
421421
- Bumped minimum pytables version to 3.5.1 to avoid a ``ValueError`` in :meth:`read_hdf` (:issue:`24839`)
422422
- Bug in :func:`read_table` and :func:`read_csv` when ``delim_whitespace=True`` and ``sep=default`` (:issue:`36583`)
423+
- Bug in :meth:`to_json` with ``lines=True`` and ``orient='records'`` where the last record was not followed by a newline character (:issue:`36888`)
423424
- Bug in :meth:`read_parquet` with fixed offset timezones. String representation of timezones was not recognized (:issue:`35997`, :issue:`36004`)
424425

425426
Plotting

pandas/_libs/writers.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def convert_json_to_lines(arr: object) -> str:
108108
if not in_quotes:
109109
num_open_brackets_seen -= 1
110110

111-
return narr.tobytes().decode('utf-8')
111+
return narr.tobytes().decode('utf-8') + '\n' # GH:36888
112112

113113

114114
# stata, pytables

pandas/tests/io/json/test_pandas.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1291,19 +1291,19 @@ def test_to_jsonl(self):
12911291
# GH9180
12921292
df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
12931293
result = df.to_json(orient="records", lines=True)
1294-
expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
1294+
expected = '{"a":1,"b":2}\n{"a":1,"b":2}\n'
12951295
assert result == expected
12961296

12971297
df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"])
12981298
result = df.to_json(orient="records", lines=True)
1299-
expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
1299+
expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n'
13001300
assert result == expected
13011301
tm.assert_frame_equal(pd.read_json(result, lines=True), df)
13021302

13031303
# GH15096: escaped characters in columns and data
13041304
df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])
13051305
result = df.to_json(orient="records", lines=True)
1306-
expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}'
1306+
expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n'
13071307
assert result == expected
13081308
tm.assert_frame_equal(pd.read_json(result, lines=True), df)
13091309

pandas/tests/io/json/test_readlines.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -45,23 +45,31 @@ def test_to_jsonl():
4545
# GH9180
4646
df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
4747
result = df.to_json(orient="records", lines=True)
48-
expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
48+
expected = '{"a":1,"b":2}\n{"a":1,"b":2}\n'
4949
assert result == expected
5050

5151
df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"])
5252
result = df.to_json(orient="records", lines=True)
53-
expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
53+
expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n'
5454
assert result == expected
5555
tm.assert_frame_equal(read_json(result, lines=True), df)
5656

5757
# GH15096: escaped characters in columns and data
5858
df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])
5959
result = df.to_json(orient="records", lines=True)
60-
expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}'
60+
expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n'
6161
assert result == expected
6262
tm.assert_frame_equal(read_json(result, lines=True), df)
6363

6464

65+
def test_to_jsonl_count_new_lines():
66+
# GH36888
67+
df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
68+
actual_new_lines_count = df.to_json(orient="records", lines=True).count("\n")
69+
expected_new_lines_count = 2
70+
assert actual_new_lines_count == expected_new_lines_count
71+
72+
6573
@pytest.mark.parametrize("chunksize", [1, 1.0])
6674
def test_readjson_chunks(lines_json_df, chunksize):
6775
# Basic test that read_json(chunks=True) gives the same result as

0 commit comments

Comments
 (0)