Skip to content

Commit 8c75d94

Browse files
committed
BUG: Fix to_json lines with escaped characters
Updates the existing to_json(lines=True) line-splitting logic by adding an is_escaping state variable, which ensures escaped characters are handled correctly. Bug description: simply checking whether the previous character is a backslash is insufficient, because that backslash may itself be escaped (for example, a string value that ends in a backslash is serialized as `\\` immediately followed by the closing quote). A test is also included (previously included in #14693). xref #14693 xref #15096
1 parent 0fe491d commit 8c75d94

File tree

2 files changed

+8
-3
lines changed

2 files changed

+8
-3
lines changed

pandas/io/tests/json/test_pandas.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -966,9 +966,11 @@ def test_to_jsonl(self):
966966
expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
967967
self.assertEqual(result, expected)
968968

969-
df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b'])
969+
df = DataFrame([["foo}", "bar"], ['foo"', "bar"], ["foo\\", "bar"]],
970+
columns=['a', 'b'])
970971
result = df.to_json(orient="records", lines=True)
971-
expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
972+
expected = ('{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n'
973+
'{"a":"foo\\\\","b":"bar"}')
972974
self.assertEqual(result, expected)
973975
assert_frame_equal(pd.read_json(result, lines=True), df)
974976

pandas/lib.pyx

+4-1
Original file line numberDiff line numberDiff line change
@@ -1099,6 +1099,7 @@ def convert_json_to_lines(object arr):
10991099
"""
11001100
cdef:
11011101
Py_ssize_t i = 0, num_open_brackets_seen = 0, in_quotes = 0, length
1102+
Py_ssize_t is_escaping = 0
11021103
ndarray[uint8_t] narr
11031104
unsigned char v, comma, left_bracket, right_brack, newline
11041105

@@ -1113,8 +1114,10 @@ def convert_json_to_lines(object arr):
11131114
length = narr.shape[0]
11141115
for i in range(length):
11151116
v = narr[i]
1116-
if v == quote and i > 0 and narr[i - 1] != backslash:
1117+
if v == quote and i > 0 and not is_escaping:
11171118
in_quotes = ~in_quotes
1119+
if v == backslash or is_escaping:
1120+
is_escaping = ~is_escaping
11181121
if v == comma: # commas that should be \n
11191122
if num_open_brackets_seen == 0 and not in_quotes:
11201123
narr[i] = newline

0 commit comments

Comments
 (0)