diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt index ecbd6e9b3b288..257cfd41f88dc 100644 --- a/doc/source/whatsnew/v0.19.2.txt +++ b/doc/source/whatsnew/v0.19.2.txt @@ -59,7 +59,7 @@ Bug Fixes - Bug in clipboard functions on Windows 10 and python 3 (:issue:`14362`, :issue:`12807`) - Bug in ``.to_clipboard()`` and Excel compat (:issue:`12529`) - +- Bug in ``.to_json()`` with ``lines=True`` when values contain backslashed quotes (:issue:`14693`) diff --git a/pandas/io/tests/json/test_pandas.py b/pandas/io/tests/json/test_pandas.py index ba02e9186f1df..496113659c7a7 100644 --- a/pandas/io/tests/json/test_pandas.py +++ b/pandas/io/tests/json/test_pandas.py @@ -962,9 +962,11 @@ def test_to_jsonl(self): expected = '{"a":1,"b":2}\n{"a":1,"b":2}' self.assertEqual(result, expected) - df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b']) + df = DataFrame([["foo}", "bar"], ['foo"', "bar"], ['foo\\', "bar"]], + columns=['a', 'b']) result = df.to_json(orient="records", lines=True) - expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}' + expected = ('{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n' + '{"a":"foo\\\\","b":"bar"}') self.assertEqual(result, expected) assert_frame_equal(pd.read_json(result, lines=True), df) diff --git a/pandas/lib.pyx b/pandas/lib.pyx index b09a1c2755a06..a02a91b8c09b2 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -1111,8 +1111,10 @@ def convert_json_to_lines(object arr): length = narr.shape[0] for i in range(length): v = narr[i] - if v == quote and i > 0 and narr[i - 1] != backslash: - in_quotes = ~in_quotes + if v == quote: + if not (i > 0 and narr[i - 1] == backslash and + i + 1 < length and narr[i + 1] != comma): + in_quotes = ~in_quotes if v == comma: # commas that should be \n if num_open_brackets_seen == 0 and not in_quotes: narr[i] = newline