From d11445527a0ccf15e1ad16c1f0211c062a4c7bbf Mon Sep 17 00:00:00 2001
From: Rouz Azari <rouz.azari@gmail.com>
Date: Thu, 12 Jan 2017 06:50:59 -0800
Subject: [PATCH] BUG: Fix to_json lines with escaped characters

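Update ``convert_json_to_lines`` (used by ``to_json`` with ``lines=True``) to track escape state with a new ``is_escaping`` flag instead of checking only whether the previous byte is a backslash, so that escaped characters such as ``\\`` and ``\"`` are handled correctly.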

- Includes test for escaped characters in keys and values (i.e. columns and data).
- Includes bug fix in whatsnew
- Revised type of in_quotes and is_escaping to bint

xref #14693
xref #15096
---
 doc/source/whatsnew/v0.20.0.txt     | 1 +
 pandas/io/tests/json/test_pandas.py | 9 +++++++++
 pandas/lib.pyx                      | 7 +++++--
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index b157112b6ff37..9ea7b740bae8f 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -351,6 +351,7 @@ Bug Fixes
 - Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`)
 - Bug in converting object elements of array-like objects to unsigned 64-bit integers (:issue:`4471`, :issue:`14982`)
 - Bug in ``pd.pivot_table()`` where no error was raised when values argument was not in the columns (:issue:`14938`)
+- Bug in ``.to_json()`` with ``lines=True`` where contents (keys or values) containing escaped characters were handled incorrectly (:issue:`15096`)
 
 - Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples (:issue:`14848`)
 - Bug in creating a ``MultiIndex`` with tuples and not passing a list of names; this will now raise ``ValueError`` (:issue:`15110`)
diff --git a/pandas/io/tests/json/test_pandas.py b/pandas/io/tests/json/test_pandas.py
index d7f903153fdae..aaa9752dc6d46 100644
--- a/pandas/io/tests/json/test_pandas.py
+++ b/pandas/io/tests/json/test_pandas.py
@@ -972,6 +972,15 @@ def test_to_jsonl(self):
         self.assertEqual(result, expected)
         assert_frame_equal(pd.read_json(result, lines=True), df)
 
+        # GH15096: escaped characters in columns and data
+        df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]],
+                       columns=["a\\", 'b'])
+        result = df.to_json(orient="records", lines=True)
+        expected = ('{"a\\\\":"foo\\\\","b":"bar"}\n'
+                    '{"a\\\\":"foo\\"","b":"bar"}')
+        self.assertEqual(result, expected)
+        assert_frame_equal(pd.read_json(result, lines=True), df)
+
     def test_latin_encoding(self):
         if compat.PY2:
             self.assertRaisesRegexp(
diff --git a/pandas/lib.pyx b/pandas/lib.pyx
index 761969491cfc7..fce6a3d03287e 100644
--- a/pandas/lib.pyx
+++ b/pandas/lib.pyx
@@ -1098,7 +1098,8 @@ def convert_json_to_lines(object arr):
     to quotes & brackets
     """
     cdef:
-        Py_ssize_t i = 0, num_open_brackets_seen = 0, in_quotes = 0, length
+        Py_ssize_t i = 0, num_open_brackets_seen = 0, length
+        bint in_quotes = 0, is_escaping = 0
         ndarray[uint8_t] narr
         unsigned char v, comma, left_bracket, right_brack, newline
 
@@ -1113,8 +1114,10 @@ def convert_json_to_lines(object arr):
     length = narr.shape[0]
     for i in range(length):
         v = narr[i]
-        if v == quote and i > 0 and narr[i - 1] != backslash:
+        if v == quote and i > 0 and not is_escaping:
             in_quotes = ~in_quotes
+        if v == backslash or is_escaping:
+            is_escaping = ~is_escaping
         if v == comma: # commas that should be \n
             if num_open_brackets_seen == 0 and not in_quotes:
                 narr[i] = newline