Skip to content

Commit 04023d2

Browse files
Josh Owen, Piotr Chromiec
Josh Owen
authored and
Piotr Chromiec
committed
BUG: fix json with lines=True for quoted special characters
closes pandas-dev#14391 closes pandas-dev#14390
1 parent 68eefba commit 04023d2

File tree

4 files changed

+41
-4
lines changed

4 files changed

+41
-4
lines changed

asv_bench/benchmarks/packers.py

+25
Original file line number | Diff line number | Diff line change
@@ -547,6 +547,31 @@ def remove(self, f):
547547
pass
548548

549549

550+
class packers_write_json_lines(object):
    """Benchmark ``DataFrame.to_json`` with ``orient="records", lines=True``.

    ``setup`` builds a frame of ``C`` random float columns with ``N`` rows,
    the timed method writes it to ``self.f``, and ``teardown`` deletes the
    output file again.
    """

    # Timing target for this benchmark (presumably read by the asv harness).
    goal_time = 0.2

    def setup(self):
        """Create the frame to serialise and clear any stale output file."""
        self.f = '__test__.msg'
        self.N = 100000
        self.C = 5
        self.index = date_range('20000101', periods=self.N, freq='H')
        self.df = DataFrame(
            {'float{0}'.format(i): randn(self.N) for i in range(self.C)},
            index=self.index)
        self.remove(self.f)
        # The frame is built on the date index, then switched to a plain
        # integer index before the timed write.
        self.df.index = np.arange(self.N)

    def time_packers_write_json_lines(self):
        """Timed body: write the frame as line-delimited JSON records."""
        self.df.to_json(self.f, orient="records", lines=True)

    def teardown(self):
        """Delete the file written by the benchmark."""
        self.remove(self.f)

    def remove(self, f):
        """Delete the file at path ``f``, ignoring the error if it is absent.

        Parameters
        ----------
        f : str
            Path of the file to delete.
        """
        try:
            # Honour the argument; previously ``self.f`` was always removed
            # regardless of what the caller passed in.
            os.remove(f)
        except OSError:
            # Best-effort cleanup: a missing or unremovable file is not an
            # error for the benchmark.
            pass
573+
574+
550575
class packers_write_json_T(object):
551576
goal_time = 0.2
552577

doc/source/whatsnew/v0.19.1.txt

+1
Original file line number | Diff line number | Diff line change
@@ -45,3 +45,4 @@ Bug Fixes
4545

4646
- Bug in ``pd.concat`` where names of the ``keys`` were not propagated to the resulting ``MultiIndex`` (:issue:`14252`)
4747
- Bug in ``MultiIndex.set_levels`` where illegal level values were still set after raising an error (:issue:`13754`)
48+
- Bug in ``DataFrame.to_json`` where ``lines=True`` produced incorrect output when a value contained a ``}`` character (:issue:`14391`)

pandas/io/json.py

+9 -4
Original file line number | Diff line number | Diff line change
@@ -607,14 +607,19 @@ def _convert_to_line_delimits(s):
607607
s = s[1:-1]
608608
num_open_brackets_seen = 0
609609
commas_to_replace = []
610+
in_quotes = False
610611
for idx, char in enumerate(s): # iter through to find all
611-
if char == ',': # commas that should be \n
612-
if num_open_brackets_seen == 0:
612+
if char == '"' and idx > 0 and s[idx - 1] != '\\':
613+
in_quotes = ~in_quotes
614+
elif char == ',': # commas that should be \n
615+
if num_open_brackets_seen == 0 and not in_quotes:
613616
commas_to_replace.append(idx)
614617
elif char == '{':
615-
num_open_brackets_seen += 1
618+
if not in_quotes:
619+
num_open_brackets_seen += 1
616620
elif char == '}':
617-
num_open_brackets_seen -= 1
621+
if not in_quotes:
622+
num_open_brackets_seen -= 1
618623
s_arr = np.array(list(s)) # Turn to an array to set
619624
s_arr[commas_to_replace] = '\n' # all commas at once.
620625
s = ''.join(s_arr)

pandas/io/tests/json/test_pandas.py

+6
Original file line number | Diff line number | Diff line change
@@ -962,6 +962,12 @@ def test_to_jsonl(self):
962962
expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
963963
self.assertEqual(result, expected)
964964

965+
df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b'])
966+
result = df.to_json(orient="records", lines=True)
967+
expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
968+
self.assertEqual(result, expected)
969+
assert_frame_equal(pd.read_json(result, lines=True), df)
970+
965971
def test_latin_encoding(self):
966972
if compat.PY2:
967973
self.assertRaisesRegexp(

0 commit comments

Comments
 (0)