Skip to content

Commit fe1cfbd

Browse files
committed
Make read_json with lines=True more memory-efficient
Instead of reading the whole file into memory and then manipulating it, read and parse it 10,000 lines at a time. This only covers some kinds of input to read_json. It is also much slower than the previous implementation.
1 parent c55dbf0 commit fe1cfbd

File tree

1 file changed

+26
-2
lines changed

1 file changed

+26
-2
lines changed

pandas/io/json/json.py

+26-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# pylint: disable-msg=E1101,W0613,W0603
2+
from itertools import islice
3+
from pandas import concat
24
import os
35
import numpy as np
46

@@ -334,8 +336,25 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
334336
if exists:
335337
fh, handles = _get_handle(filepath_or_buffer, 'r',
336338
encoding=encoding)
337-
json = fh.read()
338-
fh.close()
339+
if lines:
340+
return_val = None
341+
while True:
342+
lines = list(islice(fh, 10000))
343+
if lines:
344+
lines_json = '[' + ','.join(lines) + ']'
345+
obj = _get_obj(typ, lines_json, orient, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit)
346+
if not return_val:
347+
obj = return_val
348+
else:
349+
return_val = concat([return_val, obj])
350+
351+
else:
352+
break
353+
fh.close()
354+
return return_val
355+
else:
356+
json = fh.read()
357+
fh.close()
339358
else:
340359
json = filepath_or_buffer
341360
elif hasattr(filepath_or_buffer, 'read'):
@@ -349,6 +368,11 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
349368
lines = list(StringIO(json.strip()))
350369
json = '[' + ','.join(lines) + ']'
351370

371+
return _get_obj(typ, json, orient, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit)
372+
373+
def _get_obj(typ, json, orient, dtype, convert_axes, convert_dates,
374+
keep_default_dates, numpy, precise_float,
375+
date_unit):
352376
obj = None
353377
if typ == 'frame':
354378
obj = FrameParser(json, orient, dtype, convert_axes, convert_dates,

0 commit comments

Comments
 (0)