1
1
# pylint: disable-msg=E1101,W0613,W0603
2
+ from itertools import islice
3
+ from pandas import concat
2
4
import os
3
5
import numpy as np
4
6
@@ -174,7 +176,7 @@ def write(self):
174
176
def read_json (path_or_buf = None , orient = None , typ = 'frame' , dtype = True ,
175
177
convert_axes = True , convert_dates = True , keep_default_dates = True ,
176
178
numpy = False , precise_float = False , date_unit = None , encoding = None ,
177
- lines = False ):
179
+ lines = False , chunksize = None ):
178
180
"""
179
181
Convert a JSON string to pandas object
180
182
@@ -263,6 +265,14 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
263
265
264
266
.. versionadded:: 0.19.0
265
267
268
+ chunksize: integer, default None
269
+ If `lines=True`, how many lines to read into memory at a time.
270
+ If this is None, the file will be read into memory all at once.
271
+ Passing a chunksize helps with memory usage, but is slower.
272
+ Also note this is different from the `chunksize` parameter in
273
+ `read_csv`, which returns a TextFileReader.
274
+ If the JSON input is a string, this argument has no effect.
275
+
266
276
Returns
267
277
-------
268
278
result : Series or DataFrame, depending on the value of `typ`.
@@ -334,12 +344,18 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
334
344
if exists :
335
345
fh , handles = _get_handle (filepath_or_buffer , 'r' ,
336
346
encoding = encoding )
337
- json = fh .read ()
338
- fh .close ()
347
+ if lines and chunksize :
348
+ return _read_json_as_lines (fh , chunksize )
349
+ else :
350
+ json = fh .read ()
351
+ fh .close ()
339
352
else :
340
353
json = filepath_or_buffer
341
354
elif hasattr (filepath_or_buffer , 'read' ):
342
- json = filepath_or_buffer .read ()
355
+ if lines and chunksize :
356
+ return _read_json_as_lines (fh , chunksize )
357
+ else :
358
+ json = filepath_or_buffer .read ()
343
359
else :
344
360
json = filepath_or_buffer
345
361
@@ -349,6 +365,28 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
349
365
lines = list (StringIO (json .strip ()))
350
366
json = '[' + ',' .join (lines ) + ']'
351
367
368
+ return _get_obj (typ , json , orient , dtype , convert_axes , convert_dates , keep_default_dates , numpy , precise_float , date_unit )
369
+
370
def _read_json_as_lines(fh, chunksize, typ='frame', orient=None, dtype=True,
                        convert_axes=True, convert_dates=True,
                        keep_default_dates=True, numpy=False,
                        precise_float=False, date_unit=None):
    """
    Read line-delimited JSON from an open handle, `chunksize` lines at a time.

    Each batch of lines is wrapped into a JSON array and parsed with
    `_get_obj`; the parsed pieces are concatenated once at the end.

    Parameters
    ----------
    fh : file-like
        Open text handle that yields one JSON document per line. It is
        always closed before this function returns, even if parsing fails.
    chunksize : integer
        Maximum number of lines to read and parse per batch.
    typ, orient, dtype, convert_axes, convert_dates, keep_default_dates,
    numpy, precise_float, date_unit
        Passed through unchanged to `_get_obj`. The defaults mirror the
        defaults in the `read_json` signature, so a caller that supplies
        only `fh` and `chunksize` gets default parsing; callers should
        forward the user's options explicitly.

    Returns
    -------
    result : object returned by `_get_obj` (a single parsed piece, or the
        concatenation of all pieces). For input with no non-blank lines
        this is the result of parsing an empty JSON array.
    """
    def _parse(json):
        return _get_obj(typ, json, orient, dtype, convert_axes,
                        convert_dates, keep_default_dates, numpy,
                        precise_float, date_unit)

    pieces = []
    try:
        while True:
            batch = list(islice(fh, chunksize))
            if not batch:
                # handle exhausted
                break

            # Drop blank lines: the non-chunked path strips the text before
            # joining, and an empty element would make the array invalid.
            stripped = (line.strip() for line in batch)
            records = [record for record in stripped if record]
            if records:
                pieces.append(_parse('[' + ','.join(records) + ']'))
    finally:
        # release the handle even when a batch fails to parse
        fh.close()

    if not pieces:
        # same text the non-chunked path builds for empty input
        return _parse('[]')

    # Concatenate once rather than per batch (avoids re-copying the
    # accumulated data every iteration). ignore_index renumbers the rows
    # 0..n-1 so each batch's restarted index does not produce duplicates.
    return concat(pieces, ignore_index=True)
386
+
387
+ def _get_obj (typ , json , orient , dtype , convert_axes , convert_dates ,
388
+ keep_default_dates , numpy , precise_float ,
389
+ date_unit ):
352
390
obj = None
353
391
if typ == 'frame' :
354
392
obj = FrameParser (json , orient , dtype , convert_axes , convert_dates ,
0 commit comments