@@ -363,6 +363,7 @@ def read_json(
363
363
lines = False ,
364
364
chunksize = None ,
365
365
compression = "infer" ,
366
+ nrows = None ,
366
367
):
367
368
"""
368
369
Convert a JSON string to pandas object.
@@ -493,6 +494,12 @@ def read_json(
493
494
for more information on ``chunksize``.
494
495
This can only be passed if `lines=True`.
495
496
If this is None, the file will be read into memory all at once.
497
+
498
+ nrows : int, optional
499
+ The number of lines from the line-delimited JSON file that have to be read.
500
+ This can only be passed if `lines=True`.
501
+ If this is None, all the rows will be returned.
502
+
496
503
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
497
504
For on-the-fly decompression of on-disk data. If 'infer', then use
498
505
gzip, bz2, zip or xz if path_or_buf is a string ending in
@@ -600,6 +607,7 @@ def read_json(
600
607
lines = lines ,
601
608
chunksize = chunksize ,
602
609
compression = compression ,
610
+ nrows = nrows ,
603
611
)
604
612
605
613
if chunksize :
@@ -637,6 +645,7 @@ def __init__(
637
645
lines ,
638
646
chunksize ,
639
647
compression ,
648
+ nrows
640
649
):
641
650
642
651
self .path_or_buf = filepath_or_buffer
@@ -655,11 +664,16 @@ def __init__(
655
664
self .chunksize = chunksize
656
665
self .nrows_seen = 0
657
666
self .should_close = False
667
+ self .nrows = nrows
658
668
659
669
if self .chunksize is not None :
660
670
self .chunksize = _validate_integer ("chunksize" , self .chunksize , 1 )
661
671
if not self .lines :
662
672
raise ValueError ("chunksize can only be passed if lines=True" )
673
+ if self .nrows is not None :
674
+ self .nrows = _validate_integer ("nrows" , self .nrows , 0 )
675
+ if not self .lines :
676
+ raise ValueError ("nrows can only be passed if lines=True" )
663
677
664
678
data = self ._get_data_from_filepath (filepath_or_buffer )
665
679
self .data = self ._preprocess_data (data )
@@ -726,7 +740,10 @@ def read(self):
726
740
obj = concat (self )
727
741
elif self .lines :
728
742
data = ensure_str (self .data )
729
- obj = self ._get_object_parser (self ._combine_lines (data .split ("\n " )))
743
+ data = data .split ("\n " )
744
+ if self .nrows :
745
+ data = data [:self .nrows ]
746
+ obj = self ._get_object_parser (self ._combine_lines (data ))
730
747
else :
731
748
obj = self ._get_object_parser (self .data )
732
749
self .close ()
@@ -773,6 +790,11 @@ def close(self):
773
790
pass
774
791
775
792
def __next__ (self ):
793
+ if self .nrows :
794
+ if self .nrows_seen >= self .nrows :
795
+ self .close ()
796
+ raise StopIteration
797
+
776
798
lines = list (islice (self .data , self .chunksize ))
777
799
if lines :
778
800
lines_json = self ._combine_lines (lines )
0 commit comments