@@ -355,14 +355,15 @@ def read_json(
355
355
dtype = None ,
356
356
convert_axes = None ,
357
357
convert_dates = True ,
358
- keep_default_dates = True ,
359
- numpy = False ,
360
- precise_float = False ,
358
+ keep_default_dates : bool = True ,
359
+ numpy : bool = False ,
360
+ precise_float : bool = False ,
361
361
date_unit = None ,
362
362
encoding = None ,
363
- lines = False ,
364
- chunksize = None ,
363
+ lines : bool = False ,
364
+ chunksize : Optional [ int ] = None ,
365
365
compression = "infer" ,
366
+ nrows : Optional [int ] = None ,
366
367
):
367
368
"""
368
369
Convert a JSON string to pandas object.
@@ -493,13 +494,21 @@ def read_json(
493
494
for more information on ``chunksize``.
494
495
This can only be passed if `lines=True`.
495
496
If this is None, the file will be read into memory all at once.
497
+
496
498
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
497
499
For on-the-fly decompression of on-disk data. If 'infer', then use
498
500
gzip, bz2, zip or xz if path_or_buf is a string ending in
499
501
'.gz', '.bz2', '.zip', or '.xz', respectively, and no decompression
500
502
otherwise. If using 'zip', the ZIP file must contain only one data
501
503
file to be read in. Set to None for no decompression.
502
504
505
+ nrows : int, optional
506
+ The number of lines to read from the line-delimited JSON file.
507
+ This can only be passed if `lines=True`.
508
+ If this is None, all the rows will be returned.
509
+
510
+ .. versionadded:: 1.1
511
+
503
512
Returns
504
513
-------
505
514
Series or DataFrame
@@ -600,6 +609,7 @@ def read_json(
600
609
lines = lines ,
601
610
chunksize = chunksize ,
602
611
compression = compression ,
612
+ nrows = nrows ,
603
613
)
604
614
605
615
if chunksize :
@@ -629,14 +639,15 @@ def __init__(
629
639
dtype ,
630
640
convert_axes ,
631
641
convert_dates ,
632
- keep_default_dates ,
633
- numpy ,
634
- precise_float ,
642
+ keep_default_dates : bool ,
643
+ numpy : bool ,
644
+ precise_float : bool ,
635
645
date_unit ,
636
646
encoding ,
637
- lines ,
638
- chunksize ,
647
+ lines : bool ,
648
+ chunksize : Optional [ int ] ,
639
649
compression ,
650
+ nrows : Optional [int ],
640
651
):
641
652
642
653
self .orient = orient
@@ -654,11 +665,16 @@ def __init__(
654
665
self .chunksize = chunksize
655
666
self .nrows_seen = 0
656
667
self .should_close = False
668
+ self .nrows = nrows
657
669
658
670
if self .chunksize is not None :
659
671
self .chunksize = _validate_integer ("chunksize" , self .chunksize , 1 )
660
672
if not self .lines :
661
673
raise ValueError ("chunksize can only be passed if lines=True" )
674
+ if self .nrows is not None :
675
+ self .nrows = _validate_integer ("nrows" , self .nrows , 0 )
676
+ if not self .lines :
677
+ raise ValueError ("nrows can only be passed if lines=True" )
662
678
663
679
data = self ._get_data_from_filepath (filepath_or_buffer )
664
680
self .data = self ._preprocess_data (data )
@@ -671,9 +687,9 @@ def _preprocess_data(self, data):
671
687
If self.chunksize, we prepare the data for the `__next__` method.
672
688
Otherwise, we read it into memory for the `read` method.
673
689
"""
674
- if hasattr (data , "read" ) and not self .chunksize :
690
+ if hasattr (data , "read" ) and ( not self .chunksize or not self . nrows ) :
675
691
data = data .read ()
676
- if not hasattr (data , "read" ) and self .chunksize :
692
+ if not hasattr (data , "read" ) and ( self .chunksize or self . nrows ) :
677
693
data = StringIO (data )
678
694
679
695
return data
@@ -721,11 +737,17 @@ def read(self):
721
737
"""
722
738
Read the whole JSON input into a pandas object.
723
739
"""
724
- if self .lines and self .chunksize :
725
- obj = concat (self )
726
- elif self .lines :
727
- data = ensure_str (self .data )
728
- obj = self ._get_object_parser (self ._combine_lines (data .split ("\n " )))
740
+ if self .lines :
741
+ if self .chunksize :
742
+ obj = concat (self )
743
+ elif self .nrows :
744
+ lines = list (islice (self .data , self .nrows ))
745
+ lines_json = self ._combine_lines (lines )
746
+ obj = self ._get_object_parser (lines_json )
747
+ else :
748
+ data = ensure_str (self .data )
749
+ data = data .split ("\n " )
750
+ obj = self ._get_object_parser (self ._combine_lines (data ))
729
751
else :
730
752
obj = self ._get_object_parser (self .data )
731
753
self .close ()
@@ -772,6 +794,11 @@ def close(self):
772
794
pass
773
795
774
796
def __next__ (self ):
797
+ if self .nrows :
798
+ if self .nrows_seen >= self .nrows :
799
+ self .close ()
800
+ raise StopIteration
801
+
775
802
lines = list (islice (self .data , self .chunksize ))
776
803
if lines :
777
804
lines_json = self ._combine_lines (lines )
0 commit comments