29
29
DtypeArg ,
30
30
FilePath ,
31
31
IndexLabel ,
32
+ JSONEngine ,
32
33
JSONSerializable ,
33
34
ReadBuffer ,
34
35
StorageOptions ,
72
73
build_table_schema ,
73
74
parse_table_schema ,
74
75
)
76
+ from pandas .io .parsers .arrow_parser_wrapper import ArrowParserWrapper
77
+ from pandas .io .parsers .base_parser import ParserBase
75
78
from pandas .io .parsers .readers import validate_integer
76
79
77
80
if TYPE_CHECKING :
@@ -380,6 +383,7 @@ def read_json(
380
383
date_unit : str | None = ...,
381
384
encoding : str | None = ...,
382
385
encoding_errors : str | None = ...,
386
+ engine : JSONEngine | None = ...,
383
387
lines : bool = ...,
384
388
chunksize : int ,
385
389
compression : CompressionOptions = ...,
@@ -404,6 +408,7 @@ def read_json(
404
408
date_unit : str | None = ...,
405
409
encoding : str | None = ...,
406
410
encoding_errors : str | None = ...,
411
+ engine : JSONEngine | None = ...,
407
412
lines : bool = ...,
408
413
chunksize : int ,
409
414
compression : CompressionOptions = ...,
@@ -428,6 +433,7 @@ def read_json(
428
433
date_unit : str | None = ...,
429
434
encoding : str | None = ...,
430
435
encoding_errors : str | None = ...,
436
+ engine : JSONEngine | None = ...,
431
437
lines : bool = ...,
432
438
chunksize : None = ...,
433
439
compression : CompressionOptions = ...,
@@ -451,6 +457,7 @@ def read_json(
451
457
date_unit : str | None = ...,
452
458
encoding : str | None = ...,
453
459
encoding_errors : str | None = ...,
460
+ engine : JSONEngine | None = None ,
454
461
lines : bool = ...,
455
462
chunksize : None = ...,
456
463
compression : CompressionOptions = ...,
@@ -479,6 +486,7 @@ def read_json(
479
486
date_unit : str | None = None ,
480
487
encoding : str | None = None ,
481
488
encoding_errors : str | None = "strict" ,
489
+ engine : JSONEngine | None = None ,
482
490
lines : bool = False ,
483
491
chunksize : int | None = None ,
484
492
compression : CompressionOptions = "infer" ,
@@ -607,6 +615,9 @@ def read_json(
607
615
608
616
.. versionadded:: 1.3.0
609
617
618
+ engine : {{'ujson', 'pyarrow'}}
619
+ Parser engine to use.
620
+
610
621
lines : bool, default False
611
622
Read the file as a json object per line.
612
623
@@ -743,6 +754,7 @@ def read_json(
743
754
precise_float = precise_float ,
744
755
date_unit = date_unit ,
745
756
encoding = encoding ,
757
+ engine = engine ,
746
758
lines = lines ,
747
759
chunksize = chunksize ,
748
760
compression = compression ,
@@ -780,6 +792,7 @@ def __init__(
780
792
precise_float : bool ,
781
793
date_unit ,
782
794
encoding ,
795
+ engine ,
783
796
lines : bool ,
784
797
chunksize : int | None ,
785
798
compression : CompressionOptions ,
@@ -798,6 +811,7 @@ def __init__(
798
811
self .precise_float = precise_float
799
812
self .date_unit = date_unit
800
813
self .encoding = encoding
814
+ self .engine = engine
801
815
self .compression = compression
802
816
self .storage_options = storage_options
803
817
self .lines = lines
@@ -816,8 +830,32 @@ def __init__(
816
830
if not self .lines :
817
831
raise ValueError ("nrows can only be passed if lines=True" )
818
832
819
- data = self ._get_data_from_filepath (filepath_or_buffer )
820
- self .data = self ._preprocess_data (data )
833
+ if engine is not None :
834
+ self ._engine = self ._make_engine (filepath_or_buffer , self .engine )
835
+ else :
836
+ data = self ._get_data_from_filepath (filepath_or_buffer )
837
+ self .data = self ._preprocess_data (data )
838
+
839
+ def _make_engine (
840
+ self ,
841
+ filepath_or_buffer : FilePath | ReadBuffer [str ] | ReadBuffer [bytes ],
842
+ engine : JSONEngine ,
843
+ ) -> ParserBase :
844
+
845
+ mapping : dict [str , type [ParserBase ]] = {
846
+ "pyarrow" : ArrowParserWrapper ,
847
+ "ujson" : ...,
848
+ }
849
+
850
+ if engine not in mapping :
851
+ raise ValueError (
852
+ f"Unknown engine: { engine } (valid options are { mapping .keys ()} )"
853
+ )
854
+
855
+ if not isinstance (filepath_or_buffer , list ):
856
+ ...
857
+
858
+ return mapping [engine ](filepath_or_buffer )
821
859
822
860
def _preprocess_data (self , data ):
823
861
"""
0 commit comments