73
73
build_table_schema ,
74
74
parse_table_schema ,
75
75
)
76
- from pandas .io .parsers .arrow_parser_wrapper import ArrowParserWrapper
77
- from pandas .io .parsers .base_parser import ParserBase
76
+ from pandas .io .json .arrow_json_parser_wrapper import ArrowJsonParserWrapper
78
77
from pandas .io .parsers .readers import validate_integer
79
78
80
79
if TYPE_CHECKING :
@@ -383,7 +382,7 @@ def read_json(
383
382
date_unit : str | None = ...,
384
383
encoding : str | None = ...,
385
384
encoding_errors : str | None = ...,
386
- engine : JSONEngine | None = ...,
385
+ engine : JSONEngine = ...,
387
386
lines : bool = ...,
388
387
chunksize : int ,
389
388
compression : CompressionOptions = ...,
@@ -408,7 +407,7 @@ def read_json(
408
407
date_unit : str | None = ...,
409
408
encoding : str | None = ...,
410
409
encoding_errors : str | None = ...,
411
- engine : JSONEngine | None = ...,
410
+ engine : JSONEngine = ...,
412
411
lines : bool = ...,
413
412
chunksize : int ,
414
413
compression : CompressionOptions = ...,
@@ -433,7 +432,7 @@ def read_json(
433
432
date_unit : str | None = ...,
434
433
encoding : str | None = ...,
435
434
encoding_errors : str | None = ...,
436
- engine : JSONEngine | None = ...,
435
+ engine : JSONEngine = ...,
437
436
lines : bool = ...,
438
437
chunksize : None = ...,
439
438
compression : CompressionOptions = ...,
@@ -457,7 +456,7 @@ def read_json(
457
456
date_unit : str | None = ...,
458
457
encoding : str | None = ...,
459
458
encoding_errors : str | None = ...,
460
- engine : JSONEngine | None = None ,
459
+ engine : JSONEngine = ... ,
461
460
lines : bool = ...,
462
461
chunksize : None = ...,
463
462
compression : CompressionOptions = ...,
@@ -486,7 +485,7 @@ def read_json(
486
485
date_unit : str | None = None ,
487
486
encoding : str | None = None ,
488
487
encoding_errors : str | None = "strict" ,
489
- engine : JSONEngine | None = None ,
488
+ engine : JSONEngine = "ujson" ,
490
489
lines : bool = False ,
491
490
chunksize : int | None = None ,
492
491
compression : CompressionOptions = "infer" ,
@@ -615,7 +614,7 @@ def read_json(
615
614
616
615
.. versionadded:: 1.3.0
617
616
618
- engine : {{'ujson', 'pyarrow'}}
617
+ engine : {{'ujson', 'pyarrow'}}, default "ujson"
619
618
Parser engine to use.
620
619
621
620
lines : bool, default False
@@ -792,13 +791,13 @@ def __init__(
792
791
precise_float : bool ,
793
792
date_unit ,
794
793
encoding ,
795
- engine ,
796
794
lines : bool ,
797
795
chunksize : int | None ,
798
796
compression : CompressionOptions ,
799
797
nrows : int | None ,
800
798
storage_options : StorageOptions = None ,
801
799
encoding_errors : str | None = "strict" ,
800
+ engine : JSONEngine = "ujson" ,
802
801
) -> None :
803
802
804
803
self .orient = orient
@@ -829,33 +828,45 @@ def __init__(
829
828
self .nrows = validate_integer ("nrows" , self .nrows , 0 )
830
829
if not self .lines :
831
830
raise ValueError ("nrows can only be passed if lines=True" )
831
+ if self .engine == "pyarrow" :
832
+ if not self .lines :
833
+ raise ValueError (
834
+ "currently pyarrow engine only supports "
835
+ "the line-delimited JSON format"
836
+ )
832
837
833
- if engine is not None :
838
+ if self . engine == "pyarrow" :
834
839
self ._engine = self ._make_engine (filepath_or_buffer , self .engine )
835
- else :
840
+ if self . engine == "ujson" :
836
841
data = self ._get_data_from_filepath (filepath_or_buffer )
837
842
self .data = self ._preprocess_data (data )
838
843
839
844
    def _make_engine(
        self,
        filepath_or_buffer: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
        engine: JSONEngine = "pyarrow",
    ) -> ArrowJsonParserWrapper:
        """
        Build the pyarrow-backed parser for ``filepath_or_buffer``.

        Parameters
        ----------
        filepath_or_buffer : FilePath | ReadBuffer[str] | ReadBuffer[bytes]
            Path or open buffer to read from. If it is not a list it is
            opened via ``get_handle`` first.
        engine : JSONEngine, default "pyarrow"
            NOTE(review): this parameter is never used in the body — only
            ``ArrowJsonParserWrapper`` is ever constructed. Presumably kept
            for signature symmetry with the csv parsers; confirm.

        Returns
        -------
        ArrowJsonParserWrapper
            Wrapper whose ``read()`` produces the parsed pandas object.

        Raises
        ------
        Exception
            Whatever ``ArrowJsonParserWrapper`` raises; the file handle
            opened here is closed first so it does not leak.
        """

        if not isinstance(filepath_or_buffer, list):
            # pyarrow consumes raw bytes, so the handle is opened in
            # binary mode with no text decoding.
            is_text = False
            mode = "rb"
            # Keep the IOHandles on self so close()/cleanup elsewhere on
            # the reader can release it later.
            self.handles = get_handle(
                filepath_or_buffer,
                mode=mode,
                encoding=self.encoding,
                is_text=is_text,
                compression=self.compression,
                storage_options=self.storage_options,
                errors=self.encoding_errors,
            )
            filepath_or_buffer = self.handles.handle

        try:
            return ArrowJsonParserWrapper(filepath_or_buffer)
        except Exception:
            # Construction failed: close the handle we just opened before
            # propagating, so the caller is not left with a leaked file.
            if self.handles is not None:
                self.handles.close()
            raise
859
870
860
871
def _preprocess_data (self , data ):
861
872
"""
@@ -939,20 +950,23 @@ def read(self) -> DataFrame | Series:
939
950
Read the whole JSON input into a pandas object.
940
951
"""
941
952
obj : DataFrame | Series
942
- if self .lines :
943
- if self .chunksize :
944
- obj = concat (self )
945
- elif self .nrows :
946
- lines = list (islice (self .data , self .nrows ))
947
- lines_json = self ._combine_lines (lines )
948
- obj = self ._get_object_parser (lines_json )
953
+ if self .engine == "pyarrow" :
954
+ obj = self ._engine .read ()
955
+ if self .engine == "ujson" :
956
+ if self .lines :
957
+ if self .chunksize :
958
+ obj = concat (self )
959
+ elif self .nrows :
960
+ lines = list (islice (self .data , self .nrows ))
961
+ lines_json = self ._combine_lines (lines )
962
+ obj = self ._get_object_parser (lines_json )
963
+ else :
964
+ data = ensure_str (self .data )
965
+ data_lines = data .split ("\n " )
966
+ obj = self ._get_object_parser (self ._combine_lines (data_lines ))
949
967
else :
950
- data = ensure_str (self .data )
951
- data_lines = data .split ("\n " )
952
- obj = self ._get_object_parser (self ._combine_lines (data_lines ))
953
- else :
954
- obj = self ._get_object_parser (self .data )
955
- self .close ()
968
+ obj = self ._get_object_parser (self .data )
969
+ self .close ()
956
970
return obj
957
971
958
972
def _get_object_parser (self , json ) -> DataFrame | Series :
0 commit comments