Skip to content

Commit a20e415

Browse files
committed
read_json engine argument integration
- added JSONEngine to _typing.py
- added engine to `read_json` inputs
- added engine to `read_json` docstring
- added engine logic to `JsonReader`
- added basis of the `_make_engine` method
1 parent 3f310c4 commit a20e415

File tree

2 files changed

+43
-2
lines changed

2 files changed

+43
-2
lines changed

pandas/_typing.py

+3
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,9 @@ def closed(self) -> bool:
319319
# read_csv engines
320320
CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"]
321321

322+
# read_json engines
323+
JSONEngine = Literal["ujson", "pyarrow"]
324+
322325
# read_xml parsers
323326
XMLParsers = Literal["lxml", "etree"]
324327

pandas/io/json/_json.py

+40-2
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
DtypeArg,
3030
FilePath,
3131
IndexLabel,
32+
JSONEngine,
3233
JSONSerializable,
3334
ReadBuffer,
3435
StorageOptions,
@@ -72,6 +73,8 @@
7273
build_table_schema,
7374
parse_table_schema,
7475
)
76+
from pandas.io.parsers.arrow_parser_wrapper import ArrowParserWrapper
77+
from pandas.io.parsers.base_parser import ParserBase
7578
from pandas.io.parsers.readers import validate_integer
7679

7780
if TYPE_CHECKING:
@@ -380,6 +383,7 @@ def read_json(
380383
date_unit: str | None = ...,
381384
encoding: str | None = ...,
382385
encoding_errors: str | None = ...,
386+
engine: JSONEngine | None = ...,
383387
lines: bool = ...,
384388
chunksize: int,
385389
compression: CompressionOptions = ...,
@@ -404,6 +408,7 @@ def read_json(
404408
date_unit: str | None = ...,
405409
encoding: str | None = ...,
406410
encoding_errors: str | None = ...,
411+
engine: JSONEngine | None = ...,
407412
lines: bool = ...,
408413
chunksize: int,
409414
compression: CompressionOptions = ...,
@@ -428,6 +433,7 @@ def read_json(
428433
date_unit: str | None = ...,
429434
encoding: str | None = ...,
430435
encoding_errors: str | None = ...,
436+
engine: JSONEngine | None = ...,
431437
lines: bool = ...,
432438
chunksize: None = ...,
433439
compression: CompressionOptions = ...,
@@ -451,6 +457,7 @@ def read_json(
451457
date_unit: str | None = ...,
452458
encoding: str | None = ...,
453459
encoding_errors: str | None = ...,
460+
engine: JSONEngine | None = None,
454461
lines: bool = ...,
455462
chunksize: None = ...,
456463
compression: CompressionOptions = ...,
@@ -479,6 +486,7 @@ def read_json(
479486
date_unit: str | None = None,
480487
encoding: str | None = None,
481488
encoding_errors: str | None = "strict",
489+
engine: JSONEngine | None = None,
482490
lines: bool = False,
483491
chunksize: int | None = None,
484492
compression: CompressionOptions = "infer",
@@ -607,6 +615,9 @@ def read_json(
607615
608616
.. versionadded:: 1.3.0
609617
618+
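engine : {{'ujson', 'pyarrow'}}, default None
619+
Parser engine to use. If None, no engine object is created and the input is read and preprocessed directly by the reader.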
620+
610621
lines : bool, default False
611622
Read the file as a json object per line.
612623
@@ -743,6 +754,7 @@ def read_json(
743754
precise_float=precise_float,
744755
date_unit=date_unit,
745756
encoding=encoding,
757+
engine=engine,
746758
lines=lines,
747759
chunksize=chunksize,
748760
compression=compression,
@@ -780,6 +792,7 @@ def __init__(
780792
precise_float: bool,
781793
date_unit,
782794
encoding,
795+
engine,
783796
lines: bool,
784797
chunksize: int | None,
785798
compression: CompressionOptions,
@@ -798,6 +811,7 @@ def __init__(
798811
self.precise_float = precise_float
799812
self.date_unit = date_unit
800813
self.encoding = encoding
814+
self.engine = engine
801815
self.compression = compression
802816
self.storage_options = storage_options
803817
self.lines = lines
@@ -816,8 +830,32 @@ def __init__(
816830
if not self.lines:
817831
raise ValueError("nrows can only be passed if lines=True")
818832

819-
data = self._get_data_from_filepath(filepath_or_buffer)
820-
self.data = self._preprocess_data(data)
833+
if engine is not None:
834+
self._engine = self._make_engine(filepath_or_buffer, self.engine)
835+
else:
836+
data = self._get_data_from_filepath(filepath_or_buffer)
837+
self.data = self._preprocess_data(data)
838+
839+
def _make_engine(
840+
self,
841+
filepath_or_buffer: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
842+
engine: JSONEngine,
843+
) -> ParserBase:
844+
845+
mapping: dict[str, type[ParserBase]] = {
846+
"pyarrow": ArrowParserWrapper,
847+
"ujson": ...,
848+
}
849+
850+
if engine not in mapping:
851+
raise ValueError(
852+
f"Unknown engine: {engine} (valid options are {mapping.keys()})"
853+
)
854+
855+
if not isinstance(filepath_or_buffer, list):
856+
...
857+
858+
return mapping[engine](filepath_or_buffer)
821859

822860
def _preprocess_data(self, data):
823861
"""

0 commit comments

Comments
 (0)