43
43
ensure_str ,
44
44
is_period_dtype ,
45
45
)
46
+ from pandas .core .dtypes .generic import ABCIndex
46
47
47
48
from pandas import (
48
49
DataFrame ,
@@ -396,6 +397,7 @@ def read_json(
396
397
compression : CompressionOptions = ...,
397
398
nrows : int | None = ...,
398
399
storage_options : StorageOptions = ...,
400
+ use_nullable_dtypes : bool = ...,
399
401
) -> JsonReader [Literal ["frame" ]]:
400
402
...
401
403
@@ -419,6 +421,7 @@ def read_json(
419
421
compression : CompressionOptions = ...,
420
422
nrows : int | None = ...,
421
423
storage_options : StorageOptions = ...,
424
+ use_nullable_dtypes : bool = ...,
422
425
) -> JsonReader [Literal ["series" ]]:
423
426
...
424
427
@@ -442,6 +445,7 @@ def read_json(
442
445
compression : CompressionOptions = ...,
443
446
nrows : int | None = ...,
444
447
storage_options : StorageOptions = ...,
448
+ use_nullable_dtypes : bool = ...,
445
449
) -> Series :
446
450
...
447
451
@@ -465,6 +469,7 @@ def read_json(
465
469
compression : CompressionOptions = ...,
466
470
nrows : int | None = ...,
467
471
storage_options : StorageOptions = ...,
472
+ use_nullable_dtypes : bool = ...,
468
473
) -> DataFrame :
469
474
...
470
475
@@ -491,6 +496,7 @@ def read_json(
491
496
compression : CompressionOptions = "infer" ,
492
497
nrows : int | None = None ,
493
498
storage_options : StorageOptions = None ,
499
+ use_nullable_dtypes : bool = False ,
494
500
) -> DataFrame | Series | JsonReader :
495
501
"""
496
502
Convert a JSON string to pandas object.
@@ -629,6 +635,19 @@ def read_json(
629
635
630
636
.. versionadded:: 1.2.0
631
637
638
+ use_nullable_dtypes : bool, default False
639
+ Whether or not to use nullable dtypes as default when reading data. If
640
+ set to True, nullable dtypes are used for all dtypes that have a nullable
641
+ implementation, even if no nulls are present.
642
+
643
+ The nullable dtype implementation can be configured by calling
644
+ ``pd.set_option("mode.dtype_backend", "pandas")`` to use
645
+ numpy-backed nullable dtypes or
646
+ ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
647
+ pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).
648
+
649
+ .. versionadded:: 2.0.0
650
+
632
651
Returns
633
652
-------
634
653
Series or DataFrame
@@ -740,6 +759,7 @@ def read_json(
740
759
nrows = nrows ,
741
760
storage_options = storage_options ,
742
761
encoding_errors = encoding_errors ,
762
+ use_nullable_dtypes = use_nullable_dtypes ,
743
763
)
744
764
745
765
if chunksize :
@@ -775,6 +795,7 @@ def __init__(
775
795
nrows : int | None ,
776
796
storage_options : StorageOptions = None ,
777
797
encoding_errors : str | None = "strict" ,
798
+ use_nullable_dtypes : bool = False ,
778
799
) -> None :
779
800
780
801
self .orient = orient
@@ -794,6 +815,7 @@ def __init__(
794
815
self .nrows = nrows
795
816
self .encoding_errors = encoding_errors
796
817
self .handles : IOHandles [str ] | None = None
818
+ self .use_nullable_dtypes = use_nullable_dtypes
797
819
798
820
if self .chunksize is not None :
799
821
self .chunksize = validate_integer ("chunksize" , self .chunksize , 1 )
@@ -903,7 +925,10 @@ def read(self) -> DataFrame | Series:
903
925
obj = self ._get_object_parser (self ._combine_lines (data_lines ))
904
926
else :
905
927
obj = self ._get_object_parser (self .data )
906
- return obj
928
+ if self .use_nullable_dtypes :
929
+ return obj .convert_dtypes (infer_objects = False )
930
+ else :
931
+ return obj
907
932
908
933
def _get_object_parser (self , json ) -> DataFrame | Series :
909
934
"""
@@ -919,6 +944,7 @@ def _get_object_parser(self, json) -> DataFrame | Series:
919
944
"keep_default_dates" : self .keep_default_dates ,
920
945
"precise_float" : self .precise_float ,
921
946
"date_unit" : self .date_unit ,
947
+ "use_nullable_dtypes" : self .use_nullable_dtypes ,
922
948
}
923
949
obj = None
924
950
if typ == "frame" :
@@ -977,7 +1003,10 @@ def __next__(self) -> DataFrame | Series:
977
1003
self .close ()
978
1004
raise ex
979
1005
980
- return obj
1006
+ if self .use_nullable_dtypes :
1007
+ return obj .convert_dtypes (infer_objects = False )
1008
+ else :
1009
+ return obj
981
1010
982
1011
def __enter__ (self ) -> JsonReader [FrameSeriesStrT ]:
983
1012
return self
@@ -1013,6 +1042,7 @@ def __init__(
1013
1042
keep_default_dates : bool = False ,
1014
1043
precise_float : bool = False ,
1015
1044
date_unit = None ,
1045
+ use_nullable_dtypes : bool = False ,
1016
1046
) -> None :
1017
1047
self .json = json
1018
1048
@@ -1037,6 +1067,7 @@ def __init__(
1037
1067
self .date_unit = date_unit
1038
1068
self .keep_default_dates = keep_default_dates
1039
1069
self .obj : DataFrame | Series | None = None
1070
+ self .use_nullable_dtypes = use_nullable_dtypes
1040
1071
1041
1072
def check_keys_split (self , decoded ) -> None :
1042
1073
"""
@@ -1119,7 +1150,10 @@ def _try_convert_data(
1119
1150
if result :
1120
1151
return new_data , True
1121
1152
1122
- if data .dtype == "object" :
1153
+ if self .use_nullable_dtypes and not isinstance (data , ABCIndex ):
1154
+ # Fall through for conversion later on
1155
+ return data , True
1156
+ elif data .dtype == "object" :
1123
1157
1124
1158
# try float
1125
1159
try :
0 commit comments