11
11
from pandas ._libs .tslibs import iNaT
12
12
from pandas ._typing import JSONSerializable
13
13
from pandas .errors import AbstractMethodError
14
- from pandas .util ._decorators import deprecate_kwarg , deprecate_nonkeyword_arguments
14
+ from pandas .util ._decorators import deprecate_kwarg
15
15
16
16
from pandas .core .dtypes .common import ensure_str , is_period_dtype
17
17
18
18
from pandas import DataFrame , MultiIndex , Series , isna , to_datetime
19
19
from pandas .core .construction import create_series_with_explicit_dtype
20
20
from pandas .core .reshape .concat import concat
21
21
22
- from pandas .io .common import get_filepath_or_buffer , get_handle , infer_compression
23
- from pandas .io .json ._normalize import convert_to_line_delimits
24
- from pandas .io .json ._table_schema import build_table_schema , parse_table_schema
22
+ from pandas .io .common import (
23
+ get_filepath_or_buffer ,
24
+ get_handle ,
25
+ infer_compression ,
26
+ stringify_path ,
27
+ )
25
28
from pandas .io .parsers import _validate_integer
26
29
30
+ from ._normalize import convert_to_line_delimits
31
+ from ._table_schema import build_table_schema , parse_table_schema
32
+
27
33
loads = json .loads
28
34
dumps = json .dumps
29
35
@@ -51,11 +57,7 @@ def to_json(
51
57
"'index=False' is only valid when 'orient' is 'split' or 'table'"
52
58
)
53
59
54
- if path_or_buf is not None :
55
- path_or_buf , _ , _ , _ = get_filepath_or_buffer (
56
- path_or_buf , compression = compression , mode = "w"
57
- )
58
-
60
+ path_or_buf = stringify_path (path_or_buf )
59
61
if lines and orient != "records" :
60
62
raise ValueError ("'lines' keyword only valid when 'orient' is records" )
61
63
@@ -266,6 +268,7 @@ def __init__(
266
268
to know what the index is, forces orient to records, and forces
267
269
date_format to 'iso'.
268
270
"""
271
+
269
272
super ().__init__ (
270
273
obj ,
271
274
orient ,
@@ -306,7 +309,7 @@ def __init__(
306
309
if len (timedeltas ):
307
310
obj [timedeltas ] = obj [timedeltas ].applymap (lambda x : x .isoformat ())
308
311
# Convert PeriodIndex to datetimes before serializing
309
- if is_period_dtype (obj .index . dtype ):
312
+ if is_period_dtype (obj .index ):
310
313
obj .index = obj .index .to_timestamp ()
311
314
312
315
# exclude index from obj if index=False
@@ -345,25 +348,21 @@ def _write(
345
348
346
349
347
350
@deprecate_kwarg (old_arg_name = "numpy" , new_arg_name = None )
348
- @deprecate_nonkeyword_arguments (
349
- version = "2.0" , allowed_args = ["path_or_buf" ], stacklevel = 3
350
- )
351
351
def read_json (
352
352
path_or_buf = None ,
353
353
orient = None ,
354
354
typ = "frame" ,
355
355
dtype = None ,
356
356
convert_axes = None ,
357
357
convert_dates = True ,
358
- keep_default_dates : bool = True ,
359
- numpy : bool = False ,
360
- precise_float : bool = False ,
358
+ keep_default_dates = True ,
359
+ numpy = False ,
360
+ precise_float = False ,
361
361
date_unit = None ,
362
362
encoding = None ,
363
- lines : bool = False ,
364
- chunksize : Optional [ int ] = None ,
363
+ lines = False ,
364
+ chunksize = None ,
365
365
compression = "infer" ,
366
- nrows : Optional [int ] = None ,
367
366
):
368
367
"""
369
368
Convert a JSON string to pandas object.
@@ -441,17 +440,8 @@ def read_json(
441
440
Not applicable for ``orient='table'``.
442
441
443
442
convert_dates : bool or list of str, default True
444
- If True then default datelike columns may be converted (depending on
445
- keep_default_dates).
446
- If False, no dates will be converted.
447
- If a list of column names, then those columns will be converted and
448
- default datelike columns may also be converted (depending on
449
- keep_default_dates).
450
-
451
- keep_default_dates : bool, default True
452
- If parsing dates (convert_dates is not False), then try to parse the
453
- default datelike columns.
454
- A column label is datelike if
443
+ List of columns to parse for dates. If True, then try to parse
444
+ datelike columns. A column label is datelike if
455
445
456
446
* it ends with ``'_at'``,
457
447
@@ -463,6 +453,9 @@ def read_json(
463
453
464
454
* it is ``'date'``.
465
455
456
+ keep_default_dates : bool, default True
457
+ If parsing dates, then parse the default datelike columns.
458
+
466
459
numpy : bool, default False
467
460
Direct decoding to numpy arrays. Supports numeric data only, but
468
461
non-numeric column and index labels are supported. Note also that the
@@ -495,19 +488,16 @@ def read_json(
495
488
This can only be passed if `lines=True`.
496
489
If this is None, the file will be read into memory all at once.
497
490
491
+ .. versionadded:: 0.21.0
492
+
498
493
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
499
494
For on-the-fly decompression of on-disk data. If 'infer', then use
500
495
gzip, bz2, zip or xz if path_or_buf is a string ending in
501
496
'.gz', '.bz2', '.zip', or 'xz', respectively, and no decompression
502
497
otherwise. If using 'zip', the ZIP file must contain only one data
503
498
file to be read in. Set to None for no decompression.
504
499
505
- nrows : int, optional
506
- The number of lines from the line-delimited jsonfile that has to be read.
507
- This can only be passed if `lines=True`.
508
- If this is None, all the rows will be returned.
509
-
510
- .. versionadded:: 1.1
500
+ .. versionadded:: 0.21.0
511
501
512
502
Returns
513
503
-------
@@ -532,6 +522,7 @@ def read_json(
532
522
533
523
Examples
534
524
--------
525
+
535
526
>>> df = pd.DataFrame([['a', 'b'], ['c', 'd']],
536
527
... index=['row 1', 'row 2'],
537
528
... columns=['col 1', 'col 2'])
@@ -577,6 +568,7 @@ def read_json(
577
568
"data": [{"index": "row 1", "col 1": "a", "col 2": "b"},
578
569
{"index": "row 2", "col 1": "c", "col 2": "d"}]}'
579
570
"""
571
+
580
572
if orient == "table" and dtype :
581
573
raise ValueError ("cannot pass both dtype and orient='table'" )
582
574
if orient == "table" and convert_axes :
@@ -609,7 +601,6 @@ def read_json(
609
601
lines = lines ,
610
602
chunksize = chunksize ,
611
603
compression = compression ,
612
- nrows = nrows ,
613
604
)
614
605
615
606
if chunksize :
@@ -639,17 +630,17 @@ def __init__(
639
630
dtype ,
640
631
convert_axes ,
641
632
convert_dates ,
642
- keep_default_dates : bool ,
643
- numpy : bool ,
644
- precise_float : bool ,
633
+ keep_default_dates ,
634
+ numpy ,
635
+ precise_float ,
645
636
date_unit ,
646
637
encoding ,
647
- lines : bool ,
648
- chunksize : Optional [ int ] ,
638
+ lines ,
639
+ chunksize ,
649
640
compression ,
650
- nrows : Optional [int ],
651
641
):
652
642
643
+ self .path_or_buf = filepath_or_buffer
653
644
self .orient = orient
654
645
self .typ = typ
655
646
self .dtype = dtype
@@ -665,16 +656,11 @@ def __init__(
665
656
self .chunksize = chunksize
666
657
self .nrows_seen = 0
667
658
self .should_close = False
668
- self .nrows = nrows
669
659
670
660
if self .chunksize is not None :
671
661
self .chunksize = _validate_integer ("chunksize" , self .chunksize , 1 )
672
662
if not self .lines :
673
663
raise ValueError ("chunksize can only be passed if lines=True" )
674
- if self .nrows is not None :
675
- self .nrows = _validate_integer ("nrows" , self .nrows , 0 )
676
- if not self .lines :
677
- raise ValueError ("nrows can only be passed if lines=True" )
678
664
679
665
data = self ._get_data_from_filepath (filepath_or_buffer )
680
666
self .data = self ._preprocess_data (data )
@@ -687,9 +673,9 @@ def _preprocess_data(self, data):
687
673
If self.chunksize, we prepare the data for the `__next__` method.
688
674
Otherwise, we read it into memory for the `read` method.
689
675
"""
690
- if hasattr (data , "read" ) and ( not self .chunksize or not self . nrows ) :
676
+ if hasattr (data , "read" ) and not self .chunksize :
691
677
data = data .read ()
692
- if not hasattr (data , "read" ) and ( self .chunksize or self . nrows ) :
678
+ if not hasattr (data , "read" ) and self .chunksize :
693
679
data = StringIO (data )
694
680
695
681
return data
@@ -737,17 +723,11 @@ def read(self):
737
723
"""
738
724
Read the whole JSON input into a pandas object.
739
725
"""
740
- if self .lines :
741
- if self .chunksize :
742
- obj = concat (self )
743
- elif self .nrows :
744
- lines = list (islice (self .data , self .nrows ))
745
- lines_json = self ._combine_lines (lines )
746
- obj = self ._get_object_parser (lines_json )
747
- else :
748
- data = ensure_str (self .data )
749
- data = data .split ("\n " )
750
- obj = self ._get_object_parser (self ._combine_lines (data ))
726
+ if self .lines and self .chunksize :
727
+ obj = concat (self )
728
+ elif self .lines :
729
+ data = ensure_str (self .data )
730
+ obj = self ._get_object_parser (self ._combine_lines (data .split ("\n " )))
751
731
else :
752
732
obj = self ._get_object_parser (self .data )
753
733
self .close ()
@@ -794,11 +774,6 @@ def close(self):
794
774
pass
795
775
796
776
def __next__ (self ):
797
- if self .nrows :
798
- if self .nrows_seen >= self .nrows :
799
- self .close ()
800
- raise StopIteration
801
-
802
777
lines = list (islice (self .data , self .chunksize ))
803
778
if lines :
804
779
lines_json = self ._combine_lines (lines )
@@ -893,15 +868,12 @@ def _convert_axes(self):
893
868
"""
894
869
Try to convert axes.
895
870
"""
896
- for axis_name in self .obj ._AXIS_ORDERS :
871
+ for axis in self .obj ._AXIS_NUMBERS . keys () :
897
872
new_axis , result = self ._try_convert_data (
898
- name = axis_name ,
899
- data = self .obj ._get_axis (axis_name ),
900
- use_dtypes = False ,
901
- convert_dates = True ,
873
+ axis , self .obj ._get_axis (axis ), use_dtypes = False , convert_dates = True
902
874
)
903
875
if result :
904
- setattr (self .obj , axis_name , new_axis )
876
+ setattr (self .obj , axis , new_axis )
905
877
906
878
def _try_convert_types (self ):
907
879
raise AbstractMethodError (self )
@@ -910,6 +882,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
910
882
"""
911
883
Try to parse a ndarray like into a column by inferring dtype.
912
884
"""
885
+
913
886
# don't try to coerce, unless a force conversion
914
887
if use_dtypes :
915
888
if not self .dtype :
@@ -964,7 +937,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
964
937
if (new_data == data ).all ():
965
938
data = new_data
966
939
result = True
967
- except (TypeError , ValueError , OverflowError ):
940
+ except (TypeError , ValueError ):
968
941
pass
969
942
970
943
# coerce ints to 64
@@ -986,6 +959,7 @@ def _try_convert_to_date(self, data):
986
959
Try to coerce object in epoch/iso formats and integer/float in epoch
987
960
formats. Return a boolean if parsing was successful.
988
961
"""
962
+
989
963
# no conversion on empty
990
964
if not len (data ):
991
965
return data , False
@@ -1000,9 +974,9 @@ def _try_convert_to_date(self, data):
1000
974
# ignore numbers that are out of range
1001
975
if issubclass (new_data .dtype .type , np .number ):
1002
976
in_range = (
1003
- isna (new_data ._values )
977
+ isna (new_data .values )
1004
978
| (new_data > self .min_stamp )
1005
- | (new_data ._values == iNaT )
979
+ | (new_data .values == iNaT )
1006
980
)
1007
981
if not in_range .all ():
1008
982
return data , False
@@ -1011,7 +985,7 @@ def _try_convert_to_date(self, data):
1011
985
for date_unit in date_units :
1012
986
try :
1013
987
new_data = to_datetime (new_data , errors = "raise" , unit = date_unit )
1014
- except (ValueError , OverflowError , TypeError ):
988
+ except (ValueError , OverflowError ):
1015
989
continue
1016
990
return new_data , True
1017
991
return data , False
@@ -1139,6 +1113,7 @@ def _process_converter(self, f, filt=None):
1139
1113
"""
1140
1114
Take a conversion function and possibly recreate the frame.
1141
1115
"""
1116
+
1142
1117
if filt is None :
1143
1118
filt = lambda col , c : True
1144
1119
0 commit comments