1
1
from collections import abc
2
2
import functools
3
- from io import BytesIO , StringIO
3
+ from io import StringIO
4
4
from itertools import islice
5
5
import os
6
6
from typing import Any , Callable , Optional , Type
11
11
from pandas ._libs .tslibs import iNaT
12
12
from pandas ._typing import JSONSerializable
13
13
from pandas .errors import AbstractMethodError
14
- from pandas .util ._decorators import deprecate_kwarg , deprecate_nonkeyword_arguments
14
+ from pandas .util ._decorators import deprecate_kwarg
15
15
16
16
from pandas .core .dtypes .common import ensure_str , is_period_dtype
17
17
18
18
from pandas import DataFrame , MultiIndex , Series , isna , to_datetime
19
19
from pandas .core .construction import create_series_with_explicit_dtype
20
20
from pandas .core .reshape .concat import concat
21
21
22
- from pandas .io .common import get_filepath_or_buffer , get_handle , infer_compression
23
- from pandas .io .json ._normalize import convert_to_line_delimits
24
- from pandas .io .json ._table_schema import build_table_schema , parse_table_schema
22
+ from pandas .io .common import (
23
+ get_filepath_or_buffer ,
24
+ get_handle ,
25
+ infer_compression ,
26
+ stringify_path ,
27
+ )
25
28
from pandas .io .parsers import _validate_integer
26
29
30
+ from ._normalize import convert_to_line_delimits
31
+ from ._table_schema import build_table_schema , parse_table_schema
32
+
27
33
loads = json .loads
28
34
dumps = json .dumps
29
35
@@ -51,11 +57,7 @@ def to_json(
51
57
"'index=False' is only valid when 'orient' is 'split' or 'table'"
52
58
)
53
59
54
- if path_or_buf is not None :
55
- path_or_buf , _ , _ , _ = get_filepath_or_buffer (
56
- path_or_buf , compression = compression , mode = "w"
57
- )
58
-
60
+ path_or_buf = stringify_path (path_or_buf )
59
61
if lines and orient != "records" :
60
62
raise ValueError ("'lines' keyword only valid when 'orient' is records" )
61
63
@@ -266,6 +268,7 @@ def __init__(
266
268
to know what the index is, forces orient to records, and forces
267
269
date_format to 'iso'.
268
270
"""
271
+
269
272
super ().__init__ (
270
273
obj ,
271
274
orient ,
@@ -306,7 +309,7 @@ def __init__(
306
309
if len (timedeltas ):
307
310
obj [timedeltas ] = obj [timedeltas ].applymap (lambda x : x .isoformat ())
308
311
# Convert PeriodIndex to datetimes before serializing
309
- if is_period_dtype (obj .index . dtype ):
312
+ if is_period_dtype (obj .index ):
310
313
obj .index = obj .index .to_timestamp ()
311
314
312
315
# exclude index from obj if index=False
@@ -345,25 +348,21 @@ def _write(
345
348
346
349
347
350
@deprecate_kwarg (old_arg_name = "numpy" , new_arg_name = None )
348
- @deprecate_nonkeyword_arguments (
349
- version = "2.0" , allowed_args = ["path_or_buf" ], stacklevel = 3
350
- )
351
351
def read_json (
352
352
path_or_buf = None ,
353
353
orient = None ,
354
354
typ = "frame" ,
355
355
dtype = None ,
356
356
convert_axes = None ,
357
357
convert_dates = True ,
358
- keep_default_dates : bool = True ,
359
- numpy : bool = False ,
360
- precise_float : bool = False ,
358
+ keep_default_dates = True ,
359
+ numpy = False ,
360
+ precise_float = False ,
361
361
date_unit = None ,
362
362
encoding = None ,
363
- lines : bool = False ,
364
- chunksize : Optional [ int ] = None ,
363
+ lines = False ,
364
+ chunksize = None ,
365
365
compression = "infer" ,
366
- nrows : Optional [int ] = None ,
367
366
):
368
367
"""
369
368
Convert a JSON string to pandas object.
@@ -441,17 +440,8 @@ def read_json(
441
440
Not applicable for ``orient='table'``.
442
441
443
442
convert_dates : bool or list of str, default True
444
- If True then default datelike columns may be converted (depending on
445
- keep_default_dates).
446
- If False, no dates will be converted.
447
- If a list of column names, then those columns will be converted and
448
- default datelike columns may also be converted (depending on
449
- keep_default_dates).
450
-
451
- keep_default_dates : bool, default True
452
- If parsing dates (convert_dates is not False), then try to parse the
453
- default datelike columns.
454
- A column label is datelike if
443
+ List of columns to parse for dates. If True, then try to parse
444
+ datelike columns. A column label is datelike if
455
445
456
446
* it ends with ``'_at'``,
457
447
@@ -463,6 +453,9 @@ def read_json(
463
453
464
454
* it is ``'date'``.
465
455
456
+ keep_default_dates : bool, default True
457
+ If parsing dates, then parse the default datelike columns.
458
+
466
459
numpy : bool, default False
467
460
Direct decoding to numpy arrays. Supports numeric data only, but
468
461
non-numeric column and index labels are supported. Note also that the
@@ -495,19 +488,16 @@ def read_json(
495
488
This can only be passed if `lines=True`.
496
489
If this is None, the file will be read into memory all at once.
497
490
491
+ .. versionadded:: 0.21.0
492
+
498
493
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
499
494
For on-the-fly decompression of on-disk data. If 'infer', then use
500
495
gzip, bz2, zip or xz if path_or_buf is a string ending in
501
496
'.gz', '.bz2', '.zip', or '.xz', respectively, and no decompression
502
497
otherwise. If using 'zip', the ZIP file must contain only one data
503
498
file to be read in. Set to None for no decompression.
504
499
505
- nrows : int, optional
506
- The number of lines from the line-delimited jsonfile that has to be read.
507
- This can only be passed if `lines=True`.
508
- If this is None, all the rows will be returned.
509
-
510
- .. versionadded:: 1.1
500
+ .. versionadded:: 0.21.0
511
501
512
502
Returns
513
503
-------
@@ -532,6 +522,7 @@ def read_json(
532
522
533
523
Examples
534
524
--------
525
+
535
526
>>> df = pd.DataFrame([['a', 'b'], ['c', 'd']],
536
527
... index=['row 1', 'row 2'],
537
528
... columns=['col 1', 'col 2'])
@@ -577,6 +568,7 @@ def read_json(
577
568
"data": [{"index": "row 1", "col 1": "a", "col 2": "b"},
578
569
{"index": "row 2", "col 1": "c", "col 2": "d"}]}'
579
570
"""
571
+
580
572
if orient == "table" and dtype :
581
573
raise ValueError ("cannot pass both dtype and orient='table'" )
582
574
if orient == "table" and convert_axes :
@@ -609,7 +601,6 @@ def read_json(
609
601
lines = lines ,
610
602
chunksize = chunksize ,
611
603
compression = compression ,
612
- nrows = nrows ,
613
604
)
614
605
615
606
if chunksize :
@@ -639,17 +630,17 @@ def __init__(
639
630
dtype ,
640
631
convert_axes ,
641
632
convert_dates ,
642
- keep_default_dates : bool ,
643
- numpy : bool ,
644
- precise_float : bool ,
633
+ keep_default_dates ,
634
+ numpy ,
635
+ precise_float ,
645
636
date_unit ,
646
637
encoding ,
647
- lines : bool ,
648
- chunksize : Optional [ int ] ,
638
+ lines ,
639
+ chunksize ,
649
640
compression ,
650
- nrows : Optional [int ],
651
641
):
652
642
643
+ self .path_or_buf = filepath_or_buffer
653
644
self .orient = orient
654
645
self .typ = typ
655
646
self .dtype = dtype
@@ -665,16 +656,11 @@ def __init__(
665
656
self .chunksize = chunksize
666
657
self .nrows_seen = 0
667
658
self .should_close = False
668
- self .nrows = nrows
669
659
670
660
if self .chunksize is not None :
671
661
self .chunksize = _validate_integer ("chunksize" , self .chunksize , 1 )
672
662
if not self .lines :
673
663
raise ValueError ("chunksize can only be passed if lines=True" )
674
- if self .nrows is not None :
675
- self .nrows = _validate_integer ("nrows" , self .nrows , 0 )
676
- if not self .lines :
677
- raise ValueError ("nrows can only be passed if lines=True" )
678
664
679
665
data = self ._get_data_from_filepath (filepath_or_buffer )
680
666
self .data = self ._preprocess_data (data )
@@ -687,9 +673,9 @@ def _preprocess_data(self, data):
687
673
If self.chunksize, we prepare the data for the `__next__` method.
688
674
Otherwise, we read it into memory for the `read` method.
689
675
"""
690
- if hasattr (data , "read" ) and ( not self .chunksize or not self . nrows ) :
676
+ if hasattr (data , "read" ) and not self .chunksize :
691
677
data = data .read ()
692
- if not hasattr (data , "read" ) and ( self .chunksize or self . nrows ) :
678
+ if not hasattr (data , "read" ) and self .chunksize :
693
679
data = StringIO (data )
694
680
695
681
return data
@@ -724,9 +710,6 @@ def _get_data_from_filepath(self, filepath_or_buffer):
724
710
self .should_close = True
725
711
self .open_stream = data
726
712
727
- if isinstance (data , BytesIO ):
728
- data = data .getvalue ().decode ()
729
-
730
713
return data
731
714
732
715
def _combine_lines (self , lines ) -> str :
@@ -740,17 +723,11 @@ def read(self):
740
723
"""
741
724
Read the whole JSON input into a pandas object.
742
725
"""
743
- if self .lines :
744
- if self .chunksize :
745
- obj = concat (self )
746
- elif self .nrows :
747
- lines = list (islice (self .data , self .nrows ))
748
- lines_json = self ._combine_lines (lines )
749
- obj = self ._get_object_parser (lines_json )
750
- else :
751
- data = ensure_str (self .data )
752
- data = data .split ("\n " )
753
- obj = self ._get_object_parser (self ._combine_lines (data ))
726
+ if self .lines and self .chunksize :
727
+ obj = concat (self )
728
+ elif self .lines :
729
+ data = ensure_str (self .data )
730
+ obj = self ._get_object_parser (self ._combine_lines (data .split ("\n " )))
754
731
else :
755
732
obj = self ._get_object_parser (self .data )
756
733
self .close ()
@@ -797,11 +774,6 @@ def close(self):
797
774
pass
798
775
799
776
def __next__ (self ):
800
- if self .nrows :
801
- if self .nrows_seen >= self .nrows :
802
- self .close ()
803
- raise StopIteration
804
-
805
777
lines = list (islice (self .data , self .chunksize ))
806
778
if lines :
807
779
lines_json = self ._combine_lines (lines )
@@ -896,15 +868,12 @@ def _convert_axes(self):
896
868
"""
897
869
Try to convert axes.
898
870
"""
899
- for axis_name in self .obj ._AXIS_ORDERS :
871
+ for axis in self .obj ._AXIS_NUMBERS . keys () :
900
872
new_axis , result = self ._try_convert_data (
901
- name = axis_name ,
902
- data = self .obj ._get_axis (axis_name ),
903
- use_dtypes = False ,
904
- convert_dates = True ,
873
+ axis , self .obj ._get_axis (axis ), use_dtypes = False , convert_dates = True
905
874
)
906
875
if result :
907
- setattr (self .obj , axis_name , new_axis )
876
+ setattr (self .obj , axis , new_axis )
908
877
909
878
def _try_convert_types (self ):
910
879
raise AbstractMethodError (self )
@@ -913,6 +882,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
913
882
"""
914
883
Try to parse an ndarray-like into a column by inferring dtype.
915
884
"""
885
+
916
886
# don't try to coerce, unless a force conversion
917
887
if use_dtypes :
918
888
if not self .dtype :
@@ -967,7 +937,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
967
937
if (new_data == data ).all ():
968
938
data = new_data
969
939
result = True
970
- except (TypeError , ValueError , OverflowError ):
940
+ except (TypeError , ValueError ):
971
941
pass
972
942
973
943
# coerce ints to 64
@@ -989,6 +959,7 @@ def _try_convert_to_date(self, data):
989
959
Try to coerce object in epoch/iso formats and integer/float in epoch
990
960
formats. Return the data and a boolean indicating whether parsing succeeded.
991
961
"""
962
+
992
963
# no conversion on empty
993
964
if not len (data ):
994
965
return data , False
@@ -1003,9 +974,9 @@ def _try_convert_to_date(self, data):
1003
974
# ignore numbers that are out of range
1004
975
if issubclass (new_data .dtype .type , np .number ):
1005
976
in_range = (
1006
- isna (new_data ._values )
977
+ isna (new_data .values )
1007
978
| (new_data > self .min_stamp )
1008
- | (new_data ._values == iNaT )
979
+ | (new_data .values == iNaT )
1009
980
)
1010
981
if not in_range .all ():
1011
982
return data , False
@@ -1014,7 +985,7 @@ def _try_convert_to_date(self, data):
1014
985
for date_unit in date_units :
1015
986
try :
1016
987
new_data = to_datetime (new_data , errors = "raise" , unit = date_unit )
1017
- except (ValueError , OverflowError , TypeError ):
988
+ except (ValueError , OverflowError ):
1018
989
continue
1019
990
return new_data , True
1020
991
return data , False
@@ -1142,6 +1113,7 @@ def _process_converter(self, f, filt=None):
1142
1113
"""
1143
1114
Take a conversion function and possibly recreate the frame.
1144
1115
"""
1116
+
1145
1117
if filt is None :
1146
1118
filt = lambda col , c : True
1147
1119
0 commit comments