Skip to content

Commit d393a66

Browse files
committed
Merge remote-tracking branch 'origin/master' into mcmali-s3-pub-test
2 parents 5d63555 + b29404e commit d393a66

File tree

3 files changed

+160
-251
lines changed

3 files changed

+160
-251
lines changed

deepsource.toml

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
version = 1
2+
3+
[[analyzers]]
4+
name = "python"
5+
enabled = true
6+
7+
[analyzers.meta]
8+
runtime_version = "3.x.x"

pandas/io/json/_json.py

+51-76
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,25 @@
1111
from pandas._libs.tslibs import iNaT
1212
from pandas._typing import JSONSerializable
1313
from pandas.errors import AbstractMethodError
14-
from pandas.util._decorators import deprecate_kwarg, deprecate_nonkeyword_arguments
14+
from pandas.util._decorators import deprecate_kwarg
1515

1616
from pandas.core.dtypes.common import ensure_str, is_period_dtype
1717

1818
from pandas import DataFrame, MultiIndex, Series, isna, to_datetime
1919
from pandas.core.construction import create_series_with_explicit_dtype
2020
from pandas.core.reshape.concat import concat
2121

22-
from pandas.io.common import get_filepath_or_buffer, get_handle, infer_compression
23-
from pandas.io.json._normalize import convert_to_line_delimits
24-
from pandas.io.json._table_schema import build_table_schema, parse_table_schema
22+
from pandas.io.common import (
23+
get_filepath_or_buffer,
24+
get_handle,
25+
infer_compression,
26+
stringify_path,
27+
)
2528
from pandas.io.parsers import _validate_integer
2629

30+
from ._normalize import convert_to_line_delimits
31+
from ._table_schema import build_table_schema, parse_table_schema
32+
2733
loads = json.loads
2834
dumps = json.dumps
2935

@@ -51,11 +57,7 @@ def to_json(
5157
"'index=False' is only valid when 'orient' is 'split' or 'table'"
5258
)
5359

54-
if path_or_buf is not None:
55-
path_or_buf, _, _, _ = get_filepath_or_buffer(
56-
path_or_buf, compression=compression, mode="w"
57-
)
58-
60+
path_or_buf = stringify_path(path_or_buf)
5961
if lines and orient != "records":
6062
raise ValueError("'lines' keyword only valid when 'orient' is records")
6163

@@ -266,6 +268,7 @@ def __init__(
266268
to know what the index is, forces orient to records, and forces
267269
date_format to 'iso'.
268270
"""
271+
269272
super().__init__(
270273
obj,
271274
orient,
@@ -306,7 +309,7 @@ def __init__(
306309
if len(timedeltas):
307310
obj[timedeltas] = obj[timedeltas].applymap(lambda x: x.isoformat())
308311
# Convert PeriodIndex to datetimes before serializing
309-
if is_period_dtype(obj.index.dtype):
312+
if is_period_dtype(obj.index):
310313
obj.index = obj.index.to_timestamp()
311314

312315
# exclude index from obj if index=False
@@ -345,25 +348,21 @@ def _write(
345348

346349

347350
@deprecate_kwarg(old_arg_name="numpy", new_arg_name=None)
348-
@deprecate_nonkeyword_arguments(
349-
version="2.0", allowed_args=["path_or_buf"], stacklevel=3
350-
)
351351
def read_json(
352352
path_or_buf=None,
353353
orient=None,
354354
typ="frame",
355355
dtype=None,
356356
convert_axes=None,
357357
convert_dates=True,
358-
keep_default_dates: bool = True,
359-
numpy: bool = False,
360-
precise_float: bool = False,
358+
keep_default_dates=True,
359+
numpy=False,
360+
precise_float=False,
361361
date_unit=None,
362362
encoding=None,
363-
lines: bool = False,
364-
chunksize: Optional[int] = None,
363+
lines=False,
364+
chunksize=None,
365365
compression="infer",
366-
nrows: Optional[int] = None,
367366
):
368367
"""
369368
Convert a JSON string to pandas object.
@@ -441,17 +440,8 @@ def read_json(
441440
Not applicable for ``orient='table'``.
442441
443442
convert_dates : bool or list of str, default True
444-
If True then default datelike columns may be converted (depending on
445-
keep_default_dates).
446-
If False, no dates will be converted.
447-
If a list of column names, then those columns will be converted and
448-
default datelike columns may also be converted (depending on
449-
keep_default_dates).
450-
451-
keep_default_dates : bool, default True
452-
If parsing dates (convert_dates is not False), then try to parse the
453-
default datelike columns.
454-
A column label is datelike if
443+
List of columns to parse for dates. If True, then try to parse
444+
datelike columns. A column label is datelike if
455445
456446
* it ends with ``'_at'``,
457447
@@ -463,6 +453,9 @@ def read_json(
463453
464454
* it is ``'date'``.
465455
456+
keep_default_dates : bool, default True
457+
If parsing dates, then parse the default datelike columns.
458+
466459
numpy : bool, default False
467460
Direct decoding to numpy arrays. Supports numeric data only, but
468461
non-numeric column and index labels are supported. Note also that the
@@ -495,19 +488,16 @@ def read_json(
495488
This can only be passed if `lines=True`.
496489
If this is None, the file will be read into memory all at once.
497490
491+
.. versionadded:: 0.21.0
492+
498493
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
499494
For on-the-fly decompression of on-disk data. If 'infer', then use
500495
gzip, bz2, zip or xz if path_or_buf is a string ending in
501496
'.gz', '.bz2', '.zip', or '.xz', respectively, and no decompression
502497
otherwise. If using 'zip', the ZIP file must contain only one data
503498
file to be read in. Set to None for no decompression.
504499
505-
nrows : int, optional
506-
The number of lines from the line-delimited jsonfile that has to be read.
507-
This can only be passed if `lines=True`.
508-
If this is None, all the rows will be returned.
509-
510-
.. versionadded:: 1.1
500+
.. versionadded:: 0.21.0
511501
512502
Returns
513503
-------
@@ -532,6 +522,7 @@ def read_json(
532522
533523
Examples
534524
--------
525+
535526
>>> df = pd.DataFrame([['a', 'b'], ['c', 'd']],
536527
... index=['row 1', 'row 2'],
537528
... columns=['col 1', 'col 2'])
@@ -577,6 +568,7 @@ def read_json(
577568
"data": [{"index": "row 1", "col 1": "a", "col 2": "b"},
578569
{"index": "row 2", "col 1": "c", "col 2": "d"}]}'
579570
"""
571+
580572
if orient == "table" and dtype:
581573
raise ValueError("cannot pass both dtype and orient='table'")
582574
if orient == "table" and convert_axes:
@@ -609,7 +601,6 @@ def read_json(
609601
lines=lines,
610602
chunksize=chunksize,
611603
compression=compression,
612-
nrows=nrows,
613604
)
614605

615606
if chunksize:
@@ -639,17 +630,17 @@ def __init__(
639630
dtype,
640631
convert_axes,
641632
convert_dates,
642-
keep_default_dates: bool,
643-
numpy: bool,
644-
precise_float: bool,
633+
keep_default_dates,
634+
numpy,
635+
precise_float,
645636
date_unit,
646637
encoding,
647-
lines: bool,
648-
chunksize: Optional[int],
638+
lines,
639+
chunksize,
649640
compression,
650-
nrows: Optional[int],
651641
):
652642

643+
self.path_or_buf = filepath_or_buffer
653644
self.orient = orient
654645
self.typ = typ
655646
self.dtype = dtype
@@ -665,16 +656,11 @@ def __init__(
665656
self.chunksize = chunksize
666657
self.nrows_seen = 0
667658
self.should_close = False
668-
self.nrows = nrows
669659

670660
if self.chunksize is not None:
671661
self.chunksize = _validate_integer("chunksize", self.chunksize, 1)
672662
if not self.lines:
673663
raise ValueError("chunksize can only be passed if lines=True")
674-
if self.nrows is not None:
675-
self.nrows = _validate_integer("nrows", self.nrows, 0)
676-
if not self.lines:
677-
raise ValueError("nrows can only be passed if lines=True")
678664

679665
data = self._get_data_from_filepath(filepath_or_buffer)
680666
self.data = self._preprocess_data(data)
@@ -687,9 +673,9 @@ def _preprocess_data(self, data):
687673
If self.chunksize, we prepare the data for the `__next__` method.
688674
Otherwise, we read it into memory for the `read` method.
689675
"""
690-
if hasattr(data, "read") and (not self.chunksize or not self.nrows):
676+
if hasattr(data, "read") and not self.chunksize:
691677
data = data.read()
692-
if not hasattr(data, "read") and (self.chunksize or self.nrows):
678+
if not hasattr(data, "read") and self.chunksize:
693679
data = StringIO(data)
694680

695681
return data
@@ -737,17 +723,11 @@ def read(self):
737723
"""
738724
Read the whole JSON input into a pandas object.
739725
"""
740-
if self.lines:
741-
if self.chunksize:
742-
obj = concat(self)
743-
elif self.nrows:
744-
lines = list(islice(self.data, self.nrows))
745-
lines_json = self._combine_lines(lines)
746-
obj = self._get_object_parser(lines_json)
747-
else:
748-
data = ensure_str(self.data)
749-
data = data.split("\n")
750-
obj = self._get_object_parser(self._combine_lines(data))
726+
if self.lines and self.chunksize:
727+
obj = concat(self)
728+
elif self.lines:
729+
data = ensure_str(self.data)
730+
obj = self._get_object_parser(self._combine_lines(data.split("\n")))
751731
else:
752732
obj = self._get_object_parser(self.data)
753733
self.close()
@@ -794,11 +774,6 @@ def close(self):
794774
pass
795775

796776
def __next__(self):
797-
if self.nrows:
798-
if self.nrows_seen >= self.nrows:
799-
self.close()
800-
raise StopIteration
801-
802777
lines = list(islice(self.data, self.chunksize))
803778
if lines:
804779
lines_json = self._combine_lines(lines)
@@ -893,15 +868,12 @@ def _convert_axes(self):
893868
"""
894869
Try to convert axes.
895870
"""
896-
for axis_name in self.obj._AXIS_ORDERS:
871+
for axis in self.obj._AXIS_NUMBERS.keys():
897872
new_axis, result = self._try_convert_data(
898-
name=axis_name,
899-
data=self.obj._get_axis(axis_name),
900-
use_dtypes=False,
901-
convert_dates=True,
873+
axis, self.obj._get_axis(axis), use_dtypes=False, convert_dates=True
902874
)
903875
if result:
904-
setattr(self.obj, axis_name, new_axis)
876+
setattr(self.obj, axis, new_axis)
905877

906878
def _try_convert_types(self):
907879
raise AbstractMethodError(self)
@@ -910,6 +882,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
910882
"""
911883
Try to parse an ndarray-like object into a column by inferring its dtype.
912884
"""
885+
913886
# don't try to coerce, unless a force conversion
914887
if use_dtypes:
915888
if not self.dtype:
@@ -964,7 +937,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
964937
if (new_data == data).all():
965938
data = new_data
966939
result = True
967-
except (TypeError, ValueError, OverflowError):
940+
except (TypeError, ValueError):
968941
pass
969942

970943
# coerce ints to 64
@@ -986,6 +959,7 @@ def _try_convert_to_date(self, data):
986959
Try to coerce object in epoch/iso formats and integer/float in epoch
987960
formats. Return a boolean if parsing was successful.
988961
"""
962+
989963
# no conversion on empty
990964
if not len(data):
991965
return data, False
@@ -1000,9 +974,9 @@ def _try_convert_to_date(self, data):
1000974
# ignore numbers that are out of range
1001975
if issubclass(new_data.dtype.type, np.number):
1002976
in_range = (
1003-
isna(new_data._values)
977+
isna(new_data.values)
1004978
| (new_data > self.min_stamp)
1005-
| (new_data._values == iNaT)
979+
| (new_data.values == iNaT)
1006980
)
1007981
if not in_range.all():
1008982
return data, False
@@ -1011,7 +985,7 @@ def _try_convert_to_date(self, data):
1011985
for date_unit in date_units:
1012986
try:
1013987
new_data = to_datetime(new_data, errors="raise", unit=date_unit)
1014-
except (ValueError, OverflowError, TypeError):
988+
except (ValueError, OverflowError):
1015989
continue
1016990
return new_data, True
1017991
return data, False
@@ -1139,6 +1113,7 @@ def _process_converter(self, f, filt=None):
11391113
"""
11401114
Take a conversion function and possibly recreate the frame.
11411115
"""
1116+
11421117
if filt is None:
11431118
filt = lambda col, c: True
11441119

0 commit comments

Comments
 (0)