
Commit d6dd877

Merge remote-tracking branch 'origin/master' into mcmali-s3-pub-test
2 parents 03ee472 + b29404e

3 files changed: +163 -289 lines


deepsource.toml

+8
@@ -0,0 +1,8 @@
+version = 1
+
+[[analyzers]]
+name = "python"
+enabled = true
+
+[analyzers.meta]
+runtime_version = "3.x.x"

pandas/io/json/_json.py

+52 -80
@@ -1,6 +1,6 @@
 from collections import abc
 import functools
-from io import BytesIO, StringIO
+from io import StringIO
 from itertools import islice
 import os
 from typing import Any, Callable, Optional, Type
@@ -11,19 +11,25 @@
 from pandas._libs.tslibs import iNaT
 from pandas._typing import JSONSerializable
 from pandas.errors import AbstractMethodError
-from pandas.util._decorators import deprecate_kwarg, deprecate_nonkeyword_arguments
+from pandas.util._decorators import deprecate_kwarg

 from pandas.core.dtypes.common import ensure_str, is_period_dtype

 from pandas import DataFrame, MultiIndex, Series, isna, to_datetime
 from pandas.core.construction import create_series_with_explicit_dtype
 from pandas.core.reshape.concat import concat

-from pandas.io.common import get_filepath_or_buffer, get_handle, infer_compression
-from pandas.io.json._normalize import convert_to_line_delimits
-from pandas.io.json._table_schema import build_table_schema, parse_table_schema
+from pandas.io.common import (
+    get_filepath_or_buffer,
+    get_handle,
+    infer_compression,
+    stringify_path,
+)
 from pandas.io.parsers import _validate_integer

+from ._normalize import convert_to_line_delimits
+from ._table_schema import build_table_schema, parse_table_schema
+
 loads = json.loads
 dumps = json.dumps

@@ -51,11 +57,7 @@ def to_json(
             "'index=False' is only valid when 'orient' is 'split' or 'table'"
         )

-    if path_or_buf is not None:
-        path_or_buf, _, _, _ = get_filepath_or_buffer(
-            path_or_buf, compression=compression, mode="w"
-        )
-
+    path_or_buf = stringify_path(path_or_buf)
     if lines and orient != "records":
         raise ValueError("'lines' keyword only valid when 'orient' is records")
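Note on the to_json() hunk above: the writer now only normalizes the target with stringify_path() instead of opening a handle up front, so path-like objects are still accepted because they are reduced to a plain string before the file is opened. A minimal sketch of the caller-visible behavior, with the output name "out.json" invented for illustration:

    from pathlib import Path
    import pandas as pd

    df = pd.DataFrame({"a": [1, 2]})
    # Path objects and plain strings both work; to_json opens the file itself.
    df.to_json(Path("out.json"), orient="records", lines=True)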

@@ -266,6 +268,7 @@ def __init__(
         to know what the index is, forces orient to records, and forces
         date_format to 'iso'.
         """
+
         super().__init__(
             obj,
             orient,
@@ -306,7 +309,7 @@ def __init__(
         if len(timedeltas):
             obj[timedeltas] = obj[timedeltas].applymap(lambda x: x.isoformat())
         # Convert PeriodIndex to datetimes before serializing
-        if is_period_dtype(obj.index.dtype):
+        if is_period_dtype(obj.index):
             obj.index = obj.index.to_timestamp()

         # exclude index from obj if index=False
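The hunk above converts a PeriodIndex to timestamps before serializing, since JSON has no native period representation. A small illustration using only public pandas API (the monthly index is made up):

    import pandas as pd

    idx = pd.period_range("2020-01", periods=3, freq="M")
    idx.to_timestamp()
    # DatetimeIndex(['2020-01-01', '2020-02-01', '2020-03-01'], ...)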
@@ -345,25 +348,21 @@ def _write(


 @deprecate_kwarg(old_arg_name="numpy", new_arg_name=None)
-@deprecate_nonkeyword_arguments(
-    version="2.0", allowed_args=["path_or_buf"], stacklevel=3
-)
 def read_json(
     path_or_buf=None,
     orient=None,
     typ="frame",
     dtype=None,
     convert_axes=None,
     convert_dates=True,
-    keep_default_dates: bool = True,
-    numpy: bool = False,
-    precise_float: bool = False,
+    keep_default_dates=True,
+    numpy=False,
+    precise_float=False,
     date_unit=None,
     encoding=None,
-    lines: bool = False,
-    chunksize: Optional[int] = None,
+    lines=False,
+    chunksize=None,
     compression="infer",
-    nrows: Optional[int] = None,
 ):
     """
     Convert a JSON string to pandas object.
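With nrows dropped from this signature, limiting how much of a line-delimited input is parsed goes through the chunked reader instead. A hedged sketch with an in-memory buffer (the records are invented):

    from io import StringIO
    import pandas as pd

    buf = StringIO('{"a": 1}\n{"a": 2}\n{"a": 3}\n')
    reader = pd.read_json(buf, orient="records", lines=True, chunksize=2)
    first_two = next(iter(reader))  # DataFrame holding only the first two records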
@@ -441,17 +440,8 @@ def read_json(
         Not applicable for ``orient='table'``.

     convert_dates : bool or list of str, default True
-        If True then default datelike columns may be converted (depending on
-        keep_default_dates).
-        If False, no dates will be converted.
-        If a list of column names, then those columns will be converted and
-        default datelike columns may also be converted (depending on
-        keep_default_dates).
-
-    keep_default_dates : bool, default True
-        If parsing dates (convert_dates is not False), then try to parse the
-        default datelike columns.
-        A column label is datelike if
+        List of columns to parse for dates. If True, then try to parse
+        datelike columns. A column label is datelike if

         * it ends with ``'_at'``,

463453
464454
* it is ``'date'``.
465455
456+
keep_default_dates : bool, default True
457+
If parsing dates, then parse the default datelike columns.
458+
466459
numpy : bool, default False
467460
Direct decoding to numpy arrays. Supports numeric data only, but
468461
non-numeric column and index labels are supported. Note also that the
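A short example of the convert_dates behavior documented above; the payload and column names are invented:

    import pandas as pd

    payload = '[{"created_at": "2020-01-01", "label": "x"}]'
    df = pd.read_json(payload, convert_dates=["created_at"])
    df.dtypes  # created_at -> datetime64[ns], label -> object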
@@ -495,19 +488,16 @@ def read_json(
         This can only be passed if `lines=True`.
         If this is None, the file will be read into memory all at once.

+        .. versionadded:: 0.21.0
+
     compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
         For on-the-fly decompression of on-disk data. If 'infer', then use
         gzip, bz2, zip or xz if path_or_buf is a string ending in
         '.gz', '.bz2', '.zip', or 'xz', respectively, and no decompression
         otherwise. If using 'zip', the ZIP file must contain only one data
         file to be read in. Set to None for no decompression.

-    nrows : int, optional
-        The number of lines from the line-delimited jsonfile that has to be read.
-        This can only be passed if `lines=True`.
-        If this is None, all the rows will be returned.
-
-        .. versionadded:: 1.1
+        .. versionadded:: 0.21.0

     Returns
     -------
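The compression parameter documented above is inferred from the file extension by default. A brief round-trip sketch; the file name "frame.json.gz" is hypothetical:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2]})
    df.to_json("frame.json.gz", compression="gzip")
    back = pd.read_json("frame.json.gz", compression="infer")  # gzip inferred from ".gz"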
@@ -532,6 +522,7 @@ def read_json(

     Examples
     --------
+
     >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']],
     ...                   index=['row 1', 'row 2'],
     ...                   columns=['col 1', 'col 2'])
@@ -577,6 +568,7 @@ def read_json(
     "data": [{"index": "row 1", "col 1": "a", "col 2": "b"},
     {"index": "row 2", "col 1": "c", "col 2": "d"}]}'
     """
+
     if orient == "table" and dtype:
         raise ValueError("cannot pass both dtype and orient='table'")
     if orient == "table" and convert_axes:
@@ -609,7 +601,6 @@ def read_json(
         lines=lines,
         chunksize=chunksize,
         compression=compression,
-        nrows=nrows,
     )

     if chunksize:
@@ -639,17 +630,17 @@ def __init__(
         dtype,
         convert_axes,
         convert_dates,
-        keep_default_dates: bool,
-        numpy: bool,
-        precise_float: bool,
+        keep_default_dates,
+        numpy,
+        precise_float,
         date_unit,
         encoding,
-        lines: bool,
-        chunksize: Optional[int],
+        lines,
+        chunksize,
         compression,
-        nrows: Optional[int],
     ):

+        self.path_or_buf = filepath_or_buffer
         self.orient = orient
         self.typ = typ
         self.dtype = dtype
@@ -665,16 +656,11 @@ def __init__(
         self.chunksize = chunksize
         self.nrows_seen = 0
         self.should_close = False
-        self.nrows = nrows

         if self.chunksize is not None:
             self.chunksize = _validate_integer("chunksize", self.chunksize, 1)
             if not self.lines:
                 raise ValueError("chunksize can only be passed if lines=True")
-        if self.nrows is not None:
-            self.nrows = _validate_integer("nrows", self.nrows, 0)
-            if not self.lines:
-                raise ValueError("nrows can only be passed if lines=True")

         data = self._get_data_from_filepath(filepath_or_buffer)
         self.data = self._preprocess_data(data)
@@ -687,9 +673,9 @@ def _preprocess_data(self, data):
         If self.chunksize, we prepare the data for the `__next__` method.
         Otherwise, we read it into memory for the `read` method.
         """
-        if hasattr(data, "read") and (not self.chunksize or not self.nrows):
+        if hasattr(data, "read") and not self.chunksize:
             data = data.read()
-        if not hasattr(data, "read") and (self.chunksize or self.nrows):
+        if not hasattr(data, "read") and self.chunksize:
             data = StringIO(data)

         return data
@@ -724,9 +710,6 @@ def _get_data_from_filepath(self, filepath_or_buffer):
             self.should_close = True
             self.open_stream = data

-        if isinstance(data, BytesIO):
-            data = data.getvalue().decode()
-
         return data

     def _combine_lines(self, lines) -> str:
@@ -740,17 +723,11 @@ def read(self):
         """
         Read the whole JSON input into a pandas object.
         """
-        if self.lines:
-            if self.chunksize:
-                obj = concat(self)
-            elif self.nrows:
-                lines = list(islice(self.data, self.nrows))
-                lines_json = self._combine_lines(lines)
-                obj = self._get_object_parser(lines_json)
-            else:
-                data = ensure_str(self.data)
-                data = data.split("\n")
-                obj = self._get_object_parser(self._combine_lines(data))
+        if self.lines and self.chunksize:
+            obj = concat(self)
+        elif self.lines:
+            data = ensure_str(self.data)
+            obj = self._get_object_parser(self._combine_lines(data.split("\n")))
         else:
             obj = self._get_object_parser(self.data)
         self.close()
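For the lines=True branch kept above: the whole input is split on newlines and passed through _combine_lines, which (roughly) joins the records into one JSON array string before parsing. A hedged approximation of that helper, not the pandas source itself:

    lines = ['{"a": 1}', '{"a": 2}', '']
    combined = "[" + ",".join(line for line in (l.strip() for l in lines) if line) + "]"
    # combined == '[{"a": 1},{"a": 2}]'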
@@ -797,11 +774,6 @@ def close(self):
             pass

     def __next__(self):
-        if self.nrows:
-            if self.nrows_seen >= self.nrows:
-                self.close()
-                raise StopIteration
-
         lines = list(islice(self.data, self.chunksize))
         if lines:
             lines_json = self._combine_lines(lines)
@@ -896,15 +868,12 @@ def _convert_axes(self):
         """
         Try to convert axes.
         """
-        for axis_name in self.obj._AXIS_ORDERS:
+        for axis in self.obj._AXIS_NUMBERS.keys():
             new_axis, result = self._try_convert_data(
-                name=axis_name,
-                data=self.obj._get_axis(axis_name),
-                use_dtypes=False,
-                convert_dates=True,
+                axis, self.obj._get_axis(axis), use_dtypes=False, convert_dates=True
             )
             if result:
-                setattr(self.obj, axis_name, new_axis)
+                setattr(self.obj, axis, new_axis)

     def _try_convert_types(self):
         raise AbstractMethodError(self)
@@ -913,6 +882,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
         """
         Try to parse a ndarray like into a column by inferring dtype.
         """
+
         # don't try to coerce, unless a force conversion
         if use_dtypes:
             if not self.dtype:
@@ -967,7 +937,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
                 if (new_data == data).all():
                     data = new_data
                     result = True
-            except (TypeError, ValueError, OverflowError):
+            except (TypeError, ValueError):
                 pass

         # coerce ints to 64
@@ -989,6 +959,7 @@ def _try_convert_to_date(self, data):
         Try to coerce object in epoch/iso formats and integer/float in epoch
         formats. Return a boolean if parsing was successful.
         """
+
         # no conversion on empty
         if not len(data):
             return data, False
@@ -1003,9 +974,9 @@ def _try_convert_to_date(self, data):
         # ignore numbers that are out of range
         if issubclass(new_data.dtype.type, np.number):
             in_range = (
-                isna(new_data._values)
+                isna(new_data.values)
                 | (new_data > self.min_stamp)
-                | (new_data._values == iNaT)
+                | (new_data.values == iNaT)
             )
             if not in_range.all():
                 return data, False
@@ -1014,7 +985,7 @@ def _try_convert_to_date(self, data):
         for date_unit in date_units:
             try:
                 new_data = to_datetime(new_data, errors="raise", unit=date_unit)
-            except (ValueError, OverflowError, TypeError):
+            except (ValueError, OverflowError):
                 continue
             return new_data, True
         return data, False
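The loop above falls back to epoch parsing by trying each date unit in turn and skipping units that raise. A stand-alone illustration of that conversion with made-up epoch values:

    import pandas as pd

    pd.to_datetime(pd.Series([1577836800]), unit="s")      # 2020-01-01 00:00:00
    pd.to_datetime(pd.Series([1577836800000]), unit="ms")  # same instant, millisecond epoch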
@@ -1142,6 +1113,7 @@ def _process_converter(self, f, filt=None):
         """
         Take a conversion function and possibly recreate the frame.
         """
+
         if filt is None:
             filt = lambda col, c: True
