Merge remote-tracking branch 'origin/master' into mcmali-s3-pub-test

alimcmaster1 · alimcmaster1 · commit d6dd87786e14 · 2020-07-12T20:59:35.000+01:00
diff --git a/deepsource.toml b/deepsource.toml
@@ -0,0 +1,8 @@
+version = 1
+
+[[analyzers]]
+name = "python"
+enabled = true
+
+  [analyzers.meta]
+  runtime_version = "3.x.x"
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
@@ -1,6 +1,6 @@
 from collections import abc
 import functools
-from io import BytesIO, StringIO
+from io import StringIO
 from itertools import islice
 import os
 from typing import Any, Callable, Optional, Type
@@ -11,19 +11,25 @@
 from pandas._libs.tslibs import iNaT
 from pandas._typing import JSONSerializable
 from pandas.errors import AbstractMethodError
-from pandas.util._decorators import deprecate_kwarg, deprecate_nonkeyword_arguments
+from pandas.util._decorators import deprecate_kwarg
 
 from pandas.core.dtypes.common import ensure_str, is_period_dtype
 
 from pandas import DataFrame, MultiIndex, Series, isna, to_datetime
 from pandas.core.construction import create_series_with_explicit_dtype
 from pandas.core.reshape.concat import concat
 
-from pandas.io.common import get_filepath_or_buffer, get_handle, infer_compression
-from pandas.io.json._normalize import convert_to_line_delimits
-from pandas.io.json._table_schema import build_table_schema, parse_table_schema
+from pandas.io.common import (
+    get_filepath_or_buffer,
+    get_handle,
+    infer_compression,
+    stringify_path,
+)
 from pandas.io.parsers import _validate_integer
 
+from ._normalize import convert_to_line_delimits
+from ._table_schema import build_table_schema, parse_table_schema
+
 loads = json.loads
 dumps = json.dumps
 
@@ -51,11 +57,7 @@ def to_json(
             "'index=False' is only valid when 'orient' is 'split' or 'table'"
         )
 
-    if path_or_buf is not None:
-        path_or_buf, _, _, _ = get_filepath_or_buffer(
-            path_or_buf, compression=compression, mode="w"
-        )
-
+    path_or_buf = stringify_path(path_or_buf)
     if lines and orient != "records":
         raise ValueError("'lines' keyword only valid when 'orient' is records")
 
@@ -266,6 +268,7 @@ def __init__(
         to know what the index is, forces orient to records, and forces
         date_format to 'iso'.
         """
+
         super().__init__(
             obj,
             orient,
@@ -306,7 +309,7 @@ def __init__(
         if len(timedeltas):
             obj[timedeltas] = obj[timedeltas].applymap(lambda x: x.isoformat())
         # Convert PeriodIndex to datetimes before serializing
-        if is_period_dtype(obj.index.dtype):
+        if is_period_dtype(obj.index):
             obj.index = obj.index.to_timestamp()
 
         # exclude index from obj if index=False
@@ -345,25 +348,21 @@ def _write(
 
 
 @deprecate_kwarg(old_arg_name="numpy", new_arg_name=None)
-@deprecate_nonkeyword_arguments(
-    version="2.0", allowed_args=["path_or_buf"], stacklevel=3
-)
 def read_json(
     path_or_buf=None,
     orient=None,
     typ="frame",
     dtype=None,
     convert_axes=None,
     convert_dates=True,
-    keep_default_dates: bool = True,
-    numpy: bool = False,
-    precise_float: bool = False,
+    keep_default_dates=True,
+    numpy=False,
+    precise_float=False,
     date_unit=None,
     encoding=None,
-    lines: bool = False,
-    chunksize: Optional[int] = None,
+    lines=False,
+    chunksize=None,
     compression="infer",
-    nrows: Optional[int] = None,
 ):
     """
     Convert a JSON string to pandas object.
@@ -441,17 +440,8 @@ def read_json(
            Not applicable for ``orient='table'``.
 
     convert_dates : bool or list of str, default True
-        If True then default datelike columns may be converted (depending on
-        keep_default_dates).
-        If False, no dates will be converted.
-        If a list of column names, then those columns will be converted and
-        default datelike columns may also be converted (depending on
-        keep_default_dates).
-
-    keep_default_dates : bool, default True
-        If parsing dates (convert_dates is not False), then try to parse the
-        default datelike columns.
-        A column label is datelike if
+        List of columns to parse for dates. If True, then try to parse
+        datelike columns. A column label is datelike if
 
         * it ends with ``'_at'``,
 
@@ -463,6 +453,9 @@ def read_json(
 
         * it is ``'date'``.
 
+    keep_default_dates : bool, default True
+        If parsing dates, then parse the default datelike columns.
+
     numpy : bool, default False
         Direct decoding to numpy arrays. Supports numeric data only, but
         non-numeric column and index labels are supported. Note also that the
@@ -495,19 +488,16 @@ def read_json(
         This can only be passed if `lines=True`.
         If this is None, the file will be read into memory all at once.
 
+        .. versionadded:: 0.21.0
+
     compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
         For on-the-fly decompression of on-disk data. If 'infer', then use
         gzip, bz2, zip or xz if path_or_buf is a string ending in
         '.gz', '.bz2', '.zip', or '.xz', respectively, and no decompression
         otherwise. If using 'zip', the ZIP file must contain only one data
         file to be read in. Set to None for no decompression.
 
-    nrows : int, optional
-        The number of lines from the line-delimited jsonfile that has to be read.
-        This can only be passed if `lines=True`.
-        If this is None, all the rows will be returned.
-
-        .. versionadded:: 1.1
+        .. versionadded:: 0.21.0
 
     Returns
     -------
@@ -532,6 +522,7 @@ def read_json(
 
     Examples
     --------
+
     >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']],
     ...                   index=['row 1', 'row 2'],
     ...                   columns=['col 1', 'col 2'])
@@ -577,6 +568,7 @@ def read_json(
         "data": [{"index": "row 1", "col 1": "a", "col 2": "b"},
                 {"index": "row 2", "col 1": "c", "col 2": "d"}]}'
     """
+
     if orient == "table" and dtype:
         raise ValueError("cannot pass both dtype and orient='table'")
     if orient == "table" and convert_axes:
@@ -609,7 +601,6 @@ def read_json(
         lines=lines,
         chunksize=chunksize,
         compression=compression,
-        nrows=nrows,
     )
 
     if chunksize:
@@ -639,17 +630,17 @@ def __init__(
         dtype,
         convert_axes,
         convert_dates,
-        keep_default_dates: bool,
-        numpy: bool,
-        precise_float: bool,
+        keep_default_dates,
+        numpy,
+        precise_float,
         date_unit,
         encoding,
-        lines: bool,
-        chunksize: Optional[int],
+        lines,
+        chunksize,
         compression,
-        nrows: Optional[int],
     ):
 
+        self.path_or_buf = filepath_or_buffer
         self.orient = orient
         self.typ = typ
         self.dtype = dtype
@@ -665,16 +656,11 @@ def __init__(
         self.chunksize = chunksize
         self.nrows_seen = 0
         self.should_close = False
-        self.nrows = nrows
 
         if self.chunksize is not None:
             self.chunksize = _validate_integer("chunksize", self.chunksize, 1)
             if not self.lines:
                 raise ValueError("chunksize can only be passed if lines=True")
-        if self.nrows is not None:
-            self.nrows = _validate_integer("nrows", self.nrows, 0)
-            if not self.lines:
-                raise ValueError("nrows can only be passed if lines=True")
 
         data = self._get_data_from_filepath(filepath_or_buffer)
         self.data = self._preprocess_data(data)
@@ -687,9 +673,9 @@ def _preprocess_data(self, data):
         If self.chunksize, we prepare the data for the `__next__` method.
         Otherwise, we read it into memory for the `read` method.
         """
-        if hasattr(data, "read") and (not self.chunksize or not self.nrows):
+        if hasattr(data, "read") and not self.chunksize:
             data = data.read()
-        if not hasattr(data, "read") and (self.chunksize or self.nrows):
+        if not hasattr(data, "read") and self.chunksize:
             data = StringIO(data)
 
         return data
@@ -724,9 +710,6 @@ def _get_data_from_filepath(self, filepath_or_buffer):
             self.should_close = True
             self.open_stream = data
 
-        if isinstance(data, BytesIO):
-            data = data.getvalue().decode()
-
         return data
 
     def _combine_lines(self, lines) -> str:
@@ -740,17 +723,11 @@ def read(self):
         """
         Read the whole JSON input into a pandas object.
         """
-        if self.lines:
-            if self.chunksize:
-                obj = concat(self)
-            elif self.nrows:
-                lines = list(islice(self.data, self.nrows))
-                lines_json = self._combine_lines(lines)
-                obj = self._get_object_parser(lines_json)
-            else:
-                data = ensure_str(self.data)
-                data = data.split("\n")
-                obj = self._get_object_parser(self._combine_lines(data))
+        if self.lines and self.chunksize:
+            obj = concat(self)
+        elif self.lines:
+            data = ensure_str(self.data)
+            obj = self._get_object_parser(self._combine_lines(data.split("\n")))
         else:
             obj = self._get_object_parser(self.data)
         self.close()
@@ -797,11 +774,6 @@ def close(self):
                 pass
 
     def __next__(self):
-        if self.nrows:
-            if self.nrows_seen >= self.nrows:
-                self.close()
-                raise StopIteration
-
         lines = list(islice(self.data, self.chunksize))
         if lines:
             lines_json = self._combine_lines(lines)
@@ -896,15 +868,12 @@ def _convert_axes(self):
         """
         Try to convert axes.
         """
-        for axis_name in self.obj._AXIS_ORDERS:
+        for axis in self.obj._AXIS_NUMBERS.keys():
             new_axis, result = self._try_convert_data(
-                name=axis_name,
-                data=self.obj._get_axis(axis_name),
-                use_dtypes=False,
-                convert_dates=True,
+                axis, self.obj._get_axis(axis), use_dtypes=False, convert_dates=True
             )
             if result:
-                setattr(self.obj, axis_name, new_axis)
+                setattr(self.obj, axis, new_axis)
 
     def _try_convert_types(self):
         raise AbstractMethodError(self)
@@ -913,6 +882,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
         """
         Try to parse an ndarray-like into a column by inferring dtype.
         """
+
         # don't try to coerce, unless a force conversion
         if use_dtypes:
             if not self.dtype:
@@ -967,7 +937,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
                 if (new_data == data).all():
                     data = new_data
                     result = True
-            except (TypeError, ValueError, OverflowError):
+            except (TypeError, ValueError):
                 pass
 
         # coerce ints to 64
@@ -989,6 +959,7 @@ def _try_convert_to_date(self, data):
         Try to coerce object in epoch/iso formats and integer/float in epoch
         formats. Return the data and a boolean indicating whether parsing succeeded.
         """
+
         # no conversion on empty
         if not len(data):
             return data, False
@@ -1003,9 +974,9 @@ def _try_convert_to_date(self, data):
         # ignore numbers that are out of range
         if issubclass(new_data.dtype.type, np.number):
             in_range = (
-                isna(new_data._values)
+                isna(new_data.values)
                 | (new_data > self.min_stamp)
-                | (new_data._values == iNaT)
+                | (new_data.values == iNaT)
             )
             if not in_range.all():
                 return data, False
@@ -1014,7 +985,7 @@ def _try_convert_to_date(self, data):
         for date_unit in date_units:
             try:
                 new_data = to_datetime(new_data, errors="raise", unit=date_unit)
-            except (ValueError, OverflowError, TypeError):
+            except (ValueError, OverflowError):
                 continue
             return new_data, True
         return data, False
@@ -1142,6 +1113,7 @@ def _process_converter(self, f, filt=None):
         """
         Take a conversion function and possibly recreate the frame.
         """
+
         if filt is None:
             filt = lambda col, c: True
 
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py