diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index 63e6b007f77a8..cc01270181202 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -2111,8 +2111,6 @@ is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series``
 * ``convert_axes`` : boolean, try to convert the axes to the proper dtypes, default is ``True``
 * ``convert_dates`` : a list of columns to parse for dates; If ``True``, then try to parse date-like columns, default is ``True``.
 * ``keep_default_dates`` : boolean, default ``True``. If parsing dates, then parse the default date-like columns.
-* ``numpy`` : direct decoding to NumPy arrays. default is ``False``;
-  Supports numeric data only, although labels may be non-numeric. Also note that the JSON ordering **MUST** be the same for each term if ``numpy=True``.
 * ``precise_float`` : boolean, default ``False``. Set to enable usage of higher precision (strtod) function when decoding string to double values. Default (``False``) is to use fast but less precise builtin functionality.
 * ``date_unit`` : string, the timestamp unit to detect if converting dates. Default None. By default the timestamp precision will be detected, if this is not desired
@@ -2216,74 +2214,6 @@ Dates written in nanoseconds need to be read back in nanoseconds:
    dfju = pd.read_json(json, date_unit="ns")
    dfju
 
-The Numpy parameter
-+++++++++++++++++++
-
-.. note::
-   This param has been deprecated as of version 1.0.0 and will raise a ``FutureWarning``.
-
-   This supports numeric data only. Index and columns labels may be non-numeric, e.g. strings, dates etc.
-
-If ``numpy=True`` is passed to ``read_json`` an attempt will be made to sniff
-an appropriate dtype during deserialization and to subsequently decode directly
-to NumPy arrays, bypassing the need for intermediate Python objects.
-
-This can provide speedups if you are deserialising a large amount of numeric
-data:
-
-.. ipython:: python
-
-   randfloats = np.random.uniform(-100, 1000, 10000)
-   randfloats.shape = (1000, 10)
-   dffloats = pd.DataFrame(randfloats, columns=list("ABCDEFGHIJ"))
-
-   jsonfloats = dffloats.to_json()
-
-.. ipython:: python
-
-   %timeit pd.read_json(jsonfloats)
-
-.. ipython:: python
-   :okwarning:
-
-   %timeit pd.read_json(jsonfloats, numpy=True)
-
-The speedup is less noticeable for smaller datasets:
-
-.. ipython:: python
-
-   jsonfloats = dffloats.head(100).to_json()
-
-.. ipython:: python
-
-   %timeit pd.read_json(jsonfloats)
-
-.. ipython:: python
-   :okwarning:
-
-   %timeit pd.read_json(jsonfloats, numpy=True)
-
-.. warning::
-
-   Direct NumPy decoding makes a number of assumptions and may fail or produce
-   unexpected output if these assumptions are not satisfied:
-
-    - data is numeric.
-
-    - data is uniform. The dtype is sniffed from the first value decoded.
-      A ``ValueError`` may be raised, or incorrect output may be produced
-      if this condition is not satisfied.
-
-    - labels are ordered. Labels are only read from the first container, it is assumed
-      that each subsequent row / column has been encoded in the same order. This should be satisfied if the
-      data was encoded using ``to_json`` but may not be the case if the JSON
-      is from another source.
-
-.. ipython:: python
-   :suppress:
-
-   os.remove("test.json")
-
 .. _io.json_normalize:
 
 Normalization
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index db7562e409fd1..b931b7503abb3 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -151,6 +151,7 @@ Removal of prior version deprecations/changes
 - Removed the ``numeric_only`` keyword from :meth:`Categorical.min` and :meth:`Categorical.max` in favor of ``skipna`` (:issue:`48821`)
 - Removed :func:`is_extension_type` in favor of :func:`is_extension_array_dtype` (:issue:`29457`)
 - Remove :meth:`DataFrameGroupBy.pad` and :meth:`DataFrameGroupBy.backfill` (:issue:`45076`)
+- Removed the ``numpy`` argument from :func:`read_json` (:issue:`30636`)
 - Removed the ``center`` keyword in :meth:`DataFrame.expanding` (:issue:`20647`)
 - Enforced :meth:`Rolling.count` with ``min_periods=None`` to default to the size of the window (:issue:`31302`)
diff --git a/pandas/_libs/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c
index c58f25b8f99ea..d7086ffba623a 100644
--- a/pandas/_libs/src/ujson/python/JSONtoObj.c
+++ b/pandas/_libs/src/ujson/python/JSONtoObj.c
@@ -83,12 +83,6 @@ JSOBJ Object_npyNewArrayList(void *prv, void *decoder);
 JSOBJ Object_npyEndArrayList(void *prv, JSOBJ obj);
 int Object_npyArrayListAddItem(void *prv, JSOBJ obj, JSOBJ value);
 
-// labelled support, encode keys and values of JS object into separate numpy
-// arrays
-JSOBJ Object_npyNewObject(void *prv, void *decoder);
-JSOBJ Object_npyEndObject(void *prv, JSOBJ obj);
-int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value);
-
 // free the numpy context buffer
 void Npy_releaseContext(NpyArrContext *npyarr) {
     PRINTMARK();
@@ -374,68 +368,6 @@ int Object_npyArrayListAddItem(void *prv, JSOBJ obj, JSOBJ value) {
     return 1;
 }
 
-JSOBJ Object_npyNewObject(void *prv, void *_decoder) {
-    PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder;
-    PRINTMARK();
-    if (decoder->curdim > 1) {
-        PyErr_SetString(PyExc_ValueError,
-                        "labels only supported up to 2 dimensions");
-        return NULL;
-    }
-
-    return ((JSONObjectDecoder *)decoder)->newArray(prv, decoder);
-}
-
-JSOBJ Object_npyEndObject(void *prv, JSOBJ obj) {
-    PyObject *list;
-    npy_intp labelidx;
-    NpyArrContext *npyarr = (NpyArrContext *)obj;
-    PRINTMARK();
-    if (!npyarr) {
-        return NULL;
-    }
-
-    labelidx = npyarr->dec->curdim - 1;
-
-    list = npyarr->labels[labelidx];
-    if (list) {
-        npyarr->labels[labelidx] = PyArray_FROM_O(list);
-        Py_DECREF(list);
-    }
-
-    return (PyObject *)((JSONObjectDecoder *)npyarr->dec)->endArray(prv, obj);
-}
-
-int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) {
-    PyObject *label, *labels;
-    npy_intp labelidx;
-    // add key to label array, value to values array
-    NpyArrContext *npyarr = (NpyArrContext *)obj;
-    PRINTMARK();
-    if (!npyarr) {
-        return 0;
-    }
-
-    label = (PyObject *)name;
-    labelidx = npyarr->dec->curdim - 1;
-
-    if (!npyarr->labels[labelidx]) {
-        npyarr->labels[labelidx] = PyList_New(0);
-    }
-    labels = npyarr->labels[labelidx];
-    // only fill label array once, assumes all column labels are the same
-    // for 2-dimensional arrays.
-    if (PyList_Check(labels) && PyList_GET_SIZE(labels) <= npyarr->elcount) {
-        PyList_Append(labels, label);
-    }
-
-    if (((JSONObjectDecoder *)npyarr->dec)->arrayAddItem(prv, obj, value)) {
-        Py_DECREF(label);
-        return 1;
-    }
-    return 0;
-}
-
 int Object_objectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) {
     int ret = PyDict_SetItem(obj, name, value);
     Py_DECREF((PyObject *)name);
@@ -494,7 +426,7 @@ static void Object_releaseObject(void *prv, JSOBJ obj, void *_decoder) {
     }
 }
 
-static char *g_kwlist[] = {"obj", "precise_float", "numpy",
-                           "labelled", "dtype", NULL};
+static char *g_kwlist[] = {"obj", "precise_float", "labelled", "dtype", NULL};
 
 PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) {
@@ -505,7 +437,7 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) {
     JSONObjectDecoder *decoder;
     PyObjectDecoder pyDecoder;
     PyArray_Descr *dtype = NULL;
-    int numpy = 0, labelled = 0;
+    int labelled = 0;
 
     JSONObjectDecoder dec = {
         Object_newString,  Object_objectAddKey,  Object_arrayAddItem,
@@ -528,7 +460,7 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) {
     decoder = (JSONObjectDecoder *)&pyDecoder;
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiiO&", g_kwlist, &arg,
-                                     &opreciseFloat, &numpy, &labelled,
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiO&", g_kwlist, &arg,
+                                     &opreciseFloat, &labelled,
                                      PyArray_DescrConverter2, &dtype)) {
         Npy_releaseContext(pyDecoder.npyarr);
         return NULL;
@@ -554,19 +486,6 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) {
     decoder->errorStr = NULL;
     decoder->errorOffset = NULL;
 
-    if (numpy) {
-        pyDecoder.dtype = dtype;
-        decoder->newArray = Object_npyNewArray;
-        decoder->endArray = Object_npyEndArray;
-        decoder->arrayAddItem = Object_npyArrayAddItem;
-
-        if (labelled) {
-            decoder->newObject = Object_npyNewObject;
-            decoder->endObject = Object_npyEndObject;
-            decoder->objectAddKey = Object_npyObjectAddKey;
-        }
-    }
-
     ret = JSON_DecodeObject(decoder, PyBytes_AS_STRING(sarg),
                             PyBytes_GET_SIZE(sarg));
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 9b8364c449e36..4a27b311982df 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -5,7 +5,6 @@
     abstractmethod,
 )
 from collections import abc
-import functools
 from io import StringIO
 from itertools import islice
 from types import TracebackType
@@ -36,7 +35,6 @@
 )
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import (
-    deprecate_kwarg,
     deprecate_nonkeyword_arguments,
     doc,
 )
@@ -375,7 +373,6 @@ def read_json(
     convert_axes=...,
     convert_dates: bool | list[str] = ...,
     keep_default_dates: bool = ...,
-    numpy: bool = ...,
     precise_float: bool = ...,
     date_unit: str | None = ...,
     encoding: str | None = ...,
@@ -399,7 +396,6 @@ def read_json(
     convert_axes=...,
     convert_dates: bool | list[str] = ...,
     keep_default_dates: bool = ...,
-    numpy: bool = ...,
     precise_float: bool = ...,
     date_unit: str | None = ...,
     encoding: str | None = ...,
@@ -423,7 +419,6 @@ def read_json(
     convert_axes=...,
     convert_dates: bool | list[str] = ...,
     keep_default_dates: bool = ...,
-    numpy: bool = ...,
     precise_float: bool = ...,
     date_unit: str | None = ...,
     encoding: str | None = ...,
@@ -446,7 +441,6 @@ def read_json(
     convert_axes=...,
     convert_dates: bool | list[str] = ...,
     keep_default_dates: bool = ...,
-    numpy: bool = ...,
     precise_float: bool = ...,
     date_unit: str | None = ...,
     encoding: str | None = ...,
@@ -464,7 +458,6 @@ def read_json(
     storage_options=_shared_docs["storage_options"],
     decompression_options=_shared_docs["decompression_options"] % "path_or_buf",
 )
-@deprecate_kwarg(old_arg_name="numpy", new_arg_name=None)
 @deprecate_nonkeyword_arguments(version="2.0", allowed_args=["path_or_buf"])
 def read_json(
     path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
@@ -474,7 +467,6 @@ def read_json(
     convert_axes=None,
     convert_dates: bool | list[str] = True,
     keep_default_dates: bool = True,
-    numpy: bool = False,
     precise_float: bool = False,
     date_unit: str | None = None,
     encoding: str | None = None,
@@ -580,13 +572,6 @@ def read_json(
 
         * it is ``'date'``.
 
-    numpy : bool, default False
-        Direct decoding to numpy arrays. Supports numeric data only, but
-        non-numeric column and index labels are supported. Note also that the
-        JSON ordering MUST be the same for each term if numpy=True.
-
-        .. deprecated:: 1.0.0
-
     precise_float : bool, default False
         Set to enable usage of higher precision (strtod) function when
         decoding string to double values. Default (False) is to use fast but
@@ -739,7 +724,6 @@ def read_json(
         convert_axes=convert_axes,
         convert_dates=convert_dates,
         keep_default_dates=keep_default_dates,
-        numpy=numpy,
         precise_float=precise_float,
         date_unit=date_unit,
         encoding=encoding,
@@ -776,7 +760,6 @@ def __init__(
         convert_axes,
         convert_dates,
         keep_default_dates: bool,
-        numpy: bool,
         precise_float: bool,
         date_unit,
         encoding,
@@ -794,7 +777,6 @@ def __init__(
         self.convert_axes = convert_axes
         self.convert_dates = convert_dates
         self.keep_default_dates = keep_default_dates
-        self.numpy = numpy
        self.precise_float = precise_float
        self.date_unit = date_unit
        self.encoding = encoding
@@ -929,7 +911,6 @@ def _get_object_parser(self, json) -> DataFrame | Series:
             "convert_axes": self.convert_axes,
             "convert_dates": self.convert_dates,
             "keep_default_dates": self.keep_default_dates,
-            "numpy": self.numpy,
             "precise_float": self.precise_float,
             "date_unit": self.date_unit,
         }
@@ -1021,7 +1002,6 @@ def __init__(
         convert_axes: bool = True,
         convert_dates: bool | list[str] = True,
         keep_default_dates: bool = False,
-        numpy: bool = False,
         precise_float: bool = False,
         date_unit=None,
     ) -> None:
@@ -1034,9 +1014,6 @@ def __init__(
 
         self.dtype = dtype
 
-        if orient == "split":
-            numpy = False
-
         if date_unit is not None:
             date_unit = date_unit.lower()
             if date_unit not in self._STAMP_UNITS:
@@ -1045,7 +1022,6 @@
         else:
             self.min_stamp = self._MIN_STAMPS["s"]
 
-        self.numpy = numpy
         self.precise_float = precise_float
         self.convert_axes = convert_axes
         self.convert_dates = convert_dates
@@ -1063,11 +1039,7 @@ def check_keys_split(self, decoded) -> None:
             raise ValueError(f"JSON data had unexpected key(s): {bad_keys_joined}")
 
     def parse(self):
-
-        if self.numpy:
-            self._parse_numpy()
-        else:
-            self._parse_no_numpy()
+        self._parse()
 
         if self.obj is None:
             return None
@@ -1076,10 +1048,7 @@ def parse(self):
             self._try_convert_types()
         return self.obj
 
-    def _parse_numpy(self):
-        raise AbstractMethodError(self)
-
-    def _parse_no_numpy(self):
+    def _parse(self):
         raise AbstractMethodError(self)
 
     def _convert_axes(self) -> None:
@@ -1231,7 +1200,7 @@ class SeriesParser(Parser):
     _default_orient = "index"
     _split_keys = ("name", "index", "data")
 
-    def _parse_no_numpy(self) -> None:
+    def _parse(self) -> None:
         data = loads(self.json, precise_float=self.precise_float)
 
         if self.orient == "split":
@@ -1241,30 +1210,6 @@ def _parse_no_numpy(self) -> None:
         else:
             self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object)
 
-    def _parse_numpy(self) -> None:
-        load_kwargs = {
-            "dtype": None,
-            "numpy": True,
-            "precise_float": self.precise_float,
-        }
-        if self.orient in ["columns", "index"]:
"index"]: - load_kwargs["labelled"] = True - loads_ = functools.partial(loads, **load_kwargs) - data = loads_(self.json) - - if self.orient == "split": - decoded = {str(k): v for k, v in data.items()} - self.check_keys_split(decoded) - self.obj = create_series_with_explicit_dtype(**decoded) - elif self.orient in ["columns", "index"]: - # error: "create_series_with_explicit_dtype" - # gets multiple values for keyword argument "dtype_if_empty - self.obj = create_series_with_explicit_dtype( - *data, dtype_if_empty=object - ) # type: ignore[misc] - else: - self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object) - def _try_convert_types(self) -> None: if self.obj is None: return @@ -1279,45 +1224,7 @@ class FrameParser(Parser): _default_orient = "columns" _split_keys = ("columns", "index", "data") - def _parse_numpy(self) -> None: - - json = self.json - orient = self.orient - - if orient == "columns": - args = loads( - json, - dtype=None, - numpy=True, - labelled=True, - precise_float=self.precise_float, - ) - if len(args): - args = (args[0].T, args[2], args[1]) - self.obj = DataFrame(*args) - elif orient == "split": - decoded = loads( - json, dtype=None, numpy=True, precise_float=self.precise_float - ) - decoded = {str(k): v for k, v in decoded.items()} - self.check_keys_split(decoded) - self.obj = DataFrame(**decoded) - elif orient == "values": - self.obj = DataFrame( - loads(json, dtype=None, numpy=True, precise_float=self.precise_float) - ) - else: - self.obj = DataFrame( - *loads( - json, - dtype=None, - numpy=True, - labelled=True, - precise_float=self.precise_float, - ) - ) - - def _parse_no_numpy(self) -> None: + def _parse(self) -> None: json = self.json orient = self.orient diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index daa8550965db4..ef4a172f07b0a 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -9,10 +9,7 @@ import numpy as np import pytest -from pandas.compat import ( - IS64, - is_platform_windows, -) +from pandas.compat import IS64 import pandas.util._test_decorators as td import pandas as pd @@ -37,7 +34,6 @@ def assert_json_roundtrip_equal(result, expected, orient): @pytest.mark.filterwarnings( "ignore:an integer is required (got type float)*:DeprecationWarning" ) -@pytest.mark.filterwarnings("ignore:the 'numpy' keyword is deprecated:FutureWarning") class TestPandasContainer: @pytest.fixture def categorical_frame(self): @@ -137,12 +133,9 @@ def test_frame_default_orient(self, float_frame): @pytest.mark.parametrize("dtype", [False, float]) @pytest.mark.parametrize("convert_axes", [True, False]) - @pytest.mark.parametrize("numpy", [True, False]) - def test_roundtrip_simple(self, orient, convert_axes, numpy, dtype, float_frame): + def test_roundtrip_simple(self, orient, convert_axes, dtype, float_frame): data = float_frame.to_json(orient=orient) - result = read_json( - data, orient=orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype - ) + result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype) expected = float_frame @@ -150,28 +143,15 @@ def test_roundtrip_simple(self, orient, convert_axes, numpy, dtype, float_frame) @pytest.mark.parametrize("dtype", [False, np.int64]) @pytest.mark.parametrize("convert_axes", [True, False]) - @pytest.mark.parametrize("numpy", [True, False]) - def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype, int_frame): + def test_roundtrip_intframe(self, orient, convert_axes, dtype, int_frame): data = 
         data = int_frame.to_json(orient=orient)
-        result = read_json(
-            data, orient=orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype
-        )
+        result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype)
         expected = int_frame
-        if (
-            numpy
-            and (not IS64 or is_platform_windows())
-            and not dtype
-            and orient != "split"
-        ):
-            # TODO: see what is causing roundtrip dtype loss
-            expected = expected.astype(np.int32)
-
         assert_json_roundtrip_equal(result, expected, orient)
 
     @pytest.mark.parametrize("dtype", [None, np.float64, int, "U3"])
     @pytest.mark.parametrize("convert_axes", [True, False])
-    @pytest.mark.parametrize("numpy", [True, False])
-    def test_roundtrip_str_axes(self, request, orient, convert_axes, numpy, dtype):
+    def test_roundtrip_str_axes(self, orient, convert_axes, dtype):
         df = DataFrame(
             np.zeros((200, 4)),
             columns=[str(i) for i in range(4)],
@@ -179,16 +159,8 @@ def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype):
             dtype=dtype,
         )
 
-        # TODO: do we even need to support U3 dtypes?
-        if numpy and dtype == "U3" and orient != "split":
-            request.node.add_marker(
-                pytest.mark.xfail(reason="Can't decode directly to array")
-            )
-
         data = df.to_json(orient=orient)
-        result = read_json(
-            data, orient=orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype
-        )
+        result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype)
 
         expected = df.copy()
         if not dtype:
@@ -210,9 +182,8 @@
         assert_json_roundtrip_equal(result, expected, orient)
 
     @pytest.mark.parametrize("convert_axes", [True, False])
-    @pytest.mark.parametrize("numpy", [True, False])
     def test_roundtrip_categorical(
-        self, request, orient, categorical_frame, convert_axes, numpy
+        self, request, orient, categorical_frame, convert_axes
     ):
         # TODO: create a better frame to test with and improve coverage
         if orient in ("index", "columns"):
@@ -223,45 +194,33 @@ def test_roundtrip_categorical(
             )
 
         data = categorical_frame.to_json(orient=orient)
-        if numpy and orient in ("records", "values"):
-            request.node.add_marker(
-                pytest.mark.xfail(reason=f"Orient {orient} is broken with numpy=True")
-            )
 
-        result = read_json(data, orient=orient, convert_axes=convert_axes, numpy=numpy)
+        result = read_json(data, orient=orient, convert_axes=convert_axes)
 
         expected = categorical_frame.copy()
 
         expected.index = expected.index.astype(str)  # Categorical not preserved
         expected.index.name = None  # index names aren't preserved in JSON
-
-        if not numpy and orient == "index":
-            expected = expected.sort_index()
-
         assert_json_roundtrip_equal(result, expected, orient)
 
     @pytest.mark.parametrize("convert_axes", [True, False])
-    @pytest.mark.parametrize("numpy", [True, False])
-    def test_roundtrip_empty(self, orient, convert_axes, numpy):
+    def test_roundtrip_empty(self, orient, convert_axes):
         empty_frame = DataFrame()
         data = empty_frame.to_json(orient=orient)
-        result = read_json(data, orient=orient, convert_axes=convert_axes, numpy=numpy)
+        result = read_json(data, orient=orient, convert_axes=convert_axes)
         expected = empty_frame.copy()
 
         # TODO: both conditions below are probably bugs
         if convert_axes:
             expected.index = expected.index.astype(float)
             expected.columns = expected.columns.astype(float)
-        if numpy and orient == "values":
-            expected = expected.reindex([0], axis=1).reset_index(drop=True)
 
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("convert_axes", [True, False])
-    @pytest.mark.parametrize("numpy", [True, False])
-    def test_roundtrip_timestamp(self, orient, convert_axes, numpy, datetime_frame):
+    def test_roundtrip_timestamp(self, orient, convert_axes, datetime_frame):
         # TODO: improve coverage with date_format parameter
         data = datetime_frame.to_json(orient=orient)
-        result = read_json(data, orient=orient, convert_axes=convert_axes, numpy=numpy)
+        result = read_json(data, orient=orient, convert_axes=convert_axes)
         expected = datetime_frame.copy()
 
         if not convert_axes:  # one off for ts handling
@@ -275,12 +234,7 @@
         assert_json_roundtrip_equal(result, expected, orient)
 
     @pytest.mark.parametrize("convert_axes", [True, False])
-    @pytest.mark.parametrize("numpy", [True, False])
-    def test_roundtrip_mixed(self, request, orient, convert_axes, numpy):
-        if numpy and orient != "split":
-            request.node.add_marker(
-                pytest.mark.xfail(reason="Can't decode directly to array")
-            )
+    def test_roundtrip_mixed(self, orient, convert_axes):
 
         index = pd.Index(["a", "b", "c", "d", "e"])
         values = {
@@ -293,14 +247,11 @@
         df = DataFrame(data=values, index=index)
 
         data = df.to_json(orient=orient)
-        result = read_json(data, orient=orient, convert_axes=convert_axes, numpy=numpy)
+        result = read_json(data, orient=orient, convert_axes=convert_axes)
 
         expected = df.copy()
         expected = expected.assign(**expected.select_dtypes("number").astype(np.int64))
 
-        if not numpy and orient == "index":
-            expected = expected.sort_index()
-
         assert_json_roundtrip_equal(result, expected, orient)
 
     @pytest.mark.parametrize(
@@ -343,8 +294,7 @@ def test_frame_from_json_bad_data_raises(self, data, msg, orient):
 
     @pytest.mark.parametrize("dtype", [True, False])
     @pytest.mark.parametrize("convert_axes", [True, False])
-    @pytest.mark.parametrize("numpy", [True, False])
-    def test_frame_from_json_missing_data(self, orient, convert_axes, numpy, dtype):
+    def test_frame_from_json_missing_data(self, orient, convert_axes, dtype):
         num_df = DataFrame([[1, 2], [4, 5, 6]])
         result = read_json(
             num_df.to_json(orient=orient),
@@ -643,10 +593,9 @@ def test_series_non_unique_index(self):
     def test_series_default_orient(self, string_series):
         assert string_series.to_json() == string_series.to_json(orient="index")
 
-    @pytest.mark.parametrize("numpy", [True, False])
-    def test_series_roundtrip_simple(self, orient, numpy, string_series):
+    def test_series_roundtrip_simple(self, orient, string_series):
         data = string_series.to_json(orient=orient)
-        result = read_json(data, typ="series", orient=orient, numpy=numpy)
+        result = read_json(data, typ="series", orient=orient)
 
         expected = string_series
         if orient in ("values", "records"):
@@ -657,10 +606,9 @@
         tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize("dtype", [False, None])
-    @pytest.mark.parametrize("numpy", [True, False])
-    def test_series_roundtrip_object(self, orient, numpy, dtype, object_series):
+    def test_series_roundtrip_object(self, orient, dtype, object_series):
         data = object_series.to_json(orient=orient)
-        result = read_json(data, typ="series", orient=orient, numpy=numpy, dtype=dtype)
+        result = read_json(data, typ="series", orient=orient, dtype=dtype)
 
         expected = object_series
         if orient in ("values", "records"):
@@ -670,11 +618,10 @@
 
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.parametrize("numpy", [True, False])
-    def test_series_roundtrip_empty(self, orient, numpy):
+    def test_series_roundtrip_empty(self, orient):
         empty_series = Series([], index=[], dtype=np.float64)
         data = empty_series.to_json(orient=orient)
-        result = read_json(data, typ="series", orient=orient, numpy=numpy)
+        result = read_json(data, typ="series", orient=orient)
 
         expected = empty_series
         if orient in ("values", "records"):
@@ -684,10 +631,9 @@
 
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.parametrize("numpy", [True, False])
-    def test_series_roundtrip_timeseries(self, orient, numpy, datetime_series):
+    def test_series_roundtrip_timeseries(self, orient, datetime_series):
         data = datetime_series.to_json(orient=orient)
-        result = read_json(data, typ="series", orient=orient, numpy=numpy)
+        result = read_json(data, typ="series", orient=orient)
 
         expected = datetime_series
         if orient in ("values", "records"):
@@ -698,11 +644,10 @@
         tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize("dtype", [np.float64, int])
-    @pytest.mark.parametrize("numpy", [True, False])
-    def test_series_roundtrip_numeric(self, orient, numpy, dtype):
+    def test_series_roundtrip_numeric(self, orient, dtype):
         s = Series(range(6), index=["a", "b", "c", "d", "e", "f"])
         data = s.to_json(orient=orient)
-        result = read_json(data, typ="series", orient=orient, numpy=numpy)
+        result = read_json(data, typ="series", orient=orient)
 
         expected = s.copy()
         if orient in ("values", "records"):
@@ -963,24 +908,6 @@ def test_doc_example(self):
         result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_})
         tm.assert_frame_equal(result, result)
 
-    def test_misc_example(self):
-
-        # parsing unordered input fails
-        result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]', numpy=True)
-        expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
-
-        error_msg = """DataFrame\\.index are different
-
-DataFrame\\.index values are different \\(100\\.0 %\\)
-\\[left\\]: Index\\(\\['a', 'b'\\], dtype='object'\\)
-\\[right\\]: RangeIndex\\(start=0, stop=2, step=1\\)"""
-        with pytest.raises(AssertionError, match=error_msg):
-            tm.assert_frame_equal(result, expected, check_index_type=False)
-
-        result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]')
-        expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
-        tm.assert_frame_equal(result, expected)
-
     def test_round_trip_exception_(self, datapath):
         # GH 3867
         path = datapath("io", "json", "data", "teams.csv")
@@ -1774,13 +1701,6 @@ def test_emca_262_nan_inf_support(self):
         )
         tm.assert_frame_equal(result, expected)
 
-    def test_deprecate_numpy_argument_read_json(self):
-        # GH 28512
-        expected = DataFrame([1, 2, 3])
-        with tm.assert_produces_warning(FutureWarning):
-            result = read_json(expected.to_json(), numpy=True)
-        tm.assert_frame_equal(result, expected)
-
     def test_frame_int_overflow(self):
         # GH 30320
         encoded_json = json.dumps([{"col": "31900441201190696999"}, {"col": "Text"}])
diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py
index b371990178d28..fe69059e94729 100644
--- a/pandas/tests/io/json/test_readlines.py
+++ b/pandas/tests/io/json/test_readlines.py
@@ -155,7 +155,6 @@ def test_readjson_chunks_closes(chunksize):
         convert_axes=True,
         convert_dates=True,
         keep_default_dates=True,
-        numpy=False,
         precise_float=False,
         date_unit=None,
         encoding=None,
diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py
index ae13d8d5fb180..28545b7ab2cc6 100644
--- a/pandas/tests/io/json/test_ujson.py
+++ b/pandas/tests/io/json/test_ujson.py
@@ -13,10 +13,7 @@
 import pytz
 
 import pandas._libs.json as ujson
-from pandas.compat import (
-    IS64,
-    is_platform_windows,
-)
+from pandas.compat import IS64
 
 from pandas import (
     DataFrame,
@@ -55,23 +52,6 @@ def orient(request):
     return request.param
 
 
-@pytest.fixture(params=[None, True])
-def numpy(request):
-    return request.param
-
-
-def get_int32_compat_dtype(numpy, orient):
-    # See GH#32527
-    dtype = np.int64
-    if not (
-        (numpy is None or orient == "index") or (numpy is True and orient is None)
-    ):
-        if is_platform_windows():
-            dtype = np.int32
-        else:
-            dtype = np.intp
-
-    return dtype
-
-
 class TestUltraJSONTests:
     @pytest.mark.skipif(not IS64, reason="not compliant on 32-bit, xref #15865")
     def test_encode_decimal(self):
@@ -218,11 +198,6 @@ def test_encode_array_of_nested_arrays(self):
         assert nested_input == json.loads(output)
         assert nested_input == ujson.decode(output)
 
-        nested_input = np.array(nested_input)
-        tm.assert_numpy_array_equal(
-            nested_input, ujson.decode(output, numpy=True, dtype=nested_input.dtype)
-        )
-
     def test_encode_array_of_doubles(self):
         doubles_input = [31337.31337, 31337.31337, 31337.31337, 31337.31337] * 10
         output = ujson.encode(doubles_input)
@@ -230,10 +205,6 @@ def test_encode_array_of_doubles(self):
         assert doubles_input == json.loads(output)
         assert doubles_input == ujson.decode(output)
 
-        tm.assert_numpy_array_equal(
-            np.array(doubles_input), ujson.decode(output, numpy=True)
-        )
-
     def test_double_precision(self):
         double_input = 30.012345678901234
         output = ujson.encode(double_input, double_precision=15)
@@ -328,10 +299,6 @@ def test_encode_array_in_array(self):
         assert output == json.dumps(arr_in_arr_input)
         assert arr_in_arr_input == ujson.decode(output)
 
-        tm.assert_numpy_array_equal(
-            np.array(arr_in_arr_input), ujson.decode(output, numpy=True)
-        )
-
     @pytest.mark.parametrize(
         "num_input",
         [
@@ -353,10 +320,6 @@ def test_encode_list_conversion(self):
         assert list_input == json.loads(output)
         assert list_input == ujson.decode(output)
 
-        tm.assert_numpy_array_equal(
-            np.array(list_input), ujson.decode(output, numpy=True)
-        )
-
     def test_encode_dict_conversion(self):
         dict_input = {"k1": 1, "k2": 2, "k3": 3, "k4": 4}
         output = ujson.encode(dict_input)
@@ -589,10 +552,6 @@ def test_encode_list_long_conversion(self):
         assert long_input == json.loads(output)
         assert long_input == ujson.decode(output)
 
-        tm.assert_numpy_array_equal(
-            np.array(long_input), ujson.decode(output, numpy=True, dtype=np.int64)
-        )
-
     @pytest.mark.parametrize("long_input", [9223372036854775807, 18446744073709551615])
     def test_encode_long_conversion(self, long_input):
         output = ujson.encode(long_input)
@@ -801,7 +760,6 @@ def test_array_basic(self):
         arr = arr.reshape((2, 2, 2, 2, 3, 2))
 
         tm.assert_numpy_array_equal(np.array(ujson.decode(ujson.encode(arr))), arr)
-        tm.assert_numpy_array_equal(ujson.decode(ujson.encode(arr), numpy=True), arr)
 
     @pytest.mark.parametrize("shape", [(10, 10), (5, 5, 4), (100, 1)])
     def test_array_reshaped(self, shape):
@@ -809,7 +767,6 @@ def test_array_reshaped(self, shape):
         arr = arr.reshape(shape)
 
         tm.assert_numpy_array_equal(np.array(ujson.decode(ujson.encode(arr))), arr)
-        tm.assert_numpy_array_equal(ujson.decode(ujson.encode(arr), numpy=True), arr)
 
     def test_array_list(self):
         arr_list = [
@@ -836,123 +793,17 @@ def test_array_float(self):
         arr_out = np.array(ujson.decode(ujson.encode(arr)), dtype=dtype)
         tm.assert_almost_equal(arr, arr_out)
-        arr_out = ujson.decode(ujson.encode(arr), numpy=True, dtype=dtype)
-        tm.assert_almost_equal(arr, arr_out)
-
     def test_0d_array(self):
         # gh-18878
         msg = re.escape("array(1) (0d array) is not JSON serializable at the moment")
 
         with pytest.raises(TypeError, match=msg):
             ujson.encode(np.array(1))
 
-    @pytest.mark.parametrize(
-        "bad_input,exc_type,err_msg,kwargs",
-        [
-            (
-                [{}, []],
-                ValueError,
-                r"nesting not supported for object or variable length dtypes",
-                {},
-            ),
-            (
-                [42, None],
-                TypeError,
-                r"int\(\) argument must be a string, a bytes-like object or a( real)? "
-                r"number, not 'NoneType'",
-                {},
-            ),
-            (
-                [["a"], 42],
-                ValueError,
-                r"Cannot decode multidimensional arrays with variable length elements "
-                r"to numpy",
-                {},
-            ),
-            (
-                [42, {}, "a"],
-                TypeError,
-                r"int\(\) argument must be a string, a bytes-like object or a( real)? "
-                r"number, not 'dict'",
-                {},
-            ),
-            (
-                [42, ["a"], 42],
-                ValueError,
-                r"invalid literal for int\(\) with base 10: 'a'",
-                {},
-            ),
-            (
-                ["a", "b", [], "c"],
-                ValueError,
-                r"nesting not supported for object or variable length dtypes",
-                {},
-            ),
-            (
-                [{"a": "b"}],
-                ValueError,
-                r"Cannot decode multidimensional arrays with variable length elements "
-                r"to numpy",
-                {"labelled": True},
-            ),
-            (
-                {"a": {"b": {"c": 42}}},
-                ValueError,
-                r"labels only supported up to 2 dimensions",
-                {"labelled": True},
-            ),
-            (
-                [{"a": 42, "b": 23}, {"c": 17}],
-                ValueError,
-                r"cannot reshape array of size 3 into shape \(2,1\)",
-                {"labelled": True},
-            ),
-        ],
-    )
-    def test_array_numpy_except(self, bad_input, exc_type, err_msg, kwargs):
-        with pytest.raises(exc_type, match=err_msg):
-            ujson.decode(ujson.dumps(bad_input), numpy=True, **kwargs)
-
-    def test_array_numpy_labelled(self):
-        labelled_input = {"a": []}
-        output = ujson.loads(ujson.dumps(labelled_input), numpy=True, labelled=True)
-        assert (np.empty((1, 0)) == output[0]).all()
-        assert (np.array(["a"]) == output[1]).all()
-        assert output[2] is None
-
-        labelled_input = [{"a": 42}]
-        output = ujson.loads(ujson.dumps(labelled_input), numpy=True, labelled=True)
-        assert (np.array(["a"]) == output[2]).all()
-        assert (np.array([42]) == output[0]).all()
-        assert output[1] is None
-
-        # see gh-10837: write out the dump explicitly
-        # so there is no dependency on iteration order
-        input_dumps = '[{"a": 42, "b":31}, {"a": 24, "c": 99}, {"a": 2.4, "b": 78}]'
-        output = ujson.loads(input_dumps, numpy=True, labelled=True)
-        expected_vals = np.array([42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2))
-        assert (expected_vals == output[0]).all()
-        assert output[1] is None
-        assert (np.array(["a", "b"]) == output[2]).all()
-
-        input_dumps = (
-            '{"1": {"a": 42, "b":31}, "2": {"a": 24, "c": 99}, '
-            '"3": {"a": 2.4, "b": 78}}'
-        )
-        output = ujson.loads(input_dumps, numpy=True, labelled=True)
-        expected_vals = np.array([42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2))
-        assert (expected_vals == output[0]).all()
-        assert (np.array(["1", "2", "3"]) == output[1]).all()
-        assert (np.array(["a", "b"]) == output[2]).all()
-
 
 class TestPandasJSONTests:
-    def test_dataframe(self, request, orient, numpy):
-        if orient == "records" and numpy:
-            request.node.add_marker(
-                pytest.mark.xfail(reason=f"Not idiomatic pandas if orient={orient}")
-            )
+    def test_dataframe(self, orient):
 
-        dtype = get_int32_compat_dtype(numpy, orient)
+        dtype = np.int64
 
         df = DataFrame(
             [[1, 2, 3], [4, 5, 6]],
@@ -961,10 +812,9 @@
             index=["a", "b"],
             columns=["x", "y", "z"],
             dtype=dtype,
         )
         encode_kwargs = {} if orient is None else {"orient": orient}
-        decode_kwargs = {} if numpy is None else {"numpy": numpy}
         assert (df.dtypes == dtype).all()
 
-        output = ujson.decode(ujson.encode(df, **encode_kwargs), **decode_kwargs)
+        output = ujson.decode(ujson.encode(df, **encode_kwargs))
         assert (df.dtypes == dtype).all()
 
         # Ensure proper DataFrame initialization.
@@ -1000,33 +850,8 @@ def test_dataframe_nested(self, orient):
         }
         assert ujson.decode(ujson.encode(nested, **kwargs)) == exp
 
-    def test_dataframe_numpy_labelled(self, orient, request):
-        if orient in ("split", "values"):
-            request.node.add_marker(
-                pytest.mark.xfail(reason=f"{orient} incompatible for labelled=True")
-            )
-
-        df = DataFrame(
-            [[1, 2, 3], [4, 5, 6]],
-            index=["a", "b"],
-            columns=["x", "y", "z"],
-            dtype=int,
-        )
-        kwargs = {} if orient is None else {"orient": orient}
-
-        output = DataFrame(
-            *ujson.decode(ujson.encode(df, **kwargs), numpy=True, labelled=True)
-        )
-
-        if orient is None:
-            df = df.T
-        elif orient == "records":
-            df.index = [0, 1]
-
-        tm.assert_frame_equal(output, df)
-
-    def test_series(self, orient, numpy):
-        dtype = get_int32_compat_dtype(numpy, orient)
+    def test_series(self, orient):
+        dtype = np.int64
 
         s = Series(
             [10, 20, 30, 40, 50, 60],
             name="series",
@@ -1036,9 +861,8 @@
         assert s.dtype == dtype
 
         encode_kwargs = {} if orient is None else {"orient": orient}
-        decode_kwargs = {} if numpy is None else {"numpy": numpy}
 
-        output = ujson.decode(ujson.encode(s, **encode_kwargs), **decode_kwargs)
+        output = ujson.decode(ujson.encode(s, **encode_kwargs))
         assert s.dtype == dtype
 
         if orient == "split":
@@ -1078,45 +902,24 @@ def test_index(self):
         output = Index(ujson.decode(ujson.encode(i)), name="index")
         tm.assert_index_equal(i, output)
 
-        output = Index(ujson.decode(ujson.encode(i), numpy=True), name="index")
-        tm.assert_index_equal(i, output)
-
         dec = _clean_dict(ujson.decode(ujson.encode(i, orient="split")))
         output = Index(**dec)
         tm.assert_index_equal(i, output)
 
         assert i.name == output.name
 
-        dec = _clean_dict(ujson.decode(ujson.encode(i, orient="split"), numpy=True))
-        output = Index(**dec)
-        tm.assert_index_equal(i, output)
 
         assert i.name == output.name
 
         output = Index(ujson.decode(ujson.encode(i, orient="values")), name="index")
         tm.assert_index_equal(i, output)
 
-        output = Index(
-            ujson.decode(ujson.encode(i, orient="values"), numpy=True), name="index"
-        )
-        tm.assert_index_equal(i, output)
-
         output = Index(ujson.decode(ujson.encode(i, orient="records")), name="index")
         tm.assert_index_equal(i, output)
 
-        output = Index(
-            ujson.decode(ujson.encode(i, orient="records"), numpy=True), name="index"
-        )
-        tm.assert_index_equal(i, output)
-
         output = Index(ujson.decode(ujson.encode(i, orient="index")), name="index")
         tm.assert_index_equal(i, output)
 
-        output = Index(
-            ujson.decode(ujson.encode(i, orient="index"), numpy=True), name="index"
-        )
-        tm.assert_index_equal(i, output)
-
     def test_datetime_index(self):
         date_unit = "ns"
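
Migration note for downstream code: with the keyword gone, callers simply drop ``numpy=True`` and rely on the default parser, which also handles the cases the removed fast path could not (non-numeric data, mixed dtypes, and records whose keys are not encoded in a uniform order). A minimal before/after sketch; the frame and payload below are illustrative only, not taken from this changeset:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": [3.5, 4.5]})
    payload = df.to_json()  # default orient="columns"

    # pandas < 2.0 (deprecated since 1.0.0):
    #     result = pd.read_json(payload, numpy=True)
    # pandas >= 2.0: drop the keyword; the default decode path covers
    # numeric and non-numeric data alike.
    result = pd.read_json(payload)

    assert result.equals(df)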