Skip to content

Commit 2410fca

Browse files
authored
DEP: Enforce numpy keyword deprecation in read_json (#49083)
1 parent 753ea2e commit 2410fca

File tree

7 files changed

+41
-562
lines changed

7 files changed

+41
-562
lines changed

doc/source/user_guide/io.rst

-70
Original file line numberDiff line numberDiff line change
@@ -2111,8 +2111,6 @@ is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series``
21112111
* ``convert_axes`` : boolean, try to convert the axes to the proper dtypes, default is ``True``
21122112
* ``convert_dates`` : a list of columns to parse for dates; if ``True``, then try to parse date-like columns. Default is ``True``.
21132113
* ``keep_default_dates`` : boolean, default ``True``. If parsing dates, then parse the default date-like columns.
2114-
* ``numpy`` : direct decoding to NumPy arrays. default is ``False``;
2115-
Supports numeric data only, although labels may be non-numeric. Also note that the JSON ordering **MUST** be the same for each term if ``numpy=True``.
21162114
* ``precise_float`` : boolean, default ``False``. Set to enable usage of the higher-precision (strtod) function when decoding strings to double values. The default (``False``) is to use fast but less precise builtin functionality.
21172115
* ``date_unit`` : string, the timestamp unit to detect if converting dates. Default
21182116
None. By default the timestamp precision will be detected, if this is not desired
@@ -2216,74 +2214,6 @@ Dates written in nanoseconds need to be read back in nanoseconds:
22162214
dfju = pd.read_json(json, date_unit="ns")
22172215
dfju
22182216
2219-
The Numpy parameter
2220-
+++++++++++++++++++
2221-
2222-
.. note::
2223-
This param has been deprecated as of version 1.0.0 and will raise a ``FutureWarning``.
2224-
2225-
This supports numeric data only. Index and column labels may be non-numeric, e.g. strings, dates, etc.
2226-
2227-
If ``numpy=True`` is passed to ``read_json`` an attempt will be made to sniff
2228-
an appropriate dtype during deserialization and to subsequently decode directly
2229-
to NumPy arrays, bypassing the need for intermediate Python objects.
2230-
2231-
This can provide speedups if you are deserialising a large amount of numeric
2232-
data:
2233-
2234-
.. ipython:: python
2235-
2236-
randfloats = np.random.uniform(-100, 1000, 10000)
2237-
randfloats.shape = (1000, 10)
2238-
dffloats = pd.DataFrame(randfloats, columns=list("ABCDEFGHIJ"))
2239-
2240-
jsonfloats = dffloats.to_json()
2241-
2242-
.. ipython:: python
2243-
2244-
%timeit pd.read_json(jsonfloats)
2245-
2246-
.. ipython:: python
2247-
:okwarning:
2248-
2249-
%timeit pd.read_json(jsonfloats, numpy=True)
2250-
2251-
The speedup is less noticeable for smaller datasets:
2252-
2253-
.. ipython:: python
2254-
2255-
jsonfloats = dffloats.head(100).to_json()
2256-
2257-
.. ipython:: python
2258-
2259-
%timeit pd.read_json(jsonfloats)
2260-
2261-
.. ipython:: python
2262-
:okwarning:
2263-
2264-
%timeit pd.read_json(jsonfloats, numpy=True)
2265-
2266-
.. warning::
2267-
2268-
Direct NumPy decoding makes a number of assumptions and may fail or produce
2269-
unexpected output if these assumptions are not satisfied:
2270-
2271-
- data is numeric.
2272-
2273-
- data is uniform. The dtype is sniffed from the first value decoded.
2274-
A ``ValueError`` may be raised, or incorrect output may be produced
2275-
if this condition is not satisfied.
2276-
2277-
- labels are ordered. Labels are only read from the first container; it is assumed
2278-
that each subsequent row / column has been encoded in the same order. This should be satisfied if the
2279-
data was encoded using ``to_json`` but may not be the case if the JSON
2280-
is from another source.
2281-
2282-
.. ipython:: python
2283-
:suppress:
2284-
2285-
os.remove("test.json")
2286-
22872217
.. _io.json_normalize:
22882218

22892219
Normalization

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ Removal of prior version deprecations/changes
151151
- Removed the ``numeric_only`` keyword from :meth:`Categorical.min` and :meth:`Categorical.max` in favor of ``skipna`` (:issue:`48821`)
152152
- Removed :func:`is_extension_type` in favor of :func:`is_extension_array_dtype` (:issue:`29457`)
153153
- Removed :meth:`DataFrameGroupBy.pad` and :meth:`DataFrameGroupBy.backfill` (:issue:`45076`)
154+
- Removed the ``numpy`` argument from :func:`read_json` (:issue:`30636`)
154155
- Removed the ``center`` keyword in :meth:`DataFrame.expanding` (:issue:`20647`)
155156
- Enforced :meth:`Rolling.count` with ``min_periods=None`` to default to the size of the window (:issue:`31302`)
156157

pandas/_libs/src/ujson/python/JSONtoObj.c

+3-84
Original file line numberDiff line numberDiff line change
@@ -83,12 +83,6 @@ JSOBJ Object_npyNewArrayList(void *prv, void *decoder);
8383
JSOBJ Object_npyEndArrayList(void *prv, JSOBJ obj);
8484
int Object_npyArrayListAddItem(void *prv, JSOBJ obj, JSOBJ value);
8585

86-
// labelled support, encode keys and values of JS object into separate numpy
87-
// arrays
88-
JSOBJ Object_npyNewObject(void *prv, void *decoder);
89-
JSOBJ Object_npyEndObject(void *prv, JSOBJ obj);
90-
int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value);
91-
9286
// free the numpy context buffer
9387
void Npy_releaseContext(NpyArrContext *npyarr) {
9488
PRINTMARK();
@@ -374,68 +368,6 @@ int Object_npyArrayListAddItem(void *prv, JSOBJ obj, JSOBJ value) {
374368
return 1;
375369
}
376370

377-
JSOBJ Object_npyNewObject(void *prv, void *_decoder) {
378-
PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder;
379-
PRINTMARK();
380-
if (decoder->curdim > 1) {
381-
PyErr_SetString(PyExc_ValueError,
382-
"labels only supported up to 2 dimensions");
383-
return NULL;
384-
}
385-
386-
return ((JSONObjectDecoder *)decoder)->newArray(prv, decoder);
387-
}
388-
389-
JSOBJ Object_npyEndObject(void *prv, JSOBJ obj) {
390-
PyObject *list;
391-
npy_intp labelidx;
392-
NpyArrContext *npyarr = (NpyArrContext *)obj;
393-
PRINTMARK();
394-
if (!npyarr) {
395-
return NULL;
396-
}
397-
398-
labelidx = npyarr->dec->curdim - 1;
399-
400-
list = npyarr->labels[labelidx];
401-
if (list) {
402-
npyarr->labels[labelidx] = PyArray_FROM_O(list);
403-
Py_DECREF(list);
404-
}
405-
406-
return (PyObject *)((JSONObjectDecoder *)npyarr->dec)->endArray(prv, obj);
407-
}
408-
409-
int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) {
410-
PyObject *label, *labels;
411-
npy_intp labelidx;
412-
// add key to label array, value to values array
413-
NpyArrContext *npyarr = (NpyArrContext *)obj;
414-
PRINTMARK();
415-
if (!npyarr) {
416-
return 0;
417-
}
418-
419-
label = (PyObject *)name;
420-
labelidx = npyarr->dec->curdim - 1;
421-
422-
if (!npyarr->labels[labelidx]) {
423-
npyarr->labels[labelidx] = PyList_New(0);
424-
}
425-
labels = npyarr->labels[labelidx];
426-
// only fill label array once, assumes all column labels are the same
427-
// for 2-dimensional arrays.
428-
if (PyList_Check(labels) && PyList_GET_SIZE(labels) <= npyarr->elcount) {
429-
PyList_Append(labels, label);
430-
}
431-
432-
if (((JSONObjectDecoder *)npyarr->dec)->arrayAddItem(prv, obj, value)) {
433-
Py_DECREF(label);
434-
return 1;
435-
}
436-
return 0;
437-
}
438-
439371
int Object_objectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) {
440372
int ret = PyDict_SetItem(obj, name, value);
441373
Py_DECREF((PyObject *)name);
@@ -494,7 +426,7 @@ static void Object_releaseObject(void *prv, JSOBJ obj, void *_decoder) {
494426
}
495427
}
496428

497-
static char *g_kwlist[] = {"obj", "precise_float", "numpy",
429+
static char *g_kwlist[] = {"obj", "precise_float",
498430
"labelled", "dtype", NULL};
499431

500432
PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) {
@@ -505,7 +437,7 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) {
505437
JSONObjectDecoder *decoder;
506438
PyObjectDecoder pyDecoder;
507439
PyArray_Descr *dtype = NULL;
508-
int numpy = 0, labelled = 0;
440+
int labelled = 0;
509441

510442
JSONObjectDecoder dec = {
511443
Object_newString, Object_objectAddKey, Object_arrayAddItem,
@@ -528,7 +460,7 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) {
528460
decoder = (JSONObjectDecoder *)&pyDecoder;
529461

530462
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiiO&", g_kwlist, &arg,
531-
&opreciseFloat, &numpy, &labelled,
463+
&opreciseFloat, &labelled,
532464
PyArray_DescrConverter2, &dtype)) {
533465
Npy_releaseContext(pyDecoder.npyarr);
534466
return NULL;
@@ -554,19 +486,6 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) {
554486
decoder->errorStr = NULL;
555487
decoder->errorOffset = NULL;
556488

557-
if (numpy) {
558-
pyDecoder.dtype = dtype;
559-
decoder->newArray = Object_npyNewArray;
560-
decoder->endArray = Object_npyEndArray;
561-
decoder->arrayAddItem = Object_npyArrayAddItem;
562-
563-
if (labelled) {
564-
decoder->newObject = Object_npyNewObject;
565-
decoder->endObject = Object_npyEndObject;
566-
decoder->objectAddKey = Object_npyObjectAddKey;
567-
}
568-
}
569-
570489
ret = JSON_DecodeObject(decoder, PyBytes_AS_STRING(sarg),
571490
PyBytes_GET_SIZE(sarg));
572491

0 commit comments

Comments
 (0)