From fa24319081cfdec782d299cef5596a5a4143b741 Mon Sep 17 00:00:00 2001 From: Kieran O'Mahony Date: Mon, 11 Jun 2012 16:15:50 +0100 Subject: [PATCH] BUG: ujson fix sniffing of dtype when decoding --- pandas/core/frame.py | 3 ++ pandas/core/series.py | 3 ++ pandas/src/ujson/python/JSONtoObj.c | 59 ++++++++++++++++++----------- pandas/tests/test_frame.py | 2 +- 4 files changed, 44 insertions(+), 23 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d70fba0e57ccc..c4d146ece07f0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -786,6 +786,9 @@ def from_json(cls, json, orient="columns", dtype=None, numpy=True): from pandas._ujson import loads df = None + if dtype is not None and orient == "split": + numpy = False + if numpy: try: if orient == "columns": diff --git a/pandas/core/series.py b/pandas/core/series.py index 9be6ae9dff9ad..bc93b4af16007 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -957,6 +957,9 @@ def from_json(cls, json, orient="index", dtype=None, numpy=True): from pandas._ujson import loads s = None + if dtype is not None and orient == "split": + numpy = False + if numpy: try: if orient == "split": diff --git a/pandas/src/ujson/python/JSONtoObj.c b/pandas/src/ujson/python/JSONtoObj.c index 79c1719b74583..1db7586ad17f7 100644 --- a/pandas/src/ujson/python/JSONtoObj.c +++ b/pandas/src/ujson/python/JSONtoObj.c @@ -121,6 +121,34 @@ JSOBJ Object_npyNewArray(void* _decoder) return npyarr; } +PyObject* Npy_returnLabelled(NpyArrContext* npyarr) +{ + PyObject* ret = npyarr->ret; + npy_intp i; + + if (npyarr->labels[0] || npyarr->labels[1]) + { + // finished decoding, build tuple with values and labels + ret = PyTuple_New(npyarr->shape.len+1); + for (i = 0; i < npyarr->shape.len; i++) + { + if (npyarr->labels[i]) + { + PyTuple_SET_ITEM(ret, i+1, npyarr->labels[i]); + npyarr->labels[i] = NULL; + } + else + { + Py_INCREF(Py_None); + PyTuple_SET_ITEM(ret, i+1, Py_None); + } + } + PyTuple_SET_ITEM(ret, 0, npyarr->ret); + } + + return ret; +} + JSOBJ Object_npyEndArray(JSOBJ obj) { PyObject *ret; @@ -167,28 +195,10 @@ JSOBJ Object_npyEndArray(JSOBJ obj) { npyarr->ret = PyArray_Newshape((PyArrayObject*) ret, &npyarr->shape, NPY_ANYORDER); Py_DECREF(ret); - ret = npyarr->ret; } - if (npyarr->labels[0] || npyarr->labels[1]) - { - // finished decoding, build tuple with values and labels - ret = PyTuple_New(npyarr->shape.len+1); - for (i = 0; i < npyarr->shape.len; i++) - { - if (npyarr->labels[i]) - { - PyTuple_SET_ITEM(ret, i+1, npyarr->labels[i]); - npyarr->labels[i] = NULL; - } - else - { - Py_INCREF(Py_None); - PyTuple_SET_ITEM(ret, i+1, Py_None); - } - } - PyTuple_SET_ITEM(ret, 0, npyarr->ret); - } + ret = Npy_returnLabelled(npyarr); + npyarr->ret = NULL; Npy_releaseContext(npyarr); } @@ -252,6 +262,7 @@ int Object_npyArrayAddItem(JSOBJ obj, JSOBJ value) PyErr_SetString(PyExc_ValueError, "Cannot decode multidimensional arrays with variable length elements to numpy"); goto fail; } + npyarr->elcount = 0; npyarr->ret = PyList_New(0); if (!npyarr->ret) { @@ -333,7 +344,10 @@ JSOBJ Object_npyEndArrayList(JSOBJ obj) // convert decoded list to numpy array list = (PyObject *) npyarr->ret; - ret = PyArray_FROM_O(list); + npyarr->ret = PyArray_FROM_O(list); + + ret = Npy_returnLabelled(npyarr); + npyarr->ret = list; ((JSONObjectDecoder*)npyarr->dec)->newArray = Object_npyNewArray; ((JSONObjectDecoder*)npyarr->dec)->arrayAddItem = Object_npyArrayAddItem; @@ -352,6 +366,7 @@ int Object_npyArrayListAddItem(JSOBJ obj, JSOBJ value) } PyList_Append((PyObject*) npyarr->ret, value); Py_DECREF( (PyObject *) value); + npyarr->elcount++; return 1; } @@ -543,7 +558,7 @@ PyObject* JSONToObj(PyObject* self, PyObject *args, PyObject *kwargs) decoder = (JSONObjectDecoder*) &pyDecoder; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|iiO&", kwlist, &sarg, &numpy, &labelled, PyArray_DescrConverter, &dtype)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|iiO&", kwlist, &sarg, &numpy, &labelled, PyArray_DescrConverter2, &dtype)) { return NULL; } diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 06f47677c24f2..d2debd0e67dd2 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2195,7 +2195,7 @@ def test_from_json_nones(self): df = DataFrame([['1', '2'], ['4', '5', '6']]) unser = DataFrame.from_json(df.to_json()) - self.assert_(np.isnan(unser['2'][0])) + self.assert_(unser['2'][0] is None) unser = DataFrame.from_json(df.to_json(), numpy=False) self.assert_(unser['2'][0] is None)