Skip to content

Commit 8001e15

Browse files
Komnomnomnom authored and jreback committed
BUG: json invoke default handler for unsupported numpy dtypes
closes #12554 Author: Kieran O'Mahony <[email protected]> Closes #12878 from Komnomnomnom/np-default-handler and squashes the following commits: daa256c [Kieran O'Mahony] BUG: json default handler not invoked when unsupported numpy dtypes encountered
1 parent 1e0b228 commit 8001e15

File tree

4 files changed

+91
-45
lines changed

4 files changed

+91
-45
lines changed

doc/source/io.rst

+16-27
Original file line numberDiff line numberDiff line change
@@ -1501,45 +1501,34 @@ Fallback Behavior
15011501

15021502
If the JSON serializer cannot handle the container contents directly it will fall back in the following manner:
15031503

1504-
- if a ``toDict`` method is defined by the unrecognised object then that
1505-
will be called and its returned ``dict`` will be JSON serialized.
1506-
- if a ``default_handler`` has been passed to ``to_json`` that will
1507-
be called to convert the object.
1508-
- otherwise an attempt is made to convert the object to a ``dict`` by
1509-
parsing its contents. However if the object is complex this will often fail
1510-
with an ``OverflowError``.
1504+
- if the dtype is unsupported (e.g. ``np.complex``) then the ``default_handler``, if provided, will be called
1505+
for each value; otherwise an exception is raised.
15111506

1512-
Your best bet when encountering ``OverflowError`` during serialization
1513-
is to specify a ``default_handler``. For example ``timedelta`` can cause
1514-
problems:
1507+
- if an object is unsupported it will attempt the following:
15151508

1516-
.. ipython:: python
1517-
:suppress:
15181509

1519-
from datetime import timedelta
1520-
dftd = DataFrame([timedelta(23), timedelta(seconds=5), 42])
1510+
* check if the object has defined a ``toDict`` method and call it.
1511+
A ``toDict`` method should return a ``dict`` which will then be JSON serialized.
15211512

1522-
.. code-block:: ipython
1513+
* invoke the ``default_handler`` if one was provided.
15231514

1524-
In [141]: from datetime import timedelta
1515+
* convert the object to a ``dict`` by traversing its contents. However, this will often fail
1516+
with an ``OverflowError`` or give unexpected results.
15251517

1526-
In [142]: dftd = DataFrame([timedelta(23), timedelta(seconds=5), 42])
1518+
In general the best approach for unsupported objects or dtypes is to provide a ``default_handler``.
1519+
For example:
1520+
1521+
.. code-block:: python
15271522
1528-
In [143]: dftd.to_json()
1523+
DataFrame([1.0, 2.0, complex(1.0, 2.0)]).to_json() # raises
15291524
1530-
---------------------------------------------------------------------------
1531-
OverflowError Traceback (most recent call last)
1532-
OverflowError: Maximum recursion level reached
1525+
RuntimeError: Unhandled numpy dtype 15
15331526
1534-
which can be dealt with by specifying a simple ``default_handler``:
1527+
can be dealt with by specifying a simple ``default_handler``:
15351528

15361529
.. ipython:: python
15371530
1538-
dftd.to_json(default_handler=str)
1539-
1540-
def my_handler(obj):
1541-
return obj.total_seconds()
1542-
dftd.to_json(default_handler=my_handler)
1531+
DataFrame([1.0, 2.0, complex(1.0, 2.0)]).to_json(default_handler=str)
15431532
15441533
.. _io.json_reader:
15451534

doc/source/whatsnew/v0.18.1.txt

+2
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,8 @@ Bug Fixes
458458

459459
- Bug in ``.loc`` with out-of-bounds in a large indexer would raise ``IndexError`` rather than ``KeyError`` (:issue:`12527`)
460460
- Bug in resampling when using a ``TimedeltaIndex`` and ``.asfreq()``, would previously not include the final fencepost (:issue:`12926`)
461+
- Bug in ``DataFrame.to_json`` where values with an unsupported ``dtype`` were not passed to the default handler (:issue:`12554`).
462+
461463
- Bug in equality testing with a ``Categorical`` in a ``DataFrame`` (:issue:`12564`)
462464
- Bug in ``GroupBy.first()``, ``.last()`` returns incorrect row when ``TimeGrouper`` is used (:issue:`7453`)
463465

pandas/io/tests/json/test_pandas.py

+33-2
Original file line numberDiff line numberDiff line change
@@ -809,17 +809,48 @@ def test_mixed_timedelta_datetime(self):
809809

810810
def test_default_handler(self):
811811
value = object()
812-
frame = DataFrame({'a': ['a', value]})
813-
expected = frame.applymap(str)
812+
frame = DataFrame({'a': [7, value]})
813+
expected = DataFrame({'a': [7, str(value)]})
814814
result = pd.read_json(frame.to_json(default_handler=str))
815815
assert_frame_equal(expected, result, check_index_type=False)
816816

817+
def test_default_handler_indirect(self):
818+
from pandas.io.json import dumps
819+
820+
def default(obj):
821+
if isinstance(obj, complex):
822+
return [('mathjs', 'Complex'),
823+
('re', obj.real),
824+
('im', obj.imag)]
825+
return str(obj)
826+
df_list = [9, DataFrame({'a': [1, 'STR', complex(4, -5)],
827+
'b': [float('nan'), None, 'N/A']},
828+
columns=['a', 'b'])]
829+
expected = ('[9,[[1,null],["STR",null],[[["mathjs","Complex"],'
830+
'["re",4.0],["im",-5.0]],"N\\/A"]]]')
831+
self.assertEqual(expected, dumps(df_list, default_handler=default,
832+
orient="values"))
833+
834+
def test_default_handler_numpy_unsupported_dtype(self):
835+
# GH12554 to_json raises 'Unhandled numpy dtype 15'
836+
df = DataFrame({'a': [1, 2.3, complex(4, -5)],
837+
'b': [float('nan'), None, complex(1.2, 0)]},
838+
columns=['a', 'b'])
839+
expected = ('[["(1+0j)","(nan+0j)"],'
840+
'["(2.3+0j)","(nan+0j)"],'
841+
'["(4-5j)","(1.2+0j)"]]')
842+
self.assertEqual(expected, df.to_json(default_handler=str,
843+
orient="values"))
844+
817845
def test_default_handler_raises(self):
818846
def my_handler_raises(obj):
819847
raise TypeError("raisin")
820848
self.assertRaises(TypeError,
821849
DataFrame({'a': [1, 2, object()]}).to_json,
822850
default_handler=my_handler_raises)
851+
self.assertRaises(TypeError,
852+
DataFrame({'a': [1, 2, complex(4, -5)]}).to_json,
853+
default_handler=my_handler_raises)
823854

824855
def test_categorical(self):
825856
# GH4377 df.to_json segfaults with non-ndarray blocks

pandas/src/ujson/python/objToJSON.c

+40-16
Original file line numberDiff line numberDiff line change
@@ -636,10 +636,6 @@ static int NpyTypeToJSONType(PyObject* obj, JSONTypeContext* tc, int npyType, vo
636636
}
637637

638638
PRINTMARK();
639-
PyErr_Format (
640-
PyExc_RuntimeError,
641-
"Unhandled numpy dtype %d",
642-
npyType);
643639
return JT_INVALID;
644640
}
645641

@@ -791,6 +787,7 @@ int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc)
791787
Py_INCREF(obj);
792788
((PyObjectEncoder*) tc->encoder)->npyType = PyArray_TYPE(npyarr->array);
793789
((PyObjectEncoder*) tc->encoder)->npyValue = npyarr->dataptr;
790+
((PyObjectEncoder*) tc->encoder)->npyCtxtPassthru = npyarr;
794791
}
795792
else
796793
{
@@ -1917,6 +1914,26 @@ char** NpyArr_encodeLabels(PyArrayObject* labels, JSONObjectEncoder* enc, npy_in
19171914
return ret;
19181915
}
19191916

1917+
void Object_invokeDefaultHandler(PyObject *obj, PyObjectEncoder *enc)
1918+
{
1919+
PyObject *tmpObj = NULL;
1920+
PRINTMARK();
1921+
tmpObj = PyObject_CallFunctionObjArgs(enc->defaultHandler, obj, NULL);
1922+
if (!PyErr_Occurred())
1923+
{
1924+
if (tmpObj == NULL)
1925+
{
1926+
PyErr_SetString(PyExc_TypeError, "Failed to execute default handler");
1927+
}
1928+
else
1929+
{
1930+
encode (tmpObj, (JSONObjectEncoder*) enc, NULL, 0);
1931+
}
1932+
}
1933+
Py_XDECREF(tmpObj);
1934+
return;
1935+
}
1936+
19201937
void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc)
19211938
{
19221939
PyObject *obj, *exc, *toDictFunc, *tmpObj, *values;
@@ -1942,6 +1959,24 @@ void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc)
19421959
PRINTMARK();
19431960
tc->prv = &(enc->basicTypeContext);
19441961
tc->type = NpyTypeToJSONType(obj, tc, enc->npyType, enc->npyValue);
1962+
1963+
if (tc->type == JT_INVALID)
1964+
{
1965+
if(enc->defaultHandler)
1966+
{
1967+
enc->npyType = -1;
1968+
PRINTMARK();
1969+
Object_invokeDefaultHandler(enc->npyCtxtPassthru->getitem(enc->npyValue, enc->npyCtxtPassthru->array), enc);
1970+
}
1971+
else
1972+
{
1973+
PyErr_Format (
1974+
PyExc_RuntimeError,
1975+
"Unhandled numpy dtype %d",
1976+
enc->npyType);
1977+
}
1978+
}
1979+
enc->npyCtxtPassthru = NULL;
19451980
enc->npyType = -1;
19461981
return;
19471982
}
@@ -2528,18 +2563,7 @@ void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc)
25282563

25292564
if (enc->defaultHandler)
25302565
{
2531-
PRINTMARK();
2532-
tmpObj = PyObject_CallFunctionObjArgs(enc->defaultHandler, obj, NULL);
2533-
if (tmpObj == NULL || PyErr_Occurred())
2534-
{
2535-
if (!PyErr_Occurred())
2536-
{
2537-
PyErr_SetString(PyExc_TypeError, "Failed to execute default handler");
2538-
}
2539-
goto INVALID;
2540-
}
2541-
encode (tmpObj, (JSONObjectEncoder*) enc, NULL, 0);
2542-
Py_DECREF(tmpObj);
2566+
Object_invokeDefaultHandler(obj, enc);
25432567
goto INVALID;
25442568
}
25452569

0 commit comments

Comments
 (0)