Skip to content

Commit 3755762

Browse files
authored
JSON Support for parsing NaN, Infinity and -Infinity (pandas-dev#30295)
1 parent 4168e0c commit 3755762

File tree

5 files changed

+75
-5
lines changed

5 files changed

+75
-5
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ Other enhancements
216216
(:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine
217217
now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`).
218218
- The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`)
219+
- :func:`pandas.read_json` now parses ``NaN``, ``Infinity`` and ``-Infinity`` (:issue:`12213`)
219220
- The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`)
220221
- :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue:`30270`)
221222
- DataFrame constructor preserves `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`)

pandas/_libs/src/ujson/lib/ultrajson.h

+4
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ enum JSTYPES {
154154
JT_ARRAY, // Array structure
155155
JT_OBJECT, // Key/Value structure
156156
JT_INVALID, // Internal, do not return nor expect
157+
JT_POS_INF, // Positive infinity
158+
JT_NEG_INF, // Negative infinity
157159
};
158160

159161
typedef void * JSOBJ;
@@ -290,6 +292,8 @@ typedef struct __JSONObjectDecoder {
290292
JSOBJ (*newTrue)(void *prv);
291293
JSOBJ (*newFalse)(void *prv);
292294
JSOBJ (*newNull)(void *prv);
295+
JSOBJ (*newPosInf)(void *prv);
296+
JSOBJ (*newNegInf)(void *prv);
293297
JSOBJ (*newObject)(void *prv, void *decoder);
294298
JSOBJ (*endObject)(void *prv, JSOBJ obj);
295299
JSOBJ (*newArray)(void *prv, void *decoder);

pandas/_libs/src/ujson/lib/ultrajsondec.c

+52-1
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,16 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) {
127127

128128
JSUINT64 overflowLimit = LLONG_MAX;
129129

130-
if (*(offset) == '-') {
130+
if (*(offset) == 'I') {
131+
goto DECODE_INF;
132+
} else if (*(offset) == 'N') {
133+
goto DECODE_NAN;
134+
} else if (*(offset) == '-') {
131135
offset++;
132136
intNeg = -1;
137+
if (*(offset) == 'I') {
138+
goto DECODE_INF;
139+
}
133140
overflowLimit = LLONG_MIN;
134141
}
135142

@@ -281,6 +288,48 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) {
281288
}
282289
}
283290

291+
DECODE_NAN:
292+
offset++;
293+
if (*(offset++) != 'a') goto SET_NAN_ERROR;
294+
if (*(offset++) != 'N') goto SET_NAN_ERROR;
295+
296+
ds->lastType = JT_NULL;
297+
ds->start = offset;
298+
return ds->dec->newNull(ds->prv);
299+
300+
SET_NAN_ERROR:
301+
return SetError(ds, -1, "Unexpected character found when decoding 'NaN'");
302+
303+
DECODE_INF:
304+
offset++;
305+
if (*(offset++) != 'n') goto SET_INF_ERROR;
306+
if (*(offset++) != 'f') goto SET_INF_ERROR;
307+
if (*(offset++) != 'i') goto SET_INF_ERROR;
308+
if (*(offset++) != 'n') goto SET_INF_ERROR;
309+
if (*(offset++) != 'i') goto SET_INF_ERROR;
310+
if (*(offset++) != 't') goto SET_INF_ERROR;
311+
if (*(offset++) != 'y') goto SET_INF_ERROR;
312+
313+
ds->start = offset;
314+
315+
if (intNeg == 1) {
316+
ds->lastType = JT_POS_INF;
317+
return ds->dec->newPosInf(ds->prv);
318+
} else {
319+
ds->lastType = JT_NEG_INF;
320+
return ds->dec->newNegInf(ds->prv);
321+
}
322+
323+
SET_INF_ERROR:
324+
if (intNeg == 1) {
325+
const char *msg = "Unexpected character found when decoding 'Infinity'";
326+
return SetError(ds, -1, msg);
327+
} else {
328+
const char *msg = "Unexpected character found when decoding '-Infinity'";
329+
return SetError(ds, -1, msg);
330+
}
331+
332+
284333
BREAK_EXP_LOOP:
285334
// FIXME: Check for arithmetic overflow here
286335
ds->lastType = JT_DOUBLE;
@@ -1070,6 +1119,8 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds) {
10701119
case '7':
10711120
case '8':
10721121
case '9':
1122+
case 'I':
1123+
case 'N':
10731124
case '-':
10741125
return decode_numeric(ds);
10751126

pandas/_libs/src/ujson/python/JSONtoObj.c

+9-4
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,10 @@ JSOBJ Object_newFalse(void *prv) { Py_RETURN_FALSE; }
459459

460460
/* Decoder callback for a null value: hands back Python's None. */
JSOBJ Object_newNull(void *prv) {
    (void)prv; /* decoder private data is not needed here */
    Py_RETURN_NONE;
}
461461

462+
/* Decoder callback for positive infinity: wraps Py_HUGE_VAL in a Python float. */
JSOBJ Object_newPosInf(void *prv) {
    (void)prv; /* decoder private data is not needed here */
    return PyFloat_FromDouble(Py_HUGE_VAL);
}
463+
464+
/* Decoder callback for negative infinity: wraps -Py_HUGE_VAL in a Python float. */
JSOBJ Object_newNegInf(void *prv) {
    (void)prv; /* decoder private data is not needed here */
    return PyFloat_FromDouble(-Py_HUGE_VAL);
}
465+
462466
/* Decoder callback that starts a key/value structure: returns a fresh dict. */
JSOBJ Object_newObject(void *prv, void *decoder) {
    (void)prv;     /* unused */
    (void)decoder; /* unused */
    return PyDict_New();
}
463467

464468
/* Decoder callback that finishes a key/value structure: returns obj unchanged. */
JSOBJ Object_endObject(void *prv, JSOBJ obj) {
    (void)prv; /* decoder private data is not needed here */
    return obj;
}
@@ -502,10 +506,11 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) {
502506
JSONObjectDecoder dec = {
503507
Object_newString, Object_objectAddKey, Object_arrayAddItem,
504508
Object_newTrue, Object_newFalse, Object_newNull,
505-
Object_newObject, Object_endObject, Object_newArray,
506-
Object_endArray, Object_newInteger, Object_newLong,
507-
Object_newDouble, Object_releaseObject, PyObject_Malloc,
508-
PyObject_Free, PyObject_Realloc};
509+
Object_newPosInf, Object_newNegInf, Object_newObject,
510+
Object_endObject, Object_newArray, Object_endArray,
511+
Object_newInteger, Object_newLong, Object_newDouble,
512+
Object_releaseObject, PyObject_Malloc, PyObject_Free,
513+
PyObject_Realloc};
509514

510515
dec.preciseFloat = 0;
511516
dec.prv = NULL;

pandas/tests/io/json/test_pandas.py

+9
Original file line numberDiff line numberDiff line change
@@ -1597,3 +1597,12 @@ def test_json_indent_all_orients(self, orient, expected):
15971597
def test_json_negative_indent_raises(self):
15981598
with pytest.raises(ValueError, match="must be a nonnegative integer"):
15991599
pd.DataFrame().to_json(indent=-1)
1600+
1601+
def test_emca_262_nan_inf_support(self):
1602+
# GH 12213
1603+
data = '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]'
1604+
result = pd.read_json(data)
1605+
expected = pd.DataFrame(
1606+
["a", np.nan, "NaN", np.inf, "Infinity", -np.inf, "-Infinity"]
1607+
)
1608+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)