Skip to content

Commit dd3cc48

Browse files
committed
BUG: DataFrame.to_json OverflowError with np.long* dtypes
1 parent b284101 commit dd3cc48

File tree

5 files changed

+53
-42
lines changed

5 files changed

+53
-42
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,7 @@ I/O
348348
- Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a csv with no headers (:issue:`54459`)
349349
- Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
350350
- Bug in :func:`to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`)
351+
- Bug in :meth:`DataFrame.to_json` raising ``OverflowError`` with ``np.long*`` dtypes (:issue:`55403`)
351352
- Bug in :meth:`pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`)
352353

353354
Period

pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h

+15-14
Original file line numberDiff line numberDiff line change
@@ -138,19 +138,20 @@ typedef int64_t JSLONG;
138138
#endif
139139

140140
enum JSTYPES {
141-
JT_NULL, // NULL
142-
JT_TRUE, // boolean true
143-
JT_FALSE, // boolean false
144-
JT_INT, // (JSINT32 (signed 32-bit))
145-
JT_LONG, // (JSINT64 (signed 64-bit))
146-
JT_DOUBLE, // (double)
147-
JT_BIGNUM, // integer larger than sys.maxsize
148-
JT_UTF8, // (char 8-bit)
149-
JT_ARRAY, // Array structure
150-
JT_OBJECT, // Key/Value structure
151-
JT_INVALID, // Internal, do not return nor expect
152-
JT_POS_INF, // Positive infinity
153-
JT_NEG_INF, // Negative infinity
141+
JT_NULL, // NULL
142+
JT_TRUE, // boolean true
143+
JT_FALSE, // boolean false
144+
JT_INT, // (JSINT32 (signed 32-bit))
145+
JT_LONG, // (JSINT64 (signed 64-bit))
146+
JT_DOUBLE, // (double)
147+
JT_BIGNUM, // integer larger than sys.maxsize
148+
JT_UTF8, // (char 8-bit)
149+
JT_ARRAY, // Array structure
150+
JT_OBJECT, // Key/Value structure
151+
JT_INVALID, // Internal, do not return nor expect
152+
JT_POS_INF, // Positive infinity
153+
JT_NEG_INF, // Negative infinity
154+
JT_LONG_DOUBLE // Long Double
154155
};
155156

156157
typedef void * JSOBJ;
@@ -181,7 +182,7 @@ typedef struct __JSONObjectEncoder {
181182
size_t *_outLen);
182183
JSINT64 (*getLongValue)(JSOBJ obj, JSONTypeContext *tc);
183184
JSINT32 (*getIntValue)(JSOBJ obj, JSONTypeContext *tc);
184-
double (*getDoubleValue)(JSOBJ obj, JSONTypeContext *tc);
185+
long double (*getLongDoubleValue)(JSOBJ obj, JSONTypeContext *tc);
185186
const char *(*getBigNumStringValue)(JSOBJ obj, JSONTypeContext *tc,
186187
size_t *_outLen);
187188

pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c

+14-14
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ The extra 2 bytes are for the quotes around the string
7474
*/
7575
#define RESERVE_STRING(_len) (2 + ((_len)*6))
7676

77-
static const double g_pow10[] = {1,
77+
static const long double g_pow10[] = {1,
7878
10,
7979
100,
8080
1000,
@@ -784,29 +784,29 @@ void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) {
784784
enc->offset += (wstr - (enc->offset));
785785
}
786786

787-
int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc,
788-
double value) {
787+
int Buffer_AppendLongDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc,
788+
long double value) {
789789
/* if input is beyond the thresholds, revert to exponential */
790-
const double thres_max = (double)1e16 - 1;
791-
const double thres_min = (double)1e-15;
790+
const long double thres_max = (long double)1e16 - 1;
791+
const long double thres_min = (long double)1e-15;
792792
char precision_str[20];
793793
int count;
794-
double diff = 0.0;
794+
long double diff = 0.0;
795795
char *str = enc->offset;
796796
char *wstr = str;
797797
unsigned long long whole;
798-
double tmp;
798+
long double tmp;
799799
unsigned long long frac;
800800
int neg;
801-
double pow10;
801+
long double pow10;
802802

803803
if (value == HUGE_VAL || value == -HUGE_VAL) {
804-
SetError(obj, enc, "Invalid Inf value when encoding double");
804+
SetError(obj, enc, "Invalid Inf value when encoding long double");
805805
return FALSE;
806806
}
807807

808808
if (!(value == value)) {
809-
SetError(obj, enc, "Invalid Nan value when encoding double");
809+
SetError(obj, enc, "Invalid Nan value when encoding long double");
810810
return FALSE;
811811
}
812812

@@ -942,7 +942,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
942942
This reservation must hold
943943
944944
length of _name as encoded worst case +
945-
maxLength of double to string OR maxLength of JSLONG to string
945+
maxLength of long double to string OR maxLength of JSLONG to string
946946
*/
947947

948948
Buffer_Reserve(enc, 256 + RESERVE_STRING(cbName));
@@ -1076,9 +1076,9 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
10761076
break;
10771077
}
10781078

1079-
case JT_DOUBLE: {
1080-
if (!Buffer_AppendDoubleUnchecked(obj, enc,
1081-
enc->getDoubleValue(obj, &tc))) {
1079+
case JT_LONG_DOUBLE: {
1080+
if (!Buffer_AppendLongDoubleUnchecked(obj, enc,
1081+
enc->getLongDoubleValue(obj, &tc))) {
10821082
enc->endTypeContext(obj, &tc);
10831083
enc->level--;
10841084
return;

pandas/_libs/src/vendored/ujson/python/objToJSON.c

+15-14
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ typedef struct __TypeContext {
105105
PyObject *attrList;
106106
PyObject *iterator;
107107

108-
double doubleValue;
108+
long double longDoubleValue;
109109
JSINT64 longValue;
110110

111111
char *cStr;
@@ -164,7 +164,7 @@ static TypeContext *createTypeContext(void) {
164164
pc->index = 0;
165165
pc->size = 0;
166166
pc->longValue = 0;
167-
pc->doubleValue = 0.0;
167+
pc->longDoubleValue = (long double) 0.0;
168168
pc->cStr = NULL;
169169
pc->npyarr = NULL;
170170
pc->pdblock = NULL;
@@ -1494,8 +1494,8 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
14941494
if (npy_isnan(val) || npy_isinf(val)) {
14951495
tc->type = JT_NULL;
14961496
} else {
1497-
pc->doubleValue = val;
1498-
tc->type = JT_DOUBLE;
1497+
pc->longDoubleValue = (long double) val;
1498+
tc->type = JT_LONG_DOUBLE;
14991499
}
15001500
return;
15011501
} else if (PyBytes_Check(obj)) {
@@ -1507,8 +1507,8 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
15071507
tc->type = JT_UTF8;
15081508
return;
15091509
} else if (object_is_decimal_type(obj)) {
1510-
pc->doubleValue = PyFloat_AsDouble(obj);
1511-
tc->type = JT_DOUBLE;
1510+
pc->longDoubleValue = (long double) PyFloat_AsDouble(obj);
1511+
tc->type = JT_LONG_DOUBLE;
15121512
return;
15131513
} else if (PyDateTime_Check(obj) || PyDate_Check(obj)) {
15141514
if (object_is_nat_type(obj)) {
@@ -1605,10 +1605,12 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
16051605
PyArray_DescrFromType(NPY_BOOL));
16061606
tc->type = (pc->longValue) ? JT_TRUE : JT_FALSE;
16071607
return;
1608-
} else if (PyArray_IsScalar(obj, Float) || PyArray_IsScalar(obj, Double)) {
1609-
PyArray_CastScalarToCtype(obj, &(pc->doubleValue),
1610-
PyArray_DescrFromType(NPY_DOUBLE));
1611-
tc->type = JT_DOUBLE;
1608+
} else if (PyArray_IsScalar(obj, Float) ||
1609+
PyArray_IsScalar(obj, Double) ||
1610+
PyArray_IsScalar(obj, LongDouble)) {
1611+
PyArray_CastScalarToCtype(obj, &(pc->longDoubleValue),
1612+
PyArray_DescrFromType(NPY_LONGDOUBLE));
1613+
tc->type = JT_LONG_DOUBLE;
16121614
return;
16131615
} else if (PyArray_Check(obj) && PyArray_CheckScalar(obj)) {
16141616
PyErr_Format(PyExc_TypeError,
@@ -1925,8 +1927,8 @@ JSINT64 Object_getLongValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
19251927
return GET_TC(tc)->longValue;
19261928
}
19271929

1928-
double Object_getDoubleValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
1929-
return GET_TC(tc)->doubleValue;
1930+
long double Object_getLongDoubleValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
1931+
return GET_TC(tc)->longDoubleValue;
19301932
}
19311933

19321934
const char *Object_getBigNumStringValue(JSOBJ obj, JSONTypeContext *tc,
@@ -1970,7 +1972,6 @@ PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args,
19701972
if (PyDateTimeAPI == NULL) {
19711973
return NULL;
19721974
}
1973-
19741975
PandasDateTime_IMPORT;
19751976
if (PandasDateTimeAPI == NULL) {
19761977
return NULL;
@@ -2006,7 +2007,7 @@ PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args,
20062007
Object_getStringValue,
20072008
Object_getLongValue,
20082009
NULL, // getIntValue is unused
2009-
Object_getDoubleValue,
2010+
Object_getLongDoubleValue,
20102011
Object_getBigNumStringValue,
20112012
Object_iterBegin,
20122013
Object_iterNext,

pandas/tests/io/json/test_ujson.py

+8
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,14 @@ def test_float_array(self, float_numpy_dtype):
762762
)
763763
tm.assert_almost_equal(float_input, float_output)
764764

765+
def test_array_long_double(self):
766+
dtype = np.longdouble
767+
arr = np.arange(100.202, 200.202, 1, dtype=dtype)
768+
arr = arr.reshape((5, 5, 4))
769+
770+
arr_out = np.array(ujson.ujson_loads(ujson.ujson_dumps(arr)), dtype=dtype)
771+
tm.assert_almost_equal(arr, arr_out)
772+
765773
def test_float_max(self, float_numpy_dtype):
766774
klass = np.dtype(float_numpy_dtype).type
767775
num = klass(np.finfo(float_numpy_dtype).max / 10)

0 commit comments

Comments
 (0)