Skip to content

Commit 0b279ba

Browse files
committed
BUG: DataFrame.to_json OverflowError with np.long* dtypes
1 parent b284101 commit 0b279ba

File tree

5 files changed

+49
-15
lines changed

5 files changed

+49
-15
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,7 @@ I/O
348348
- Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a csv with no headers (:issue:`54459`)
349349
- Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
350350
- Bug in :func:`to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`)
351+
- Bug in :meth:`DataFrame.to_json` raising ``OverflowError`` with ``np.long*`` dtypes (:issue:`55403`)
351352
- Bug in :meth:`pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`)
352353

353354
Period

pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h

+2
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ enum JSTYPES {
151151
JT_INVALID, // Internal, do not return nor expect
152152
JT_POS_INF, // Positive infinity
153153
JT_NEG_INF, // Negative infinity
154+
JT_LONGDOUBLE, // (long double)
154155
};
155156

156157
typedef void * JSOBJ;
@@ -182,6 +183,7 @@ typedef struct __JSONObjectEncoder {
182183
JSINT64 (*getLongValue)(JSOBJ obj, JSONTypeContext *tc);
183184
JSINT32 (*getIntValue)(JSOBJ obj, JSONTypeContext *tc);
184185
double (*getDoubleValue)(JSOBJ obj, JSONTypeContext *tc);
186+
long double (*getLongDoubleValue)(JSOBJ obj, JSONTypeContext *tc);
185187
const char *(*getBigNumStringValue)(JSOBJ obj, JSONTypeContext *tc,
186188
size_t *_outLen);
187189

pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c

+26-15
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ The extra 2 bytes are for the quotes around the string
7474
*/
7575
#define RESERVE_STRING(_len) (2 + ((_len)*6))
7676

77-
static const double g_pow10[] = {1,
77+
static const long double g_pow10[] = {1,
7878
10,
7979
100,
8080
1000,
@@ -784,29 +784,29 @@ void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) {
784784
enc->offset += (wstr - (enc->offset));
785785
}
786786

787-
int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc,
788-
double value) {
787+
int Buffer_AppendLongDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc,
788+
long double value) {
789789
/* if input is beyond the thresholds, revert to exponential */
790-
const double thres_max = (double)1e16 - 1;
791-
const double thres_min = (double)1e-15;
790+
const long double thres_max = (long double)1e16 - 1;
791+
const long double thres_min = (long double)1e-15;
792792
char precision_str[20];
793793
int count;
794-
double diff = 0.0;
794+
long double diff = 0.0;
795795
char *str = enc->offset;
796796
char *wstr = str;
797797
unsigned long long whole;
798-
double tmp;
798+
long double tmp;
799799
unsigned long long frac;
800800
int neg;
801-
double pow10;
801+
long double pow10;
802802

803803
if (value == HUGE_VAL || value == -HUGE_VAL) {
804-
SetError(obj, enc, "Invalid Inf value when encoding double");
804+
SetError(obj, enc, "Invalid Inf value when encoding long double");
805805
return FALSE;
806806
}
807807

808808
if (!(value == value)) {
809-
SetError(obj, enc, "Invalid Nan value when encoding double");
809+
SetError(obj, enc, "Invalid Nan value when encoding long double");
810810
return FALSE;
811811
}
812812

@@ -825,12 +825,12 @@ int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc,
825825
precision_str[0] = '%';
826826
precision_str[1] = '.';
827827
#if defined(_WIN32) && defined(_MSC_VER)
828-
sprintf_s(precision_str + 2, sizeof(precision_str) - 2, "%ug",
828+
sprintf_s(precision_str + 2, sizeof(precision_str) - 2, "%uLg",
829829
enc->doublePrecision);
830830
enc->offset += sprintf_s(str, enc->end - enc->offset, precision_str,
831831
neg ? -value : value);
832832
#else
833-
snprintf(precision_str + 2, sizeof(precision_str) - 2, "%ug",
833+
snprintf(precision_str + 2, sizeof(precision_str) - 2, "%uLg",
834834
enc->doublePrecision);
835835
enc->offset += snprintf(str, enc->end - enc->offset, precision_str,
836836
neg ? -value : value);
@@ -915,6 +915,7 @@ int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc,
915915
return TRUE;
916916
}
917917

918+
918919
/*
919920
FIXME:
920921
Handle integration functions returning NULL here */
@@ -942,7 +943,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
942943
This reservation must hold
943944
944945
length of _name as encoded worst case +
945-
maxLength of double to string OR maxLength of JSLONG to string
946+
maxLength of long double to string OR maxLength of JSLONG to string
946947
*/
947948

948949
Buffer_Reserve(enc, 256 + RESERVE_STRING(cbName));
@@ -1077,8 +1078,18 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
10771078
}
10781079

10791080
case JT_DOUBLE: {
1080-
if (!Buffer_AppendDoubleUnchecked(obj, enc,
1081-
enc->getDoubleValue(obj, &tc))) {
1081+
if (!Buffer_AppendLongDoubleUnchecked(obj, enc,
1082+
(long double) enc->getDoubleValue(obj, &tc))) {
1083+
enc->endTypeContext(obj, &tc);
1084+
enc->level--;
1085+
return;
1086+
}
1087+
break;
1088+
}
1089+
1090+
case JT_LONGDOUBLE: {
1091+
if (!Buffer_AppendLongDoubleUnchecked(obj, enc,
1092+
enc->getLongDoubleValue(obj, &tc))) {
10821093
enc->endTypeContext(obj, &tc);
10831094
enc->level--;
10841095
return;

pandas/_libs/src/vendored/ujson/python/objToJSON.c

+12
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ typedef struct __TypeContext {
106106
PyObject *iterator;
107107

108108
double doubleValue;
109+
long double longDoubleValue;
109110
JSINT64 longValue;
110111

111112
char *cStr;
@@ -165,6 +166,7 @@ static TypeContext *createTypeContext(void) {
165166
pc->size = 0;
166167
pc->longValue = 0;
167168
pc->doubleValue = 0.0;
169+
pc->longDoubleValue = 0.0L;
168170
pc->cStr = NULL;
169171
pc->npyarr = NULL;
170172
pc->pdblock = NULL;
@@ -1610,6 +1612,11 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
16101612
PyArray_DescrFromType(NPY_DOUBLE));
16111613
tc->type = JT_DOUBLE;
16121614
return;
1615+
} else if (PyArray_IsScalar(obj, LongDouble)) {
1616+
PyArray_CastScalarToCtype(obj, &(pc->longDoubleValue),
1617+
PyArray_DescrFromType(NPY_LONGDOUBLE));
1618+
tc->type = JT_LONGDOUBLE;
1619+
return;
16131620
} else if (PyArray_Check(obj) && PyArray_CheckScalar(obj)) {
16141621
PyErr_Format(PyExc_TypeError,
16151622
"%R (0d array) is not JSON serializable at the moment",
@@ -1929,6 +1936,10 @@ double Object_getDoubleValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
19291936
return GET_TC(tc)->doubleValue;
19301937
}
19311938

1939+
/*
 * Encoder callback: hand back the long double stored in the type context.
 *
 * obj is unused. tc is the JSON type context; GET_TC(tc) yields the
 * TypeContext whose longDoubleValue field is returned.
 *
 * The return type is long double (not double) so that the value is not
 * narrowed on return and so that this function's type matches the
 * `long double (*getLongDoubleValue)(JSOBJ, JSONTypeContext *)` slot of the
 * JSONObjectEncoder it is installed into.
 */
long double Object_getLongDoubleValue(JSOBJ Py_UNUSED(obj),
                                      JSONTypeContext *tc) {
  return GET_TC(tc)->longDoubleValue;
}
1942+
19321943
const char *Object_getBigNumStringValue(JSOBJ obj, JSONTypeContext *tc,
19331944
size_t *_outLen) {
19341945
PyObject *repr = PyObject_Str(obj);
@@ -2007,6 +2018,7 @@ PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args,
20072018
Object_getLongValue,
20082019
NULL, // getIntValue is unused
20092020
Object_getDoubleValue,
2021+
Object_getLongDoubleValue,
20102022
Object_getBigNumStringValue,
20112023
Object_iterBegin,
20122024
Object_iterNext,

pandas/tests/io/json/test_ujson.py

+8
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,14 @@ def test_float_array(self, float_numpy_dtype):
762762
)
763763
tm.assert_almost_equal(float_input, float_output)
764764

765+
def test_array_long_double(self):
    """Round-trip a 5x5x4 ``np.longdouble`` array through ujson dumps/loads."""
    long_double = np.longdouble
    expected = np.arange(100.202, 200.202, 1, dtype=long_double).reshape((5, 5, 4))

    # Encode to JSON text, decode back, and rebuild with the same dtype.
    encoded = ujson.ujson_dumps(expected)
    decoded = ujson.ujson_loads(encoded)
    roundtripped = np.array(decoded, dtype=long_double)

    tm.assert_almost_equal(expected, roundtripped)
772+
765773
def test_float_max(self, float_numpy_dtype):
766774
klass = np.dtype(float_numpy_dtype).type
767775
num = klass(np.finfo(float_numpy_dtype).max / 10)

0 commit comments

Comments
 (0)