From d22da4da85336c6b3367af7fe61efcdda2c99358 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 25 Jun 2020 07:41:33 +0000 Subject: [PATCH 01/19] TST: removed xfail from json.decode(long int) --- pandas/tests/io/json/test_ujson.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 952c583040360..d3daf8ba57ef2 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -568,8 +568,7 @@ def test_dumps_ints_larger_than_maxsize(self, bigNum): assert str(bigNum) == encoding # GH20599 - with pytest.raises(ValueError): - assert ujson.loads(encoding) == bigNum + assert ujson.decode(encoding) == bigNum @pytest.mark.parametrize( "int_exp", ["1337E40", "1.337E40", "1337E+9", "1.337e+40", "1.337E-4"] From 46d05a303eee2c8837f1c4abd3fc16d80dc576ca Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 25 Jun 2020 20:06:07 +0000 Subject: [PATCH 02/19] added Object_newBigNum to JSONtoObj.c --- pandas/_libs/src/ujson/python/JSONtoObj.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c index 3db10237b2688..9ee2eb2e1c1ec 100644 --- a/pandas/_libs/src/ujson/python/JSONtoObj.c +++ b/pandas/_libs/src/ujson/python/JSONtoObj.c @@ -483,6 +483,12 @@ JSOBJ Object_newDouble(void *prv, double value) { return PyFloat_FromDouble(value); } +JSOBJ Object_newBigNum(void* prv, wchar_t *start, wchar_t *end) { + PyObject* obj_as_unicode; + obj_as_unicode = PyUnicode_FromWideChar(start, (end - start)); + return PyLong_FromUnicode(obj_as_unicode, 0); +} + static void Object_releaseObject(void *prv, JSOBJ obj, void *_decoder) { PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; if (obj != decoder->npyarr_addr) { @@ -509,8 +515,8 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) { Object_newPosInf, Object_newNegInf, Object_newObject, Object_endObject, Object_newArray, Object_endArray, Object_newInteger, Object_newLong, Object_newDouble, - Object_releaseObject, PyObject_Malloc, PyObject_Free, - PyObject_Realloc}; + Object_newBigNum, Object_releaseObject, PyObject_Malloc, + PyObject_Free, PyObject_Realloc}; dec.preciseFloat = 0; dec.prv = NULL; From cce506cb8ee47db49ca62f5e568419ce88c1703f Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 25 Jun 2020 20:10:20 +0000 Subject: [PATCH 03/19] bug: changed PyLong_FromUnicode to PyLong_FromUnicodeObject --- pandas/_libs/src/ujson/python/JSONtoObj.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c index 9ee2eb2e1c1ec..49fb271c755b5 100644 --- a/pandas/_libs/src/ujson/python/JSONtoObj.c +++ b/pandas/_libs/src/ujson/python/JSONtoObj.c @@ -486,7 +486,7 @@ JSOBJ Object_newDouble(void *prv, double value) { JSOBJ Object_newBigNum(void* prv, wchar_t *start, wchar_t *end) { PyObject* obj_as_unicode; obj_as_unicode = PyUnicode_FromWideChar(start, (end - start)); - return PyLong_FromUnicode(obj_as_unicode, 0); + return PyLong_FromUnicodeObject(obj_as_unicode, 0); } static void Object_releaseObject(void *prv, JSOBJ obj, void *_decoder) { From a0a6846c9d2e27799b2e0f781504b0b9db4c2cdb Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 25 Jun 2020 20:16:24 +0000 Subject: [PATCH 04/19] updated JSONObjectDecoder --- pandas/_libs/src/ujson/lib/ultrajson.h | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h index 69284e1c3f2ab..4ea5f2c7d8da5 100644 --- a/pandas/_libs/src/ujson/lib/ultrajson.h +++ b/pandas/_libs/src/ujson/lib/ultrajson.h @@ -304,6 +304,7 @@ typedef struct __JSONObjectDecoder { JSOBJ (*newInt)(void *prv, JSINT32 value); JSOBJ (*newLong)(void *prv, JSINT64 value); JSOBJ (*newDouble)(void *prv, double value); + JSOBJ (*newBigNum)(void *prv, wchar_t *start, wchar_t *end); void (*releaseObject)(void *prv, JSOBJ obj, void *decoder); JSPFN_MALLOC malloc; JSPFN_FREE free; From c60cabcc4089c4c6a9da4583002d6da0ff45add0 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 26 Jun 2020 14:58:53 +0000 Subject: [PATCH 05/19] added case DECODE_BIGNUM to decode_numeric --- pandas/_libs/src/ujson/lib/ultrajsondec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/_libs/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c index 36eb170f8048f..4c105f244f973 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsondec.c +++ b/pandas/_libs/src/ujson/lib/ultrajsondec.c @@ -202,6 +202,10 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { return ds->dec->newInt(ds->prv, (JSINT32)(intValue * intNeg)); } +DECODE_BIGNUM: + + + DECODE_FRACTION: if (ds->dec->preciseFloat) { From 3c91879e0f06ebf40b7f80ab6b88a05da1446201 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 26 Jun 2020 19:17:34 +0000 Subject: [PATCH 06/19] fixed if statement to reset overflowing int --- pandas/_libs/src/ujson/lib/ultrajsondec.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c index 4c105f244f973..0e15a164ceb42 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsondec.c +++ b/pandas/_libs/src/ujson/lib/ultrajsondec.c @@ -118,6 +118,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { int intNeg = 1; int mantSize = 0; JSUINT64 intValue; + JSLONG newDigit; int chr; int decimalCount = 0; double frcValue = 0.0; @@ -145,6 +146,8 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { while (1) { chr = (int)(unsigned char)*(offset); + printf("chr=%u \n", chr); + printf("intValue: %lu\n", intValue); switch (chr) { case '0': @@ -160,12 +163,16 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { // FIXME: Check for arithmetic overflow here // PERF: Don't do 64-bit arithmetic here unless we know we have // to - intValue = intValue * 10ULL + (JSLONG)(chr - 48); + newDigit = (JSLONG)(chr - 48); - if (intValue > overflowLimit) { - return SetError(ds, -1, overflowLimit == LLONG_MAX - ? "Value is too big" - : "Value is too small"); + if (intValue*10ULL + newDigit > overflowLimit) { + printf("intValue=%lu\n", intValue); + // TO DO: store current intValue for later processing + // then reset intValue + intValue = (newDigit==0) ? 10 : newDigit; + } + else { + intValue = intValue * 10ULL + newDigit; } offset++; @@ -202,10 +209,6 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { return ds->dec->newInt(ds->prv, (JSINT32)(intValue * intNeg)); } -DECODE_BIGNUM: - - - DECODE_FRACTION: if (ds->dec->preciseFloat) { From e4eb0af0c0f01113a4bc27a0855cec01988f5f89 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 26 Jun 2020 19:18:39 +0000 Subject: [PATCH 07/19] removed debug statement --- pandas/_libs/src/ujson/lib/ultrajsondec.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/_libs/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c index 0e15a164ceb42..143d52f5055b6 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsondec.c +++ b/pandas/_libs/src/ujson/lib/ultrajsondec.c @@ -146,8 +146,6 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { while (1) { chr = (int)(unsigned char)*(offset); - printf("chr=%u \n", chr); - printf("intValue: %lu\n", intValue); switch (chr) { case '0': @@ -166,7 +164,6 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { newDigit = (JSLONG)(chr - 48); if (intValue*10ULL + newDigit > overflowLimit) { - printf("intValue=%lu\n", intValue); // TO DO: store current intValue for later processing // then reset intValue intValue = (newDigit==0) ? 10 : newDigit; From f1a3e0a9ad28370ce689dab9dc31e6d03beaa2ee Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 26 Jun 2020 20:54:07 +0000 Subject: [PATCH 08/19] added cStr variable to PyObjectDecoder --- pandas/_libs/src/ujson/lib/ultrajson.h | 2 +- pandas/_libs/src/ujson/python/JSONtoObj.c | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h index 4ea5f2c7d8da5..3c1f1ef40ef4b 100644 --- a/pandas/_libs/src/ujson/lib/ultrajson.h +++ b/pandas/_libs/src/ujson/lib/ultrajson.h @@ -304,7 +304,7 @@ typedef struct __JSONObjectDecoder { JSOBJ (*newInt)(void *prv, JSINT32 value); JSOBJ (*newLong)(void *prv, JSINT64 value); JSOBJ (*newDouble)(void *prv, double value); - JSOBJ (*newBigNum)(void *prv, wchar_t *start, wchar_t *end); + JSOBJ (*newBigNum)(void *prv, void *decoder); void (*releaseObject)(void *prv, JSOBJ obj, void *decoder); JSPFN_MALLOC malloc; JSPFN_FREE free; diff --git a/pandas/_libs/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c index 49fb271c755b5..49877412e55f5 100644 --- a/pandas/_libs/src/ujson/python/JSONtoObj.c +++ b/pandas/_libs/src/ujson/python/JSONtoObj.c @@ -51,6 +51,8 @@ typedef struct __PyObjectDecoder { void *npyarr_addr; // Ref to npyarr ptr to track DECREF calls npy_intp curdim; // Current array dimension + char *cStr; // storage for BigNum + PyArray_Descr *dtype; } PyObjectDecoder; @@ -483,10 +485,9 @@ JSOBJ Object_newDouble(void *prv, double value) { return PyFloat_FromDouble(value); } -JSOBJ Object_newBigNum(void* prv, wchar_t *start, wchar_t *end) { - PyObject* obj_as_unicode; - obj_as_unicode = PyUnicode_FromWideChar(start, (end - start)); - return PyLong_FromUnicodeObject(obj_as_unicode, 0); +JSOBJ Object_newBigNum(void* prv, void *_decoder) { + PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; + return PyLong_FromString(decoder->cStr, NULL, 0); } static void Object_releaseObject(void *prv, JSOBJ obj, void *_decoder) { @@ -525,6 +526,7 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) { pyDecoder.curdim = 0; pyDecoder.npyarr = NULL; pyDecoder.npyarr_addr = NULL; + pyDecoder.cStr = ""; decoder = (JSONObjectDecoder *)&pyDecoder; From f3894e570af0b42b56a1ac74a08037a105f33472 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 26 Jun 2020 23:39:56 +0000 Subject: [PATCH 09/19] added conversion intValue->string --- pandas/_libs/src/ujson/lib/ultrajsondec.c | 30 +++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/pandas/_libs/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c index 143d52f5055b6..f36d5c8d410bc 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsondec.c +++ b/pandas/_libs/src/ujson/lib/ultrajsondec.c @@ -43,6 +43,7 @@ Numeric decoder derived from from TCL library #include #include #include +#include #include #include #include @@ -64,6 +65,7 @@ struct DecoderState { int escHeap; int lastType; JSUINT32 objDepth; + char *cStr; // storage for BigNum void *prv; JSONObjectDecoder *dec; }; @@ -163,8 +165,32 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { // to newDigit = (JSLONG)(chr - 48); + // TO DO: need to fix overflow catching if (intValue*10ULL + newDigit > overflowLimit) { // TO DO: store current intValue for later processing + + // convert current inValue into string + int length = snprintf( NULL, 0, "%lu", intValue); + char* intValue_asStr = malloc( length + 1 ); + snprintf(intValue_asStr, length + 1, "%lu", intValue); + + // copy current ds->cStr into a temporary variable + char* cStr_existing = malloc(strlen(ds->cStr)+1); + memcpy(cStr_existing, ds->cStr, strlen(ds->cStr)+1); + + // size of ds->cStr after concatenation with str + size_t new_size = strlen(cStr_existing)+strlen(intValue_asStr)+1; + + char* new_cStr = malloc(new_size); + memcpy(new_cStr, cStr_existing, strlen(cStr_existing)); + strcat(new_cStr, intValue_asStr); + + // copy concatenated string back to ds->cStr + // TO DO: this is failing + ds->cStr = realloc(ds->cStr, new_size); + strcpy(ds->cStr, new_cStr); + ds->cStr = realloc(ds->cStr, new_size); + // then reset intValue intValue = (newDigit==0) ? 10 : newDigit; } @@ -200,6 +226,9 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { ds->lastType = JT_INT; ds->start = offset; + // check if ds->cStr has been written to + // if yes append str(intValue) + // and return ds->dec->newBigNum if ((intValue >> 31)) { return ds->dec->newLong(ds->prv, (JSINT64)(intValue * (JSINT64)intNeg)); } else { @@ -1174,6 +1203,7 @@ JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, ds.dec->errorStr = NULL; ds.dec->errorOffset = NULL; ds.objDepth = 0; + ds.cStr = ""; // TO DO: this isn't the right initialization (not sure why) ds.dec = dec; From bfe6595429a338bd44c9acca0f8b6d136d9e8d33 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 27 Jun 2020 19:44:53 +0000 Subject: [PATCH 10/19] implemented cStr storage of long int for JSON decoding --- pandas/_libs/src/ujson/lib/ultrajsondec.c | 61 ++++++++++++++--------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/pandas/_libs/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c index f36d5c8d410bc..f74b601d676b5 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsondec.c +++ b/pandas/_libs/src/ujson/lib/ultrajsondec.c @@ -167,30 +167,26 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { // TO DO: need to fix overflow catching if (intValue*10ULL + newDigit > overflowLimit) { - // TO DO: store current intValue for later processing - + // convert current inValue into string int length = snprintf( NULL, 0, "%lu", intValue); char* intValue_asStr = malloc( length + 1 ); snprintf(intValue_asStr, length + 1, "%lu", intValue); - - // copy current ds->cStr into a temporary variable - char* cStr_existing = malloc(strlen(ds->cStr)+1); - memcpy(cStr_existing, ds->cStr, strlen(ds->cStr)+1); - - // size of ds->cStr after concatenation with str - size_t new_size = strlen(cStr_existing)+strlen(intValue_asStr)+1; - - char* new_cStr = malloc(new_size); - memcpy(new_cStr, cStr_existing, strlen(cStr_existing)); - strcat(new_cStr, intValue_asStr); - - // copy concatenated string back to ds->cStr - // TO DO: this is failing - ds->cStr = realloc(ds->cStr, new_size); - strcpy(ds->cStr, new_cStr); - ds->cStr = realloc(ds->cStr, new_size); - + + if (strlen(ds->cStr)== 0) { // first overflow + ds->cStr = (char*)realloc(ds->cStr, strlen(intValue_asStr)+1); + strcpy(ds->cStr, intValue_asStr); + } else { // has overflown before + char* cStr_prev = malloc(strlen(ds->cStr)); + memcpy(cStr_prev, ds->cStr, strlen(ds->cStr)); + + size_t new_size = strlen(ds->cStr) + strlen(intValue_asStr) + 1; + ds->cStr = (char*)realloc(ds->cStr, new_size); + + strcpy(ds->cStr, cStr_prev); + strcat(ds->cStr, intValue_asStr); + } + // then reset intValue intValue = (newDigit==0) ? 10 : newDigit; } @@ -227,9 +223,24 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { ds->start = offset; // check if ds->cStr has been written to - // if yes append str(intValue) - // and return ds->dec->newBigNum - if ((intValue >> 31)) { + if (strlen(ds->cStr)>0){ + + // covert intValue to cString + int length = snprintf( NULL, 0, "%lu", intValue); + char* intValue_asStr = malloc( length + 1 ); + snprintf(intValue_asStr, length + 1, "%lu", intValue); + + char* cStr_prev = malloc(strlen(ds->cStr)); + memcpy(cStr_prev, ds->cStr, strlen(ds->cStr)); + + size_t new_size = strlen(ds->cStr) + strlen(intValue_asStr) + 1; + ds->cStr = (char*)realloc(ds->cStr, new_size); + strcpy(ds->cStr, cStr_prev); + strcat(ds->cStr, intValue_asStr); + + return ds->dec->newBigNum(ds->prv, ds->cStr); + } + else if ((intValue >> 31)) { return ds->dec->newLong(ds->prv, (JSINT64)(intValue * (JSINT64)intNeg)); } else { return ds->dec->newInt(ds->prv, (JSINT32)(intValue * intNeg)); @@ -1203,7 +1214,9 @@ JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, ds.dec->errorStr = NULL; ds.dec->errorOffset = NULL; ds.objDepth = 0; - ds.cStr = ""; // TO DO: this isn't the right initialization (not sure why) + + ds.cStr = malloc(sizeof("\0")); + strcpy(ds.cStr, "\0"); ds.dec = dec; From c18f685b47dcea9d8385db10a0d8fdd381276eac Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 27 Jun 2020 19:45:19 +0000 Subject: [PATCH 11/19] implemented cStr storage of long int for JSON decoding --- pandas/_libs/src/ujson/python/JSONtoObj.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c index 49877412e55f5..76e0f3abdb488 100644 --- a/pandas/_libs/src/ujson/python/JSONtoObj.c +++ b/pandas/_libs/src/ujson/python/JSONtoObj.c @@ -51,7 +51,7 @@ typedef struct __PyObjectDecoder { void *npyarr_addr; // Ref to npyarr ptr to track DECREF calls npy_intp curdim; // Current array dimension - char *cStr; // storage for BigNum + PyArray_Descr *dtype; } PyObjectDecoder; @@ -485,9 +485,9 @@ JSOBJ Object_newDouble(void *prv, double value) { return PyFloat_FromDouble(value); } -JSOBJ Object_newBigNum(void* prv, void *_decoder) { - PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; - return PyLong_FromString(decoder->cStr, NULL, 0); + +JSOBJ Object_newBigNum(void* prv, char* cStr) { + return PyLong_FromString(cStr, NULL, 0); } static void Object_releaseObject(void *prv, JSOBJ obj, void *_decoder) { @@ -526,7 +526,6 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) { pyDecoder.curdim = 0; pyDecoder.npyarr = NULL; pyDecoder.npyarr_addr = NULL; - pyDecoder.cStr = ""; decoder = (JSONObjectDecoder *)&pyDecoder; From 402770407de758735defe287f9b0637323855865 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 27 Jun 2020 19:46:01 +0000 Subject: [PATCH 12/19] updated ujson header file --- pandas/_libs/src/ujson/lib/ultrajson.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h index 3c1f1ef40ef4b..1821cce0fa34b 100644 --- a/pandas/_libs/src/ujson/lib/ultrajson.h +++ b/pandas/_libs/src/ujson/lib/ultrajson.h @@ -304,7 +304,7 @@ typedef struct __JSONObjectDecoder { JSOBJ (*newInt)(void *prv, JSINT32 value); JSOBJ (*newLong)(void *prv, JSINT64 value); JSOBJ (*newDouble)(void *prv, double value); - JSOBJ (*newBigNum)(void *prv, void *decoder); + JSOBJ (*newBigNum)(void *prv, char* cStr); void (*releaseObject)(void *prv, JSOBJ obj, void *decoder); JSPFN_MALLOC malloc; JSPFN_FREE free; @@ -312,6 +312,7 @@ typedef struct __JSONObjectDecoder { char *errorStr; char *errorOffset; int preciseFloat; + char *cStr; void *prv; } JSONObjectDecoder; From e002cc4569120a9c5f536aa3f82ff83abd6fdf60 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 27 Jun 2020 19:47:57 +0000 Subject: [PATCH 13/19] updated ujson header file --- pandas/_libs/src/ujson/lib/ultrajson.h | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h index 1821cce0fa34b..ef034b1ac233a 100644 --- a/pandas/_libs/src/ujson/lib/ultrajson.h +++ b/pandas/_libs/src/ujson/lib/ultrajson.h @@ -312,7 +312,6 @@ typedef struct __JSONObjectDecoder { char *errorStr; char *errorOffset; int preciseFloat; - char *cStr; void *prv; } JSONObjectDecoder; From aa74e5a9d492b68602c6c8f206787613fa53d326 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 27 Jun 2020 19:49:38 +0000 Subject: [PATCH 14/19] styling changes to minimize diff --- pandas/_libs/src/ujson/python/JSONtoObj.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/_libs/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c index 76e0f3abdb488..cae20cb14455d 100644 --- a/pandas/_libs/src/ujson/python/JSONtoObj.c +++ b/pandas/_libs/src/ujson/python/JSONtoObj.c @@ -51,8 +51,6 @@ typedef struct __PyObjectDecoder { void *npyarr_addr; // Ref to npyarr ptr to track DECREF calls npy_intp curdim; // Current array dimension - - PyArray_Descr *dtype; } PyObjectDecoder; @@ -485,7 +483,6 @@ JSOBJ Object_newDouble(void *prv, double value) { return PyFloat_FromDouble(value); } - JSOBJ Object_newBigNum(void* prv, char* cStr) { return PyLong_FromString(cStr, NULL, 0); } From 60cccc865847b9029ddc69be27a9433c0d5cabb9 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 27 Jun 2020 19:56:53 +0000 Subject: [PATCH 15/19] removed xfail test (fixed in this PR) --- pandas/tests/io/json/test_ujson.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index d3daf8ba57ef2..a835889f6ebff 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -1045,13 +1045,6 @@ def test_decode_array(self, arr): def test_decode_extreme_numbers(self, extreme_num): assert extreme_num == ujson.decode(str(extreme_num)) - @pytest.mark.parametrize( - "too_extreme_num", ["9223372036854775808", "-90223372036854775809"] - ) - def test_decode_too_extreme_numbers(self, too_extreme_num): - with pytest.raises(ValueError): - ujson.decode(too_extreme_num) - def test_decode_with_trailing_whitespaces(self): assert {} == ujson.decode("{}\n\t ") From 6fd68ef087e39311ebf012263fa48b51c03d9feb Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 27 Jun 2020 20:13:33 +0000 Subject: [PATCH 16/19] fixed xfail test --- pandas/tests/io/json/test_ujson.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index a835889f6ebff..335a91423bf53 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -1053,8 +1053,10 @@ def test_decode_with_trailing_non_whitespaces(self): ujson.decode("{}\n\t a") def test_decode_array_with_big_int(self): - with pytest.raises(ValueError): - ujson.loads("[18446098363113800555]") + # GH20599 + result = ujson.loads("[18446098363113800555]") + expected = [18446098363113800555] + assert result == expected @pytest.mark.parametrize( "float_number", From 74980f477f9113f0d9bb95f6588457ef12fc70e1 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 27 Jun 2020 21:35:22 +0000 Subject: [PATCH 17/19] freed ds->cStr at the end of JSON_DecodeObject --- pandas/_libs/src/ujson/lib/ultrajsondec.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c index f74b601d676b5..bea6836b0aa00 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsondec.c +++ b/pandas/_libs/src/ujson/lib/ultrajsondec.c @@ -180,7 +180,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { char* cStr_prev = malloc(strlen(ds->cStr)); memcpy(cStr_prev, ds->cStr, strlen(ds->cStr)); - size_t new_size = strlen(ds->cStr) + strlen(intValue_asStr) + 1; + size_t new_size = strlen(ds->cStr) + strlen(intValue_asStr); ds->cStr = (char*)realloc(ds->cStr, new_size); strcpy(ds->cStr, cStr_prev); @@ -233,7 +233,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { char* cStr_prev = malloc(strlen(ds->cStr)); memcpy(cStr_prev, ds->cStr, strlen(ds->cStr)); - size_t new_size = strlen(ds->cStr) + strlen(intValue_asStr) + 1; + size_t new_size = strlen(ds->cStr) + strlen(intValue_asStr); ds->cStr = (char*)realloc(ds->cStr, new_size); strcpy(ds->cStr, cStr_prev); strcat(ds->cStr, intValue_asStr); @@ -1245,5 +1245,7 @@ JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, return SetError(&ds, -1, "Trailing data"); } + free(ds.cStr); + return ret; } From 618f97f9f4ad491bf8f85bd9d3ae18aee12326f7 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 28 Jun 2020 20:03:59 +0000 Subject: [PATCH 18/19] TST: updated test --- pandas/tests/io/json/test_ujson.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 335a91423bf53..ca9478033b094 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -560,7 +560,15 @@ def test_encode_long_conversion(self): assert output == json.dumps(long_input) assert long_input == ujson.decode(output) - @pytest.mark.parametrize("bigNum", [sys.maxsize + 1, -(sys.maxsize + 2)]) + @pytest.mark.parametrize( + "bigNum", + [ + sys.maxsize + 1, + sys.maxsize * sys.maxsize + 100, + -(sys.maxsize + 2), + -(sys.maxsize * sys.maxsize + 100), + ], + ) def test_dumps_ints_larger_than_maxsize(self, bigNum): # GH34395 bigNum = sys.maxsize + 1 From eeea2149ac5e8c11005a7b65d93115ac7ce789d1 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Tue, 30 Jun 2020 04:19:42 +0000 Subject: [PATCH 19/19] fixed Overflow check in ultrajsondec.c --- pandas/_libs/src/ujson/lib/ultrajsondec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c index bea6836b0aa00..265576a7f9004 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsondec.c +++ b/pandas/_libs/src/ujson/lib/ultrajsondec.c @@ -128,6 +128,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { double expValue; char *offset = ds->start; + JSUINT64 overflowLimit = LLONG_MAX; if (*(offset) == 'I') { @@ -166,7 +167,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { newDigit = (JSLONG)(chr - 48); // TO DO: need to fix overflow catching - if (intValue*10ULL + newDigit > overflowLimit) { + if (intValue> (overflowLimit-newDigit)/10) { // convert current inValue into string int length = snprintf( NULL, 0, "%lu", intValue);