Skip to content

Use const char* for JSON key name #60721

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,8 @@ typedef void (*JSPFN_ITERBEGIN)(JSOBJ obj, JSONTypeContext *tc);
typedef int (*JSPFN_ITERNEXT)(JSOBJ obj, JSONTypeContext *tc);
typedef void (*JSPFN_ITEREND)(JSOBJ obj, JSONTypeContext *tc);
typedef JSOBJ (*JSPFN_ITERGETVALUE)(JSOBJ obj, JSONTypeContext *tc);
typedef char *(*JSPFN_ITERGETNAME)(JSOBJ obj, JSONTypeContext *tc,
size_t *outLen);
typedef const char *(*JSPFN_ITERGETNAME)(JSOBJ obj, JSONTypeContext *tc,
size_t *outLen);
typedef void *(*JSPFN_MALLOC)(size_t size);
typedef void (*JSPFN_FREE)(void *pptr);
typedef void *(*JSPFN_REALLOC)(void *base, size_t size);
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c
Original file line number Diff line number Diff line change
Expand Up @@ -920,7 +920,7 @@ Perhaps implement recursion detection */
void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
size_t cbName) {
const char *value;
char *objName;
const char *objName;
int count;
JSOBJ iterObj;
size_t szlen;
Expand Down
135 changes: 56 additions & 79 deletions pandas/_libs/src/vendored/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ Numeric decoder derived from TCL library

npy_int64 get_nat(void) { return NPY_MIN_INT64; }

typedef char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti,
size_t *_outLen);
typedef const char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti,
size_t *_outLen);

int object_is_decimal_type(PyObject *obj);
int object_is_dataframe_type(PyObject *obj);
Expand Down Expand Up @@ -106,7 +106,7 @@ typedef struct __TypeContext {
double doubleValue;
JSINT64 longValue;

char *cStr;
const char *cStr;
NpyArrContext *npyarr;
PdBlockContext *pdblock;
int transpose;
Expand Down Expand Up @@ -301,14 +301,15 @@ static npy_float64 total_seconds(PyObject *td) {
return double_val;
}

static char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc),
size_t *_outLen) {
static const char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc),
size_t *_outLen) {
PyObject *obj = (PyObject *)_obj;
*_outLen = PyBytes_GET_SIZE(obj);
return PyBytes_AS_STRING(obj);
}

static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, size_t *_outLen) {
static const char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc,
size_t *_outLen) {
char *encoded = (char *)PyUnicode_AsUTF8AndSize(_obj, (Py_ssize_t *)_outLen);
if (encoded == NULL) {
/* Something went wrong.
Expand All @@ -321,24 +322,24 @@ static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, size_t *_outLen) {
}

/* JSON callback. Returns a const char* and writes the string length to *len */
static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused),
JSONTypeContext *tc, size_t *len) {
static const char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused),
JSONTypeContext *tc, size_t *len) {
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
NPY_DATETIMEUNIT valueUnit = ((PyObjectEncoder *)tc->encoder)->valueUnit;
GET_TC(tc)->cStr = int64ToIso(GET_TC(tc)->longValue, valueUnit, base, len);
return GET_TC(tc)->cStr;
}

/* JSON callback. Returns a const char* and writes the string length to *len */
static char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused),
JSONTypeContext *tc, size_t *len) {
static const char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused),
JSONTypeContext *tc, size_t *len) {
GET_TC(tc)->cStr = int64ToIsoDuration(GET_TC(tc)->longValue, len);
return GET_TC(tc)->cStr;
}

/* JSON callback */
static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
size_t *len) {
static const char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
size_t *len) {
if (!PyDate_Check(obj) && !PyDateTime_Check(obj)) {
PyErr_SetString(PyExc_TypeError, "Expected date or datetime object");
((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
Expand All @@ -349,7 +350,8 @@ static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
return PyDateTimeToIso(obj, base, len);
}

static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) {
static const char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc,
size_t *outLen) {
PyObject *obj = (PyObject *)_obj;
PyObject *str = PyObject_CallMethod(obj, "isoformat", NULL);
if (str == NULL) {
Expand All @@ -373,8 +375,8 @@ static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) {
return outValue;
}

static char *PyDecimalToUTF8Callback(JSOBJ _obj, JSONTypeContext *tc,
size_t *len) {
static const char *PyDecimalToUTF8Callback(JSOBJ _obj, JSONTypeContext *tc,
size_t *len) {
PyObject *obj = (PyObject *)_obj;
PyObject *format_spec = PyUnicode_FromStringAndSize("f", 1);
PyObject *str = PyObject_Format(obj, format_spec);
Expand Down Expand Up @@ -558,10 +560,10 @@ static JSOBJ NpyArr_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *NpyArr_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
static const char *NpyArr_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
NpyArrContext *npyarr = GET_TC(tc)->npyarr;
char *cStr;
const char *cStr;

if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) {
const npy_intp idx = npyarr->index[npyarr->stridedim] - 1;
Expand Down Expand Up @@ -609,11 +611,11 @@ static int PdBlock_iterNextItem(JSOBJ obj, JSONTypeContext *tc) {
return NpyArr_iterNextItem(obj, tc);
}

static char *PdBlock_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
static const char *PdBlock_iterGetName(JSOBJ Py_UNUSED(obj),
JSONTypeContext *tc, size_t *outLen) {
PdBlockContext *blkCtxt = GET_TC(tc)->pdblock;
NpyArrContext *npyarr = blkCtxt->npyCtxts[0];
char *cStr;
const char *cStr;

if (GET_TC(tc)->iterNext == PdBlock_iterNextItem) {
const npy_intp idx = blkCtxt->colIdx - 1;
Expand All @@ -631,12 +633,12 @@ static char *PdBlock_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
return cStr;
}

static char *PdBlock_iterGetName_Transpose(JSOBJ Py_UNUSED(obj),
JSONTypeContext *tc,
size_t *outLen) {
static const char *PdBlock_iterGetName_Transpose(JSOBJ Py_UNUSED(obj),
JSONTypeContext *tc,
size_t *outLen) {
PdBlockContext *blkCtxt = GET_TC(tc)->pdblock;
NpyArrContext *npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx];
char *cStr;
const char *cStr;

if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) {
const npy_intp idx = npyarr->index[npyarr->stridedim] - 1;
Expand Down Expand Up @@ -817,9 +819,9 @@ static JSOBJ Tuple_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *Tuple_iterGetName(JSOBJ Py_UNUSED(obj),
JSONTypeContext *Py_UNUSED(tc),
size_t *Py_UNUSED(outLen)) {
static const char *Tuple_iterGetName(JSOBJ Py_UNUSED(obj),
JSONTypeContext *Py_UNUSED(tc),
size_t *Py_UNUSED(outLen)) {
return NULL;
}

Expand Down Expand Up @@ -864,9 +866,9 @@ static JSOBJ Set_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *Set_iterGetName(JSOBJ Py_UNUSED(obj),
JSONTypeContext *Py_UNUSED(tc),
size_t *Py_UNUSED(outLen)) {
static const char *Set_iterGetName(JSOBJ Py_UNUSED(obj),
JSONTypeContext *Py_UNUSED(tc),
size_t *Py_UNUSED(outLen)) {
return NULL;
}

Expand Down Expand Up @@ -962,8 +964,8 @@ static JSOBJ Dir_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *Dir_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
static const char *Dir_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
*outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName);
return PyBytes_AS_STRING(GET_TC(tc)->itemName);
}
Expand Down Expand Up @@ -994,9 +996,9 @@ static JSOBJ List_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *List_iterGetName(JSOBJ Py_UNUSED(obj),
JSONTypeContext *Py_UNUSED(tc),
size_t *Py_UNUSED(outLen)) {
static const char *List_iterGetName(JSOBJ Py_UNUSED(obj),
JSONTypeContext *Py_UNUSED(tc),
size_t *Py_UNUSED(outLen)) {
return NULL;
}

Expand All @@ -1005,24 +1007,16 @@ static char *List_iterGetName(JSOBJ Py_UNUSED(obj),
//=============================================================================
static void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
GET_TC(tc)->index = 0;
GET_TC(tc)->cStr = PyObject_Malloc(20);
if (!GET_TC(tc)->cStr) {
PyErr_NoMemory();
}
}

static int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) {
if (!GET_TC(tc)->cStr) {
return 0;
}

const Py_ssize_t index = GET_TC(tc)->index;
Py_XDECREF(GET_TC(tc)->itemValue);
if (index == 0) {
memcpy(GET_TC(tc)->cStr, "name", 5);
GET_TC(tc)->cStr = "name";
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
} else if (index == 1) {
memcpy(GET_TC(tc)->cStr, "data", 5);
GET_TC(tc)->cStr = "data";
GET_TC(tc)->itemValue = get_values(obj);
if (!GET_TC(tc)->itemValue) {
return 0;
Expand All @@ -1042,8 +1036,8 @@ static JSOBJ Index_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *Index_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
static const char *Index_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
*outLen = strlen(GET_TC(tc)->cStr);
return GET_TC(tc)->cStr;
}
Expand All @@ -1054,28 +1048,20 @@ static char *Index_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
static void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
GET_TC(tc)->index = 0;
GET_TC(tc)->cStr = PyObject_Malloc(20);
enc->outputFormat = VALUES; // for contained series
if (!GET_TC(tc)->cStr) {
PyErr_NoMemory();
}
}

static int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) {
if (!GET_TC(tc)->cStr) {
return 0;
}

const Py_ssize_t index = GET_TC(tc)->index;
Py_XDECREF(GET_TC(tc)->itemValue);
if (index == 0) {
memcpy(GET_TC(tc)->cStr, "name", 5);
GET_TC(tc)->cStr = "name";
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
} else if (index == 1) {
memcpy(GET_TC(tc)->cStr, "index", 6);
GET_TC(tc)->cStr = "index";
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
} else if (index == 2) {
memcpy(GET_TC(tc)->cStr, "data", 5);
GET_TC(tc)->cStr = "data";
GET_TC(tc)->itemValue = get_values(obj);
if (!GET_TC(tc)->itemValue) {
return 0;
Expand All @@ -1097,8 +1083,8 @@ static JSOBJ Series_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *Series_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
static const char *Series_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
*outLen = strlen(GET_TC(tc)->cStr);
return GET_TC(tc)->cStr;
}
Expand All @@ -1109,28 +1095,20 @@ static char *Series_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
static void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
GET_TC(tc)->index = 0;
GET_TC(tc)->cStr = PyObject_Malloc(20);
enc->outputFormat = VALUES; // for contained series & index
if (!GET_TC(tc)->cStr) {
PyErr_NoMemory();
}
}

static int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) {
if (!GET_TC(tc)->cStr) {
return 0;
}

const Py_ssize_t index = GET_TC(tc)->index;
Py_XDECREF(GET_TC(tc)->itemValue);
if (index == 0) {
memcpy(GET_TC(tc)->cStr, "columns", 8);
GET_TC(tc)->cStr = "columns";
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns");
} else if (index == 1) {
memcpy(GET_TC(tc)->cStr, "index", 6);
GET_TC(tc)->cStr = "index";
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
} else if (index == 2) {
memcpy(GET_TC(tc)->cStr, "data", 5);
GET_TC(tc)->cStr = "data";
Py_INCREF(obj);
GET_TC(tc)->itemValue = obj;
} else {
Expand All @@ -1150,8 +1128,8 @@ static JSOBJ DataFrame_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *DataFrame_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
static const char *DataFrame_iterGetName(JSOBJ Py_UNUSED(obj),
JSONTypeContext *tc, size_t *outLen) {
*outLen = strlen(GET_TC(tc)->cStr);
return GET_TC(tc)->cStr;
}
Expand Down Expand Up @@ -1201,8 +1179,8 @@ static JSOBJ Dict_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *Dict_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
static const char *Dict_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
*outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName);
return PyBytes_AS_STRING(GET_TC(tc)->itemName);
}
Expand Down Expand Up @@ -1902,7 +1880,6 @@ static void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
GET_TC(tc)->rowLabels = NULL;
NpyArr_freeLabels(GET_TC(tc)->columnLabels, GET_TC(tc)->columnLabelsLen);
GET_TC(tc)->columnLabels = NULL;
PyObject_Free(GET_TC(tc)->cStr);
GET_TC(tc)->cStr = NULL;
PyObject_Free(tc->prv);
tc->prv = NULL;
Expand Down Expand Up @@ -1953,8 +1930,8 @@ static JSOBJ Object_iterGetValue(JSOBJ obj, JSONTypeContext *tc) {
return GET_TC(tc)->iterGetValue(obj, tc);
}

static char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc,
size_t *outLen) {
static const char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc,
size_t *outLen) {
return GET_TC(tc)->iterGetName(obj, tc, outLen);
}

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/io/json/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def test_read_zipped_json(datapath):

@td.skip_if_not_us_locale
@pytest.mark.single_cpu
@pytest.mark.network
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These test markers are unrelated to the main change, but I noticed locally that these tests would hang (they all make S3 calls).

def test_with_s3_url(compression, s3_public_bucket, s3so):
# Bucket created in tests/io/conftest.py
df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1412,6 +1412,7 @@ def test_read_inline_jsonl(self):
tm.assert_frame_equal(result, expected)

@pytest.mark.single_cpu
@pytest.mark.network
@td.skip_if_not_us_locale
def test_read_s3_jsonl(self, s3_public_bucket_with_data, s3so):
# GH17200
Expand Down Expand Up @@ -2011,6 +2012,7 @@ def test_json_multiindex(self):
assert result == expected

@pytest.mark.single_cpu
@pytest.mark.network
def test_to_s3(self, s3_public_bucket, s3so):
# GH 28375
mock_bucket_name, target_file = s3_public_bucket.name, "test.json"
Expand Down
Loading