-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Remove Encoding of values in char** For Labels #27618
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
648d2b5
37226d7
e890578
24a585a
42cc1ec
7385fd8
4aca787
bc5e69a
d53527f
eefadfa
7b2af9f
2e25bef
0820f57
185aa61
221e43d
18d28c8
7a5fc9c
ad06e9c
3e22820
2389a33
104d2f2
dbdaed1
3ed7979
01ae178
0372794
e15bede
d698255
d453ed0
5ab7ecf
8ea895e
439b695
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -787,30 +787,23 @@ JSOBJ NpyArr_iterGetValue(JSOBJ obj, JSONTypeContext *tc) { | |
return GET_TC(tc)->itemValue; | ||
} | ||
|
||
static void NpyArr_getLabel(JSOBJ obj, JSONTypeContext *tc, size_t *outLen, | ||
npy_intp idx, char **labels) { | ||
JSONObjectEncoder *enc = (JSONObjectEncoder *)tc->encoder; | ||
PRINTMARK(); | ||
*outLen = strlen(labels[idx]); | ||
Buffer_Reserve(enc, *outLen); | ||
memcpy(enc->offset, labels[idx], sizeof(char) * (*outLen)); | ||
enc->offset += *outLen; | ||
*outLen = 0; | ||
} | ||
|
||
char *NpyArr_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { | ||
NpyArrContext *npyarr = GET_TC(tc)->npyarr; | ||
npy_intp idx; | ||
PRINTMARK(); | ||
char *cStr; | ||
|
||
if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { | ||
idx = npyarr->index[npyarr->stridedim] - 1; | ||
NpyArr_getLabel(obj, tc, outLen, idx, npyarr->columnLabels); | ||
cStr = npyarr->columnLabels[idx]; | ||
} else { | ||
idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1; | ||
NpyArr_getLabel(obj, tc, outLen, idx, npyarr->rowLabels); | ||
cStr = npyarr->rowLabels[idx]; | ||
} | ||
return NULL; | ||
|
||
*outLen = strlen(cStr); | ||
|
||
return cStr; | ||
} | ||
|
||
//============================================================================= | ||
|
@@ -852,36 +845,42 @@ char *PdBlock_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { | |
PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; | ||
NpyArrContext *npyarr = blkCtxt->npyCtxts[0]; | ||
npy_intp idx; | ||
char *cStr; | ||
PRINTMARK(); | ||
|
||
if (GET_TC(tc)->iterNext == PdBlock_iterNextItem) { | ||
idx = blkCtxt->colIdx - 1; | ||
NpyArr_getLabel(obj, tc, outLen, idx, npyarr->columnLabels); | ||
cStr = npyarr->columnLabels[idx]; | ||
} else { | ||
idx = GET_TC(tc)->iterNext != PdBlock_iterNext | ||
? npyarr->index[npyarr->stridedim - npyarr->inc] - 1 | ||
: npyarr->index[npyarr->stridedim]; | ||
|
||
NpyArr_getLabel(obj, tc, outLen, idx, npyarr->rowLabels); | ||
cStr = npyarr->rowLabels[idx]; | ||
} | ||
return NULL; | ||
|
||
*outLen = strlen(cStr); | ||
return cStr; | ||
} | ||
|
||
char *PdBlock_iterGetName_Transpose(JSOBJ obj, JSONTypeContext *tc, | ||
size_t *outLen) { | ||
PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; | ||
NpyArrContext *npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; | ||
npy_intp idx; | ||
char *cStr; | ||
PRINTMARK(); | ||
|
||
if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { | ||
idx = npyarr->index[npyarr->stridedim] - 1; | ||
NpyArr_getLabel(obj, tc, outLen, idx, npyarr->columnLabels); | ||
cStr = npyarr->columnLabels[idx]; | ||
} else { | ||
idx = blkCtxt->colIdx; | ||
NpyArr_getLabel(obj, tc, outLen, idx, npyarr->rowLabels); | ||
cStr = npyarr->rowLabels[idx]; | ||
} | ||
return NULL; | ||
|
||
*outLen = strlen(cStr); | ||
return cStr; | ||
} | ||
|
||
int PdBlock_iterNext(JSOBJ obj, JSONTypeContext *tc) { | ||
|
@@ -1578,15 +1577,31 @@ void NpyArr_freeLabels(char **labels, npy_intp len) { | |
} | ||
} | ||
|
||
/* | ||
* Function: NpyArr_encodeLabels | ||
* ----------------------------- | ||
* | ||
* Builds an array of "encoded" labels. | ||
* | ||
* labels: PyArrayObject pointer for labels to be "encoded" | ||
* num : number of labels | ||
* | ||
* "encode" is quoted above because we aren't really doing encoding | ||
* For historical reasons this function would actually encode the entire | ||
* array into a separate buffer with a separate call to JSON_Encode | ||
* and would leave it to complex pointer manipulation from there to | ||
* unpack values as needed. To make things simpler and more idiomatic | ||
* this has instead just stringified any input save for datetime values, | ||
* which may need to be represented in various formats. | ||
*/ | ||
char **NpyArr_encodeLabels(PyArrayObject *labels, JSONObjectEncoder *enc, | ||
npy_intp num) { | ||
// NOTE this function steals a reference to labels. | ||
PyObjectEncoder *pyenc = (PyObjectEncoder *)enc; | ||
PyObject *item = NULL; | ||
npy_intp i, stride, len, need_quotes; | ||
npy_intp i, stride, len; | ||
char **ret; | ||
char *dataptr, *cLabel, *origend, *origst, *origoffset; | ||
char labelBuffer[NPY_JSON_BUFSIZE]; | ||
char *dataptr, *cLabel; | ||
PyArray_GetItemFunc *getitem; | ||
int type_num; | ||
PRINTMARK(); | ||
|
@@ -1614,68 +1629,44 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, JSONObjectEncoder *enc, | |
ret[i] = NULL; | ||
} | ||
|
||
origst = enc->start; | ||
origend = enc->end; | ||
origoffset = enc->offset; | ||
|
||
stride = PyArray_STRIDE(labels, 0); | ||
dataptr = PyArray_DATA(labels); | ||
getitem = (PyArray_GetItemFunc *)PyArray_DESCR(labels)->f->getitem; | ||
type_num = PyArray_TYPE(labels); | ||
|
||
for (i = 0; i < num; i++) { | ||
if (PyTypeNum_ISDATETIME(type_num) || PyTypeNum_ISNUMBER(type_num)) | ||
{ | ||
item = (PyObject *)labels; | ||
pyenc->npyType = type_num; | ||
pyenc->npyValue = dataptr; | ||
} else { | ||
item = getitem(dataptr, labels); | ||
if (!item) { | ||
NpyArr_freeLabels(ret, num); | ||
ret = 0; | ||
break; | ||
} | ||
} | ||
|
||
cLabel = JSON_EncodeObject(item, enc, labelBuffer, NPY_JSON_BUFSIZE); | ||
|
||
if (item != (PyObject *)labels) { | ||
Py_DECREF(item); | ||
} | ||
|
||
if (PyErr_Occurred() || enc->errorMsg) { | ||
item = getitem(dataptr, labels); | ||
if (!item) { | ||
NpyArr_freeLabels(ret, num); | ||
ret = 0; | ||
break; | ||
} | ||
|
||
PyObject *str = PyObject_Str(item); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jbrockmendel last comment...here is where I think that check needs to go. We are getting the |
||
cLabel = PyUnicode_AsUTF8(str); | ||
len = strlen(cLabel); | ||
Py_DECREF(str); | ||
|
||
Py_DECREF(item); | ||
// Add 1 to include NULL terminator | ||
ret[i] = PyObject_Malloc(len + 1); | ||
memcpy(ret[i], cLabel, len + 1); | ||
|
||
if (PyErr_Occurred()) { | ||
NpyArr_freeLabels(ret, num); | ||
ret = 0; | ||
break; | ||
} | ||
|
||
need_quotes = ((*cLabel) != '"'); | ||
len = enc->offset - cLabel + 1 + 2 * need_quotes; | ||
ret[i] = PyObject_Malloc(sizeof(char) * len); | ||
|
||
if (!ret[i]) { | ||
PyErr_NoMemory(); | ||
ret = 0; | ||
break; | ||
} | ||
|
||
if (need_quotes) { | ||
ret[i][0] = '"'; | ||
memcpy(ret[i] + 1, cLabel, sizeof(char) * (len - 4)); | ||
ret[i][len - 3] = '"'; | ||
} else { | ||
memcpy(ret[i], cLabel, sizeof(char) * (len - 2)); | ||
} | ||
ret[i][len - 2] = ':'; | ||
ret[i][len - 1] = '\0'; | ||
dataptr += stride; | ||
} | ||
|
||
enc->start = origst; | ||
enc->end = origend; | ||
enc->offset = origoffset; | ||
|
||
Py_DECREF(labels); | ||
return ret; | ||
} | ||
|
@@ -2429,7 +2420,6 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) { | |
PRINTMARK(); | ||
ret = JSON_EncodeObject(oinput, encoder, buffer, sizeof(buffer)); | ||
PRINTMARK(); | ||
|
||
if (PyErr_Occurred()) { | ||
PRINTMARK(); | ||
return NULL; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it looks like replacing this function with the simpler version below is orthogonal ot everything else. am i reading it wrong?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not orthogonal just simplified. Previously since the labels would all be encoded into a buffer this function would help return the unencoded label. Instead now all labels are stored unencoded in the
char **
and can easily be accessed by index, leaving the ultimate encoding up to ujson