Skip to content

Commit 3644370

Browse files
committed
Merge pull request #4593 from Komnomnomnom/ujson-slash-happy
BUG: ujson labels are encoded twice
2 parents 4c8ad82 + 240d76e commit 3644370

File tree

3 files changed

+41
-14
lines changed

3 files changed

+41
-14
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
212212
- In ``to_json``, fix date handling so milliseconds are the default timestamp
213213
as the docstring says (:issue:`4362`).
214214
- JSON NaT handling fixed, NaTs are now serialised to ``null`` (:issue:`4498`)
215+
- Fixed JSON handling of escapable characters in JSON object keys (:issue:`4593`)
215216
- Fixed passing ``keep_default_na=False`` when ``na_values=None`` (:issue:`4318`)
216217
- Fixed bug with ``values`` raising an error on a DataFrame with duplicate columns and mixed
217218
dtypes, surfaced in (:issue:`4377`)

pandas/io/tests/test_json/test_pandas.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
from pandas.util.testing import (assert_almost_equal, assert_frame_equal,
1616
assert_series_equal, network,
17-
ensure_clean)
17+
ensure_clean, assert_index_equal)
1818
import pandas.util.testing as tm
1919
from numpy.testing.decorators import slow
2020

@@ -53,6 +53,21 @@ def setUp(self):
5353
self.tsframe = _tsframe.copy()
5454
self.mixed_frame = _mixed_frame.copy()
5555

56+
def test_frame_double_encoded_labels(self):
57+
df = DataFrame([['a', 'b'], ['c', 'd']],
58+
index=['index " 1', 'index / 2'],
59+
columns=['a \\ b', 'y / z'])
60+
61+
assert_frame_equal(
62+
df, read_json(df.to_json(orient='split'), orient='split'))
63+
assert_frame_equal(
64+
df, read_json(df.to_json(orient='columns'), orient='columns'))
65+
assert_frame_equal(
66+
df, read_json(df.to_json(orient='index'), orient='index'))
67+
df_unser = read_json(df.to_json(orient='records'), orient='records')
68+
assert_index_equal(df.columns, df_unser.columns)
69+
np.testing.assert_equal(df.values, df_unser.values)
70+
5671
def test_frame_non_unique_index(self):
5772
df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1],
5873
columns=['x', 'y'])

pandas/src/ujson/python/objToJSON.c

+24-13
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,7 @@ JSOBJ NpyArr_iterGetValue(JSOBJ obj, JSONTypeContext *tc)
488488

489489
char *NpyArr_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
490490
{
491+
JSONObjectEncoder* enc = (JSONObjectEncoder*) tc->encoder;
491492
NpyArrContext* npyarr;
492493
npy_intp idx;
493494
PRINTMARK();
@@ -496,13 +497,19 @@ char *NpyArr_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
496497
{
497498
idx = npyarr->index[npyarr->stridedim] - 1;
498499
*outLen = strlen(npyarr->columnLabels[idx]);
499-
return npyarr->columnLabels[idx];
500+
memcpy(enc->offset, npyarr->columnLabels[idx], sizeof(char)*(*outLen));
501+
enc->offset += *outLen;
502+
*outLen = 0;
503+
return NULL;
500504
}
501505
else
502506
{
503507
idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1;
504508
*outLen = strlen(npyarr->rowLabels[idx]);
505-
return npyarr->rowLabels[idx];
509+
memcpy(enc->offset, npyarr->rowLabels[idx], sizeof(char)*(*outLen));
510+
enc->offset += *outLen;
511+
*outLen = 0;
512+
return NULL;
506513
}
507514
}
508515

@@ -1064,7 +1071,7 @@ char** NpyArr_encodeLabels(PyArrayObject* labels, JSONObjectEncoder* enc, npy_in
10641071
// NOTE this function steals a reference to labels.
10651072
PyArrayObject* labelsTmp = NULL;
10661073
PyObject* item = NULL;
1067-
npy_intp i, stride, len;
1074+
npy_intp i, stride, len, need_quotes;
10681075
char** ret;
10691076
char *dataptr, *cLabel, *origend, *origst, *origoffset;
10701077
char labelBuffer[NPY_JSON_BUFSIZE];
@@ -1117,15 +1124,8 @@ char** NpyArr_encodeLabels(PyArrayObject* labels, JSONObjectEncoder* enc, npy_in
11171124
break;
11181125
}
11191126

1120-
// trim off any quotes surrounding the result
1121-
if (*cLabel == '\"')
1122-
{
1123-
cLabel++;
1124-
enc->offset -= 2;
1125-
*(enc->offset) = '\0';
1126-
}
1127-
1128-
len = enc->offset - cLabel + 1;
1127+
need_quotes = ((*cLabel) != '"');
1128+
len = enc->offset - cLabel + 1 + 2 * need_quotes;
11291129
ret[i] = PyObject_Malloc(sizeof(char)*len);
11301130

11311131
if (!ret[i])
@@ -1135,7 +1135,18 @@ char** NpyArr_encodeLabels(PyArrayObject* labels, JSONObjectEncoder* enc, npy_in
11351135
break;
11361136
}
11371137

1138-
memcpy(ret[i], cLabel, sizeof(char)*len);
1138+
if (need_quotes)
1139+
{
1140+
ret[i][0] = '"';
1141+
memcpy(ret[i]+1, cLabel, sizeof(char)*(len-4));
1142+
ret[i][len-3] = '"';
1143+
}
1144+
else
1145+
{
1146+
memcpy(ret[i], cLabel, sizeof(char)*(len-2));
1147+
}
1148+
ret[i][len-2] = ':';
1149+
ret[i][len-1] = '\0';
11391150
dataptr += stride;
11401151
}
11411152

0 commit comments

Comments
 (0)