NOTE(review): line breaks, indentation and blank context lines below were
restored from a whitespace-collapsed copy of this patch. They are inferred from
the hunk line counts and code nesting -- verify against the target files (or
apply with --ignore-whitespace) before relying on exact whitespace.

diff --git a/doc/source/release.rst b/doc/source/release.rst
index 390c6e857ba32..009b5183c5632 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -212,6 +212,7 @@ See :ref:`Internal Refactoring`
   - In ``to_json``, fix date handling so milliseconds are the default timestamp
     as the docstring says (:issue:`4362`).
   - JSON NaT handling fixed, NaTs are now serialised to `null` (:issue:`4498`)
+  - Fixed JSON handling of escapable characters in JSON object keys (:issue:`4593`)
   - Fixed passing ``keep_default_na=False`` when ``na_values=None`` (:issue:`4318`)
   - Fixed bug with ``values`` raising an error on a DataFrame with duplicate columns and mixed
     dtypes, surfaced in (:issue:`4377`)
diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py
index 1f79f5670cc75..108e779129672 100644
--- a/pandas/io/tests/test_json/test_pandas.py
+++ b/pandas/io/tests/test_json/test_pandas.py
@@ -14,7 +14,7 @@
 
 from pandas.util.testing import (assert_almost_equal, assert_frame_equal,
                                  assert_series_equal, network,
-                                 ensure_clean)
+                                 ensure_clean, assert_index_equal)
 import pandas.util.testing as tm
 from numpy.testing.decorators import slow
 
@@ -53,6 +53,21 @@ def setUp(self):
         self.tsframe = _tsframe.copy()
         self.mixed_frame = _mixed_frame.copy()
 
+    def test_frame_double_encoded_labels(self):
+        df = DataFrame([['a', 'b'], ['c', 'd']],
+                       index=['index " 1', 'index / 2'],
+                       columns=['a \\ b', 'y / z'])
+
+        assert_frame_equal(
+            df, read_json(df.to_json(orient='split'), orient='split'))
+        assert_frame_equal(
+            df, read_json(df.to_json(orient='columns'), orient='columns'))
+        assert_frame_equal(
+            df, read_json(df.to_json(orient='index'), orient='index'))
+        df_unser = read_json(df.to_json(orient='records'), orient='records')
+        assert_index_equal(df.columns, df_unser.columns)
+        np.testing.assert_equal(df.values, df_unser.values)
+
     def test_frame_non_unique_index(self):
         df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1],
                        columns=['x', 'y'])
diff --git a/pandas/src/ujson/python/objToJSON.c b/pandas/src/ujson/python/objToJSON.c
index 22f9cf8d7667a..715fbbcaa8856 100644
--- a/pandas/src/ujson/python/objToJSON.c
+++ b/pandas/src/ujson/python/objToJSON.c
@@ -488,6 +488,7 @@ JSOBJ NpyArr_iterGetValue(JSOBJ obj, JSONTypeContext *tc)
 
 char *NpyArr_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
 {
+    JSONObjectEncoder* enc = (JSONObjectEncoder*) tc->encoder;
     NpyArrContext* npyarr;
     npy_intp idx;
     PRINTMARK();
@@ -496,13 +497,19 @@ char *NpyArr_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
     {
         idx = npyarr->index[npyarr->stridedim] - 1;
         *outLen = strlen(npyarr->columnLabels[idx]);
-        return npyarr->columnLabels[idx];
+        memcpy(enc->offset, npyarr->columnLabels[idx], sizeof(char)*(*outLen));
+        enc->offset += *outLen;
+        *outLen = 0;
+        return NULL;
     }
     else
     {
         idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1;
         *outLen = strlen(npyarr->rowLabels[idx]);
-        return npyarr->rowLabels[idx];
+        memcpy(enc->offset, npyarr->rowLabels[idx], sizeof(char)*(*outLen));
+        enc->offset += *outLen;
+        *outLen = 0;
+        return NULL;
     }
 }
 
@@ -1064,7 +1071,7 @@ char** NpyArr_encodeLabels(PyArrayObject* labels, JSONObjectEncoder* enc, npy_in
     // NOTE this function steals a reference to labels.
     PyArrayObject* labelsTmp = NULL;
     PyObject* item = NULL;
-    npy_intp i, stride, len;
+    npy_intp i, stride, len, need_quotes;
     char** ret;
     char *dataptr, *cLabel, *origend, *origst, *origoffset;
     char labelBuffer[NPY_JSON_BUFSIZE];
@@ -1117,15 +1124,8 @@ char** NpyArr_encodeLabels(PyArrayObject* labels, JSONObjectEncoder* enc, npy_in
             break;
         }
 
-        // trim off any quotes surrounding the result
-        if (*cLabel == '\"')
-        {
-            cLabel++;
-            enc->offset -= 2;
-            *(enc->offset) = '\0';
-        }
-
-        len = enc->offset - cLabel + 1;
+        need_quotes = ((*cLabel) != '"');
+        len = enc->offset - cLabel + 1 + 2 * need_quotes;
         ret[i] = PyObject_Malloc(sizeof(char)*len);
 
         if (!ret[i])
@@ -1135,7 +1135,18 @@ char** NpyArr_encodeLabels(PyArrayObject* labels, JSONObjectEncoder* enc, npy_in
             break;
         }
 
-        memcpy(ret[i], cLabel, sizeof(char)*len);
+        if (need_quotes)
+        {
+            ret[i][0] = '"';
+            memcpy(ret[i]+1, cLabel, sizeof(char)*(len-4));
+            ret[i][len-3] = '"';
+        }
+        else
+        {
+            memcpy(ret[i], cLabel, sizeof(char)*(len-2));
+        }
+        ret[i][len-2] = ':';
+        ret[i][len-1] = '\0';
         dataptr += stride;
     }
 