Skip to content

Commit 4e8e2ff

Browse files
committed
BUG: Fix pandas-dev#15344 by backporting ujson usage of PEP 393 APIs for compact ascii
1 parent d38d142 commit 4e8e2ff

File tree

3 files changed

+22
-0
lines changed

3 files changed

+22
-0
lines changed

doc/source/whatsnew/v0.20.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -517,3 +517,5 @@ Bug Fixes
517517

518518
- Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`)
519519
- Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`)
520+
521+
- Bug in ``to_json`` causing single-byte ASCII characters to be expanded to four-byte Unicode (:issue:`15344`)

pandas/io/tests/json/test_pandas.py

+10
Original file line numberDiff line numberDiff line change
@@ -1044,3 +1044,13 @@ def roundtrip(s, encoding='latin-1'):
10441044

10451045
for s in examples:
10461046
roundtrip(s)
1047+
1048+
def test_data_frame_size_after_to_json(self):
1049+
# GH15344
1050+
df = DataFrame({'a': [str(1)]})
1051+
1052+
size_before = df.memory_usage(index=True, deep=True).sum()
1053+
df.to_json()
1054+
size_after = df.memory_usage(index=True, deep=True).sum()
1055+
1056+
self.assertEqual(size_before, size_after)

pandas/src/ujson/python/objToJSON.c

+10
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,16 @@ static void *PyStringToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue,
402402
static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue,
403403
size_t *_outLen) {
404404
PyObject *obj = (PyObject *)_obj;
405+
406+
#if (PY_VERSION_HEX >= 0x03030000)
407+
if(PyUnicode_IS_COMPACT_ASCII(obj)) {
408+
Py_ssize_t len;
409+
char *data = PyUnicode_AsUTF8AndSize(obj, &len);
410+
*_outLen = len;
411+
return data;
412+
}
413+
#endif
414+
405415
PyObject *newObj = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
406416
PyUnicode_GET_SIZE(obj), NULL);
407417

0 commit comments

Comments
 (0)