Skip to content

Commit 00f054b

Browse files
committed
BUG: Fix pandas-dev#15344 by backporting ujson usage of PEP 393 APIs for compact ascii
1 parent d38d142 commit 00f054b

File tree

2 files changed

+21
-1
lines changed

2 files changed

+21
-1
lines changed

pandas/io/tests/json/test_pandas.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -1018,7 +1018,7 @@ def test_latin_encoding(self):
10181018
[b'E\xc9, 17', b'\xf8\xfc', b'a', b'b', b'c'],
10191019
[b'', b'a', b'b', b'c'],
10201020
[b'\xf8\xfc', b'a', b'b', b'c'],
1021-
[b'A\xf8\xfc', b'', b'a', b'b', b'c'],
1021+
[b'A\xf8\xfc', b'', b'a', b'b', b'c'],
10221022
[np.nan, b'', b'b', b'c'],
10231023
[b'A\xf8\xfc', np.nan, b'', b'b', b'c']]
10241024

@@ -1044,3 +1044,13 @@ def roundtrip(s, encoding='latin-1'):
10441044

10451045
for s in examples:
10461046
roundtrip(s)
1047+
1048+
def test_data_frame_size_after_to_json(self):
1049+
# GH15344
1050+
df = DataFrame({'a': [str(1)]})
1051+
1052+
size_before = df.memory_usage(index=True, deep=True).sum()
1053+
df.to_json()
1054+
size_after = df.memory_usage(index=True, deep=True).sum()
1055+
1056+
self.assertEqual(size_before, size_after)

pandas/src/ujson/python/objToJSON.c

+10
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,16 @@ static void *PyStringToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue,
402402
static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue,
403403
size_t *_outLen) {
404404
PyObject *obj = (PyObject *)_obj;
405+
406+
#if (PY_VERSION_HEX >= 0x03030000)
407+
if(PyUnicode_IS_COMPACT_ASCII(obj)) {
408+
Py_ssize_t len;
409+
char *data = PyUnicode_AsUTF8AndSize(obj, &len);
410+
*_outLen = len;
411+
return data;
412+
}
413+
#endif
414+
405415
PyObject *newObj = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
406416
PyUnicode_GET_SIZE(obj), NULL);
407417

0 commit comments

Comments
 (0)