Add an `indent` option to `to_json` (GH 12004).

The option is threaded from `to_json` in pandas/core/generic.py through the
writers in pandas/io/json/json.py and objToJSON into the vendored ujson
encoder, which emits newlines and `indent` spaces per nesting level when
indent > 0.

Review notes on this revision of the patch:
* The indentation helpers now reserve the bytes they write, and the
  Buffer_Reserve(enc, 2) call ahead of the closing '}' is kept. The previous
  revision removed that call and wrote an unbounded number of spaces through
  unchecked appends.
* The `index` lines for ultrajson.h, ultrajsonenc.c and generic.py still carry
  the previous revision's post-image hashes; regenerate them if needed.
* Context-line whitespace was reconstructed from a flattened copy of the
  patch; verify it against the target tree before applying.

diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h
index 0470fef450dde..51c482da67943 100644
--- a/pandas/_libs/src/ujson/lib/ultrajson.h
+++ b/pandas/_libs/src/ujson/lib/ultrajson.h
@@ -244,6 +244,10 @@ typedef struct __JSONObjectEncoder {
     If true, '<', '>', and '&' characters will be encoded as \u003c, \u003e, and \u0026, respectively. If false, no special encoding will be used. */
     int encodeHTMLChars;
 
+    /*
+    Number of spaces per indentation level; values <= 0 disable indentation */
+    int indent;
+
     /*
     Set to an error message if error occurred */
     const char *errorMsg;
diff --git a/pandas/_libs/src/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/ujson/lib/ultrajsonenc.c
index 2d6c823a45515..1201294f3aa21 100644
--- a/pandas/_libs/src/ujson/lib/ultrajsonenc.c
+++ b/pandas/_libs/src/ujson/lib/ultrajsonenc.c
@@ -722,6 +722,49 @@ FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char *begin,
     while (end > begin) aux = *end, *end-- = *begin, *begin++ = aux;
 }
 
+/*
+Append a newline when pretty-printing is enabled (enc->indent > 0).
+
+Despite the "Unchecked" suffix, the byte written is reserved here: the call
+sites in encode() emit indentation outside of any visible reservation. */
+void Buffer_AppendIndentNewlineUnchecked(JSONObjectEncoder *enc) {
+    if (enc->indent <= 0) {
+        return;
+    }
+
+    Buffer_Reserve(enc, 1);
+    if (enc->errorMsg) {
+        return;
+    }
+    Buffer_AppendCharUnchecked(enc, '\n');
+}
+
+/*
+Append `value` levels of indentation, enc->indent spaces per level. Nothing
+is written when indentation is disabled or `value` is not positive.
+
+The output size scales with both the configured width and the nesting depth,
+so the space is reserved here instead of being assumed to exist. */
+void Buffer_AppendIndentUnchecked(JSONObjectEncoder *enc, JSINT32 value) {
+    int i;
+
+    if (enc->indent <= 0 || value <= 0) {
+        return;
+    }
+
+    Buffer_Reserve(enc, (size_t)enc->indent * (size_t)value);
+    if (enc->errorMsg) {
+        /* presumably set when the buffer could not grow -- do not write */
+        return;
+    }
+
+    while (value-- > 0) {
+        for (i = 0; i < enc->indent; i++) {
+            Buffer_AppendCharUnchecked(enc, ' ');
+        }
+    }
+}
+
 void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) {
     char *wstr;
     JSUINT32 uvalue = (value < 0) ? -value : value;
@@ -966,6 +1009,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
                 iterObj = enc->iterGetValue(obj, &tc);
 
                 enc->level++;
+                Buffer_AppendIndentUnchecked(enc, enc->level);
                 encode(iterObj, enc, NULL, 0);
                 count++;
             }
@@ -981,6 +1025,8 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
             enc->iterBegin(obj, &tc);
 
             Buffer_AppendCharUnchecked(enc, '{');
+            Buffer_AppendIndentNewlineUnchecked(enc);
+            Buffer_AppendIndentUnchecked(enc, enc->level + 2);
 
             while (enc->iterNext(obj, &tc)) {
                 if (count > 0) {
@@ -988,6 +1034,8 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
 #ifndef JSON_NO_EXTRA_WHITESPACE
                     Buffer_AppendCharUnchecked(enc, ' ');
 #endif
+                    Buffer_AppendIndentNewlineUnchecked(enc);
+                    Buffer_AppendIndentUnchecked(enc, enc->level + 2);
                 }
 
                 iterObj = enc->iterGetValue(obj, &tc);
@@ -999,7 +1047,9 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
             }
 
             enc->iterEnd(obj, &tc);
+            Buffer_AppendIndentNewlineUnchecked(enc);
+            Buffer_AppendIndentUnchecked(enc, enc->level + 1);
             Buffer_Reserve(enc, 2);
             Buffer_AppendCharUnchecked(enc, '}');
             break;
         }
diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c
index 52788f85ff71e..91d5dcbddd7d7 100644
--- a/pandas/_libs/src/ujson/python/objToJSON.c
+++ b/pandas/_libs/src/ujson/python/objToJSON.c
@@ -2268,7 +2268,7 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) {
     static char *kwlist[] = {
         "obj", "ensure_ascii", "double_precision", "encode_html_chars",
         "orient", "date_unit", "iso_dates", "default_handler",
-        NULL};
+        "indent", NULL};
 
     char buffer[65536];
     char *ret;
@@ -2281,6 +2281,7 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) {
     char *sdateFormat = NULL;
     PyObject *oisoDates = 0;
     PyObject *odefHandler = 0;
+    int indent = 0;
 
     PyObjectEncoder pyEncoder = {{
         Object_beginTypeContext,
@@ -2302,6 +2303,7 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) {
         idoublePrecision,
         1,  // forceAscii
         0,  // encodeHTMLChars
+        0,  // indent
     }};
 
     JSONObjectEncoder *encoder = (JSONObjectEncoder *)&pyEncoder;
@@ -2326,10 +2328,10 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) {
 
     PRINTMARK();
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiOssOO", kwlist, &oinput,
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiOssOOi", kwlist, &oinput,
                                      &oensureAscii, &idoublePrecision,
                                      &oencodeHTMLChars, &sOrient, &sdateFormat,
-                                     &oisoDates, &odefHandler)) {
+                                     &oisoDates, &odefHandler, &indent)) {
         return NULL;
     }
 
@@ -2395,6 +2397,8 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) {
         pyEncoder.defaultHandler = odefHandler;
     }
 
+    encoder->indent = indent;
+
     pyEncoder.originalOutputFormat = pyEncoder.outputFormat;
     PRINTMARK();
     ret = JSON_EncodeObject(oinput, encoder, buffer, sizeof(buffer));
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 76910f425836e..00c2bcbbac7e2 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2178,7 +2178,7 @@ def to_excel(self, excel_writer, sheet_name="Sheet1", na_rep="",
     def to_json(self, path_or_buf=None, orient=None, date_format=None,
                 double_precision=10, force_ascii=True, date_unit='ms',
                 default_handler=None, lines=False, compression='infer',
-                index=True):
+                index=True, indent=0):
         """
         Convert the object to a JSON string.
 
@@ -2260,6 +2260,11 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
 
             .. versionadded:: 0.23.0
 
+        indent : int, default 0
+            Length of whitespace used to indent each record.
+
+            .. versionadded:: 0.25.0
+
         Returns
         -------
         None or str
@@ -2325,7 +2330,7 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
                             force_ascii=force_ascii, date_unit=date_unit,
                             default_handler=default_handler,
                             lines=lines, compression=compression,
-                            index=index)
+                            index=index, indent=indent)
 
     def to_hdf(self, path_or_buf, key, **kwargs):
         """
diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py
index ee9d9e000d7e3..b3cca412f9c5f 100644
--- a/pandas/io/json/json.py
+++ b/pandas/io/json/json.py
@@ -33,7 +33,7 @@
 def to_json(path_or_buf, obj, orient=None, date_format='epoch',
             double_precision=10, force_ascii=True, date_unit='ms',
             default_handler=None, lines=False, compression='infer',
-            index=True):
+            index=True, indent=0):
 
     if not index and orient not in ['split', 'table']:
         raise ValueError("'index=False' is only valid when 'orient' is "
@@ -59,7 +59,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
         obj, orient=orient, date_format=date_format,
         double_precision=double_precision, ensure_ascii=force_ascii,
         date_unit=date_unit, default_handler=default_handler,
-        index=index).write()
+        index=index, indent=indent).write()
 
     if lines:
         s = _convert_to_line_delimits(s)
@@ -78,7 +78,8 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
 
 class Writer:
     def __init__(self, obj, orient, date_format, double_precision,
-                 ensure_ascii, date_unit, index, default_handler=None):
+                 ensure_ascii, date_unit, index, default_handler=None,
+                 indent=0):
         self.obj = obj
 
         if orient is None:
@@ -91,6 +92,7 @@ def __init__(self, obj, orient, date_format, double_precision,
         self.date_unit = date_unit
         self.default_handler = default_handler
         self.index = index
+        self.indent = indent
 
         self.is_copy = None
         self._format_axes()
@@ -101,10 +103,11 @@ def _format_axes(self):
     def write(self):
         return self._write(self.obj, self.orient, self.double_precision,
                            self.ensure_ascii, self.date_unit,
-                           self.date_format == 'iso', self.default_handler)
+                           self.date_format == 'iso', self.default_handler,
+                           self.indent)
 
     def _write(self, obj, orient, double_precision, ensure_ascii,
-               date_unit, iso_dates, default_handler):
+               date_unit, iso_dates, default_handler, indent):
         return dumps(
             obj,
             orient=orient,
@@ -112,7 +115,8 @@ def _write(self, obj, orient, double_precision, ensure_ascii,
             ensure_ascii=ensure_ascii,
             date_unit=date_unit,
             iso_dates=iso_dates,
-            default_handler=default_handler
+            default_handler=default_handler,
+            indent=indent
         )
 
 
@@ -125,11 +129,11 @@ def _format_axes(self):
                              "'{orient}'".format(orient=self.orient))
 
     def _write(self, obj, orient, double_precision, ensure_ascii,
-               date_unit, iso_dates, default_handler):
+               date_unit, iso_dates, default_handler, indent):
         if not self.index and orient == 'split':
             obj = {"name": obj.name, "data": obj.values}
         return super()._write(obj, orient, double_precision, ensure_ascii,
-                              date_unit, iso_dates, default_handler)
+                              date_unit, iso_dates, default_handler, indent)
 
 
 class FrameWriter(Writer):
@@ -149,19 +153,20 @@ def _format_axes(self):
                              "'{orient}'.".format(orient=self.orient))
 
     def _write(self, obj, orient, double_precision, ensure_ascii,
-               date_unit, iso_dates, default_handler):
+               date_unit, iso_dates, default_handler, indent):
         if not self.index and orient == 'split':
             obj = obj.to_dict(orient='split')
             del obj["index"]
         return super()._write(obj, orient, double_precision, ensure_ascii,
-                              date_unit, iso_dates, default_handler)
+                              date_unit, iso_dates, default_handler, indent)
 
 
 class JSONTableWriter(FrameWriter):
     _default_orient = 'records'
 
     def __init__(self, obj, orient, date_format, double_precision,
-                 ensure_ascii, date_unit, index, default_handler=None):
+                 ensure_ascii, date_unit, index, default_handler=None,
+                 indent=0):
         """
         Adds a `schema` attribute with the Table Schema, resets
         the index (can't do in caller, because the schema inference needs
@@ -170,7 +175,7 @@ def __init__(self, obj, orient, date_format, double_precision,
         """
         super().__init__(obj, orient, date_format, double_precision,
                          ensure_ascii, date_unit, index,
-                         default_handler=default_handler)
+                         default_handler=default_handler, indent=indent)
 
         if date_format != 'iso':
             msg = ("Trying to write with `orient='table'` and "
@@ -211,9 +216,9 @@ def __init__(self, obj, orient, date_format, double_precision,
         self.index = index
 
     def _write(self, obj, orient, double_precision, ensure_ascii,
-               date_unit, iso_dates, default_handler):
+               date_unit, iso_dates, default_handler, indent):
         data = super()._write(obj, orient, double_precision, ensure_ascii,
-                              date_unit, iso_dates, default_handler)
+                              date_unit, iso_dates, default_handler, indent)
         serialized = '{{"schema": {schema}, "data": {data}}}'.format(
             schema=dumps(self.schema), data=data)
         return serialized
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 8b140263b12bc..4fa31bd36a4e4 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1317,3 +1317,23 @@ def test_read_timezone_information(self):
             index=DatetimeIndex(['2019-01-01 11:00:00'], tz='UTC'))
 
         assert_series_equal(result, expected)
+
+    def test_to_json_indent(self):
+        # GH 12004
+        df = pd.DataFrame([
+            ['foo', 'bar'], ['baz', 'qux']
+        ], columns=['a', 'b'])
+
+        result = df.to_json(indent=4)
+        expected = """{
+    "a":{
+        "0":"foo",
+        "1":"baz"
+    },
+    "b":{
+        "0":"bar",
+        "1":"qux"
+    }
+}"""
+
+        assert result == expected