Skip to content

WIP: Add indent support in to_json #26457

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pandas/_libs/src/ujson/lib/ultrajson.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,10 @@ typedef struct __JSONObjectEncoder {
If true, '<', '>', and '&' characters will be encoded as \u003c, \u003e, and \u0026, respectively. If false, no special encoding will be used. */
int encodeHTMLChars;

/*
Configuration for spaces of indent */
int indent;

/*
Set to an error message if error occurred */
const char *errorMsg;
Expand Down
22 changes: 21 additions & 1 deletion pandas/_libs/src/ujson/lib/ultrajsonenc.c
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,20 @@ FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char *begin,
while (end > begin) aux = *end, *end-- = *begin, *begin++ = aux;
}

/*
Appends a single '\n' to the encoder output when indentation is enabled
(enc->indent > 0); otherwise leaves the output untouched.
No capacity check is made inside this function.
NOTE(review): callers presumably must have reserved buffer space
beforehand - confirm against the call sites. */
void Buffer_AppendIndentNewlineUnchecked(JSONObjectEncoder *enc) {
    if (enc->indent <= 0) {
        /* Compact output: no line breaks are emitted. */
        return;
    }

    Buffer_AppendCharUnchecked(enc, '\n');
}

/*
Appends the leading whitespace for a line nested `value` levels deep:
`value` groups of enc->indent space characters each.
Nothing is written when enc->indent is zero or negative, or when
`value` is zero or negative.
No capacity check is made inside this function.
NOTE(review): callers presumably must have reserved
value * enc->indent bytes beforehand - confirm against the call sites. */
void Buffer_AppendIndentUnchecked(JSONObjectEncoder *enc, JSINT32 value) {
    JSINT32 depth;
    int column;

    if (enc->indent <= 0) {
        /* Indentation disabled. */
        return;
    }

    for (depth = 0; depth < value; depth++) {
        /* One nesting level is enc->indent spaces wide. */
        for (column = 0; column < enc->indent; column++) {
            Buffer_AppendCharUnchecked(enc, ' ');
        }
    }
}

void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) {
char *wstr;
JSUINT32 uvalue = (value < 0) ? -value : value;
Expand Down Expand Up @@ -966,6 +980,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
iterObj = enc->iterGetValue(obj, &tc);

enc->level++;
Buffer_AppendIndentUnchecked (enc, enc->level);
encode(iterObj, enc, NULL, 0);
count++;
}
Expand All @@ -981,13 +996,17 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
enc->iterBegin(obj, &tc);

Buffer_AppendCharUnchecked(enc, '{');
Buffer_AppendIndentNewlineUnchecked (enc);
Buffer_AppendIndentUnchecked (enc, enc->level + 2);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As mentioned, this `+ 2` isn't permanent; it's just a hack during WIP to get this working. Something seems off with the `level` struct member, but I haven't figured out what yet.


while (enc->iterNext(obj, &tc)) {
if (count > 0) {
Buffer_AppendCharUnchecked(enc, ',');
#ifndef JSON_NO_EXTRA_WHITESPACE
Buffer_AppendCharUnchecked(enc, ' ');
#endif
Buffer_AppendIndentNewlineUnchecked (enc);
Buffer_AppendIndentUnchecked (enc, enc->level + 2);
}

iterObj = enc->iterGetValue(obj, &tc);
Expand All @@ -999,7 +1018,8 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
}

enc->iterEnd(obj, &tc);
Buffer_Reserve(enc, 2);
Buffer_AppendIndentNewlineUnchecked (enc);
Buffer_AppendIndentUnchecked (enc, enc->level + 1);
Buffer_AppendCharUnchecked(enc, '}');
break;
}
Expand Down
10 changes: 7 additions & 3 deletions pandas/_libs/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -2268,7 +2268,7 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) {
static char *kwlist[] = {
"obj", "ensure_ascii", "double_precision", "encode_html_chars",
"orient", "date_unit", "iso_dates", "default_handler",
NULL};
"indent", NULL};

char buffer[65536];
char *ret;
Expand All @@ -2281,6 +2281,7 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) {
char *sdateFormat = NULL;
PyObject *oisoDates = 0;
PyObject *odefHandler = 0;
int indent = 0;

PyObjectEncoder pyEncoder = {{
Object_beginTypeContext,
Expand All @@ -2302,6 +2303,7 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) {
idoublePrecision,
1, // forceAscii
0, // encodeHTMLChars
0, // indent
}};
JSONObjectEncoder *encoder = (JSONObjectEncoder *)&pyEncoder;

Expand All @@ -2326,10 +2328,10 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) {

PRINTMARK();

if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiOssOO", kwlist, &oinput,
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiOssOOi", kwlist, &oinput,
&oensureAscii, &idoublePrecision,
&oencodeHTMLChars, &sOrient, &sdateFormat,
&oisoDates, &odefHandler)) {
&oisoDates, &odefHandler, &indent)) {
return NULL;
}

Expand Down Expand Up @@ -2395,6 +2397,8 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) {
pyEncoder.defaultHandler = odefHandler;
}

encoder->indent = indent;

pyEncoder.originalOutputFormat = pyEncoder.outputFormat;
PRINTMARK();
ret = JSON_EncodeObject(oinput, encoder, buffer, sizeof(buffer));
Expand Down
9 changes: 7 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2178,7 +2178,7 @@ def to_excel(self, excel_writer, sheet_name="Sheet1", na_rep="",
def to_json(self, path_or_buf=None, orient=None, date_format=None,
double_precision=10, force_ascii=True, date_unit='ms',
default_handler=None, lines=False, compression='infer',
index=True):
index=True, indent=0):
"""
Convert the object to a JSON string.

Expand Down Expand Up @@ -2260,6 +2260,11 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,

.. versionadded:: 0.23.0

indent : integer, default 0
Length of whitespace used to indent each record.

.. versionadded:: 0.25.0

Returns
-------
None or str
Expand Down Expand Up @@ -2325,7 +2330,7 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
force_ascii=force_ascii, date_unit=date_unit,
default_handler=default_handler,
lines=lines, compression=compression,
index=index)
index=index, indent=indent)

def to_hdf(self, path_or_buf, key, **kwargs):
"""
Expand Down
33 changes: 19 additions & 14 deletions pandas/io/json/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
def to_json(path_or_buf, obj, orient=None, date_format='epoch',
double_precision=10, force_ascii=True, date_unit='ms',
default_handler=None, lines=False, compression='infer',
index=True):
index=True, indent=0):

if not index and orient not in ['split', 'table']:
raise ValueError("'index=False' is only valid when 'orient' is "
Expand All @@ -59,7 +59,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
obj, orient=orient, date_format=date_format,
double_precision=double_precision, ensure_ascii=force_ascii,
date_unit=date_unit, default_handler=default_handler,
index=index).write()
index=index, indent=indent).write()

if lines:
s = _convert_to_line_delimits(s)
Expand All @@ -78,7 +78,8 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',

class Writer:
def __init__(self, obj, orient, date_format, double_precision,
ensure_ascii, date_unit, index, default_handler=None):
ensure_ascii, date_unit, index, default_handler=None,
indent=0):
self.obj = obj

if orient is None:
Expand All @@ -91,6 +92,7 @@ def __init__(self, obj, orient, date_format, double_precision,
self.date_unit = date_unit
self.default_handler = default_handler
self.index = index
self.indent = indent

self.is_copy = None
self._format_axes()
Expand All @@ -101,18 +103,20 @@ def _format_axes(self):
def write(self):
return self._write(self.obj, self.orient, self.double_precision,
self.ensure_ascii, self.date_unit,
self.date_format == 'iso', self.default_handler)
self.date_format == 'iso', self.default_handler,
self.indent)

def _write(self, obj, orient, double_precision, ensure_ascii,
date_unit, iso_dates, default_handler):
date_unit, iso_dates, default_handler, indent):
return dumps(
obj,
orient=orient,
double_precision=double_precision,
ensure_ascii=ensure_ascii,
date_unit=date_unit,
iso_dates=iso_dates,
default_handler=default_handler
default_handler=default_handler,
indent=indent
)


Expand All @@ -125,11 +129,11 @@ def _format_axes(self):
"'{orient}'".format(orient=self.orient))

def _write(self, obj, orient, double_precision, ensure_ascii,
date_unit, iso_dates, default_handler):
date_unit, iso_dates, default_handler, indent):
if not self.index and orient == 'split':
obj = {"name": obj.name, "data": obj.values}
return super()._write(obj, orient, double_precision, ensure_ascii,
date_unit, iso_dates, default_handler)
date_unit, iso_dates, default_handler, indent)


class FrameWriter(Writer):
Expand All @@ -149,19 +153,20 @@ def _format_axes(self):
"'{orient}'.".format(orient=self.orient))

def _write(self, obj, orient, double_precision, ensure_ascii,
date_unit, iso_dates, default_handler):
date_unit, iso_dates, default_handler, indent):
if not self.index and orient == 'split':
obj = obj.to_dict(orient='split')
del obj["index"]
return super()._write(obj, orient, double_precision, ensure_ascii,
date_unit, iso_dates, default_handler)
date_unit, iso_dates, default_handler, indent)


class JSONTableWriter(FrameWriter):
_default_orient = 'records'

def __init__(self, obj, orient, date_format, double_precision,
ensure_ascii, date_unit, index, default_handler=None):
ensure_ascii, date_unit, index, default_handler=None,
indent=0):
"""
Adds a `schema` attribute with the Table Schema, resets
the index (can't do in caller, because the schema inference needs
Expand All @@ -170,7 +175,7 @@ def __init__(self, obj, orient, date_format, double_precision,
"""
super().__init__(obj, orient, date_format, double_precision,
ensure_ascii, date_unit, index,
default_handler=default_handler)
default_handler=default_handler, indent=indent)

if date_format != 'iso':
msg = ("Trying to write with `orient='table'` and "
Expand Down Expand Up @@ -211,9 +216,9 @@ def __init__(self, obj, orient, date_format, double_precision,
self.index = index

def _write(self, obj, orient, double_precision, ensure_ascii,
date_unit, iso_dates, default_handler):
date_unit, iso_dates, default_handler, indent):
data = super()._write(obj, orient, double_precision, ensure_ascii,
date_unit, iso_dates, default_handler)
date_unit, iso_dates, default_handler, indent)
serialized = '{{"schema": {schema}, "data": {data}}}'.format(
schema=dumps(self.schema), data=data)
return serialized
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1317,3 +1317,23 @@ def test_read_timezone_information(self):
index=DatetimeIndex(['2019-01-01 11:00:00'],
tz='UTC'))
assert_series_equal(result, expected)

def test_to_json_indent(self):
# GH 12004
df = pd.DataFrame([
['foo', 'bar'], ['baz', 'qux']
], columns=['a', 'b'])

result = df.to_json(indent=4)
expected = """{
"a":{
"0":"foo",
"1":"baz"
},
"b":{
"0":"bar",
"1":"qux"
}
}"""

assert result == expected