Skip to content

Commit 07ea11c

Browse files
committed
Merge pull request #10322 from evanpw/json
Bug in to_json causing segfault with a CategoricalIndex (GH #10317)
2 parents ba69a49 + 588437c commit 07ea11c

File tree

3 files changed

+76
-36
lines changed

3 files changed

+76
-36
lines changed

doc/source/whatsnew/v0.16.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ Bug Fixes
120120
- Bug where read_hdf store.select modifies the passed columns list when
121121
multi-indexed (:issue:`7212`)
122122
- Bug in ``Categorical`` repr with ``display.width`` of ``None`` in Python 3 (:issue:`10087`)
123+
- Bug in ``to_json`` where certain orients combined with a ``CategoricalIndex`` would cause a segfault (:issue:`10317`)
123124

124125
- Bug where some of the nan funcs do not have consistent return dtypes (:issue:`10251`)
125126

pandas/io/tests/test_json/test_pandas.py

+55-27
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import os
55

66
import numpy as np
7-
from pandas import Series, DataFrame, DatetimeIndex, Timestamp
7+
from pandas import Series, DataFrame, DatetimeIndex, Timestamp, CategoricalIndex
88
from datetime import timedelta
99
import pandas as pd
1010
read_json = pd.read_json
@@ -23,6 +23,11 @@
2323
for k, v in compat.iteritems(_seriesd)))
2424

2525
_tsframe = DataFrame(_tsd)
26+
_cat_frame = _frame.copy()
27+
cat = ['bah']*5 + ['bar']*5 + ['baz']*5 + ['foo']*(len(_cat_frame)-15)
28+
_cat_frame.index = pd.CategoricalIndex(cat,name='E')
29+
_cat_frame['E'] = list(reversed(cat))
30+
_cat_frame['sort'] = np.arange(len(_cat_frame))
2631

2732
_mixed_frame = _frame.copy()
2833

@@ -48,6 +53,7 @@ def setUp(self):
4853
self.intframe = _intframe.copy()
4954
self.tsframe = _tsframe.copy()
5055
self.mixed_frame = _mixed_frame.copy()
56+
self.categorical = _cat_frame.copy()
5157

5258
def tearDown(self):
5359
del self.dirpath
@@ -128,8 +134,22 @@ def _check(df):
128134

129135
def test_frame_from_json_to_json(self):
130136
def _check_orient(df, orient, dtype=None, numpy=False,
131-
convert_axes=True, check_dtype=True, raise_ok=None):
132-
df = df.sort()
137+
convert_axes=True, check_dtype=True, raise_ok=None,
138+
sort=None):
139+
if sort is not None:
140+
df = df.sort(sort)
141+
else:
142+
df = df.sort()
143+
144+
# if we are not unique, then check that we are raising ValueError
145+
# for the appropriate orients
146+
if not df.index.is_unique and orient in ['index','columns']:
147+
self.assertRaises(ValueError, lambda : df.to_json(orient=orient))
148+
return
149+
if not df.columns.is_unique and orient in ['index','columns','records']:
150+
self.assertRaises(ValueError, lambda : df.to_json(orient=orient))
151+
return
152+
133153
dfjson = df.to_json(orient=orient)
134154

135155
try:
@@ -141,7 +161,10 @@ def _check_orient(df, orient, dtype=None, numpy=False,
141161
return
142162
raise
143163

144-
unser = unser.sort()
164+
if sort is not None and sort in unser.columns:
165+
unser = unser.sort(sort)
166+
else:
167+
unser = unser.sort()
145168

146169
if dtype is False:
147170
check_dtype=False
@@ -160,7 +183,9 @@ def _check_orient(df, orient, dtype=None, numpy=False,
160183
# index and col labels might not be strings
161184
unser.index = [str(i) for i in unser.index]
162185
unser.columns = [str(i) for i in unser.columns]
163-
unser = unser.sort()
186+
187+
if sort is None:
188+
unser = unser.sort()
164189
assert_almost_equal(df.values, unser.values)
165190
else:
166191
if convert_axes:
@@ -169,45 +194,45 @@ def _check_orient(df, orient, dtype=None, numpy=False,
169194
assert_frame_equal(df, unser, check_less_precise=False,
170195
check_dtype=check_dtype)
171196

172-
def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None):
197+
def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None, sort=None):
173198

174199
# numpy=False
175200
if convert_axes:
176-
_check_orient(df, "columns", dtype=dtype)
177-
_check_orient(df, "records", dtype=dtype)
178-
_check_orient(df, "split", dtype=dtype)
179-
_check_orient(df, "index", dtype=dtype)
180-
_check_orient(df, "values", dtype=dtype)
181-
182-
_check_orient(df, "columns", dtype=dtype, convert_axes=False)
183-
_check_orient(df, "records", dtype=dtype, convert_axes=False)
184-
_check_orient(df, "split", dtype=dtype, convert_axes=False)
185-
_check_orient(df, "index", dtype=dtype, convert_axes=False)
186-
_check_orient(df, "values", dtype=dtype ,convert_axes=False)
201+
_check_orient(df, "columns", dtype=dtype, sort=sort)
202+
_check_orient(df, "records", dtype=dtype, sort=sort)
203+
_check_orient(df, "split", dtype=dtype, sort=sort)
204+
_check_orient(df, "index", dtype=dtype, sort=sort)
205+
_check_orient(df, "values", dtype=dtype, sort=sort)
206+
207+
_check_orient(df, "columns", dtype=dtype, convert_axes=False, sort=sort)
208+
_check_orient(df, "records", dtype=dtype, convert_axes=False, sort=sort)
209+
_check_orient(df, "split", dtype=dtype, convert_axes=False, sort=sort)
210+
_check_orient(df, "index", dtype=dtype, convert_axes=False, sort=sort)
211+
_check_orient(df, "values", dtype=dtype ,convert_axes=False, sort=sort)
187212

188213
# numpy=True and raise_ok might not be None, so ignore the error
189214
if convert_axes:
190215
_check_orient(df, "columns", dtype=dtype, numpy=True,
191-
raise_ok=raise_ok)
216+
raise_ok=raise_ok, sort=sort)
192217
_check_orient(df, "records", dtype=dtype, numpy=True,
193-
raise_ok=raise_ok)
218+
raise_ok=raise_ok, sort=sort)
194219
_check_orient(df, "split", dtype=dtype, numpy=True,
195-
raise_ok=raise_ok)
220+
raise_ok=raise_ok, sort=sort)
196221
_check_orient(df, "index", dtype=dtype, numpy=True,
197-
raise_ok=raise_ok)
222+
raise_ok=raise_ok, sort=sort)
198223
_check_orient(df, "values", dtype=dtype, numpy=True,
199-
raise_ok=raise_ok)
224+
raise_ok=raise_ok, sort=sort)
200225

201226
_check_orient(df, "columns", dtype=dtype, numpy=True,
202-
convert_axes=False, raise_ok=raise_ok)
227+
convert_axes=False, raise_ok=raise_ok, sort=sort)
203228
_check_orient(df, "records", dtype=dtype, numpy=True,
204-
convert_axes=False, raise_ok=raise_ok)
229+
convert_axes=False, raise_ok=raise_ok, sort=sort)
205230
_check_orient(df, "split", dtype=dtype, numpy=True,
206-
convert_axes=False, raise_ok=raise_ok)
231+
convert_axes=False, raise_ok=raise_ok, sort=sort)
207232
_check_orient(df, "index", dtype=dtype, numpy=True,
208-
convert_axes=False, raise_ok=raise_ok)
233+
convert_axes=False, raise_ok=raise_ok, sort=sort)
209234
_check_orient(df, "values", dtype=dtype, numpy=True,
210-
convert_axes=False, raise_ok=raise_ok)
235+
convert_axes=False, raise_ok=raise_ok, sort=sort)
211236

212237
# basic
213238
_check_all_orients(self.frame)
@@ -233,6 +258,9 @@ def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None):
233258
_check_all_orients(DataFrame(biggie, dtype='U3'), dtype='U3',
234259
convert_axes=False, raise_ok=ValueError)
235260

261+
# categorical
262+
_check_all_orients(self.categorical, sort='sort', raise_ok=ValueError)
263+
236264
# empty
237265
_check_all_orients(self.empty_frame)
238266

pandas/src/ujson/python/objToJSON.c

+20-9
Original file line numberDiff line numberDiff line change
@@ -1814,7 +1814,7 @@ char** NpyArr_encodeLabels(PyArrayObject* labels, JSONObjectEncoder* enc, npy_in
18141814

18151815
void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc)
18161816
{
1817-
PyObject *obj, *exc, *toDictFunc, *tmpObj;
1817+
PyObject *obj, *exc, *toDictFunc, *tmpObj, *getValuesFunc;
18181818
TypeContext *pc;
18191819
PyObjectEncoder *enc;
18201820
double val;
@@ -2082,14 +2082,25 @@ void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc)
20822082
return;
20832083
}
20842084

2085-
PRINTMARK();
2086-
tc->type = JT_ARRAY;
2087-
pc->newObj = PyObject_GetAttrString(obj, "values");
2088-
pc->iterBegin = NpyArr_iterBegin;
2089-
pc->iterEnd = NpyArr_iterEnd;
2090-
pc->iterNext = NpyArr_iterNext;
2091-
pc->iterGetValue = NpyArr_iterGetValue;
2092-
pc->iterGetName = NpyArr_iterGetName;
2085+
PyObject* getValuesFunc = PyObject_GetAttrString(obj, "get_values");
2086+
if (getValuesFunc)
2087+
{
2088+
PRINTMARK();
2089+
tc->type = JT_ARRAY;
2090+
pc->newObj = PyObject_CallObject(getValuesFunc, NULL);
2091+
pc->iterBegin = NpyArr_iterBegin;
2092+
pc->iterEnd = NpyArr_iterEnd;
2093+
pc->iterNext = NpyArr_iterNext;
2094+
pc->iterGetValue = NpyArr_iterGetValue;
2095+
pc->iterGetName = NpyArr_iterGetName;
2096+
2097+
Py_DECREF(getValuesFunc);
2098+
}
2099+
else
2100+
{
2101+
goto INVALID;
2102+
}
2103+
20932104
return;
20942105
}
20952106
else

0 commit comments

Comments
 (0)