diff --git a/doc/source/whatsnew/v0.16.2.txt b/doc/source/whatsnew/v0.16.2.txt
index feccc19d8f70b..407699f703861 100644
--- a/doc/source/whatsnew/v0.16.2.txt
+++ b/doc/source/whatsnew/v0.16.2.txt
@@ -120,6 +120,7 @@ Bug Fixes
 - Bug where read_hdf store.select modifies the passed columns list when
   multi-indexed (:issue:`7212`)
 - Bug in ``Categorical`` repr with ``display.width`` of ``None`` in Python 3 (:issue:`10087`)
+- Bug in ``to_json`` with certain orients and a ``CategoricalIndex`` would segfault (:issue:`10317`)
 - Bug where some of the nan funcs do not have consistent return dtypes (:issue:`10251`)
diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py
index be9e0eccda8a1..bb0ad58a47d88 100644
--- a/pandas/io/tests/test_json/test_pandas.py
+++ b/pandas/io/tests/test_json/test_pandas.py
@@ -4,7 +4,7 @@
 import os
 
 import numpy as np
-from pandas import Series, DataFrame, DatetimeIndex, Timestamp
+from pandas import Series, DataFrame, DatetimeIndex, Timestamp, CategoricalIndex
 from datetime import timedelta
 import pandas as pd
 read_json = pd.read_json
@@ -23,6 +23,11 @@
                            for k, v in compat.iteritems(_seriesd)))
 
 _tsframe = DataFrame(_tsd)
 
+_cat_frame = _frame.copy()
+cat = ['bah']*5 + ['bar']*5 + ['baz']*5 + ['foo']*(len(_cat_frame)-15)
+_cat_frame.index = pd.CategoricalIndex(cat,name='E')
+_cat_frame['E'] = list(reversed(cat))
+_cat_frame['sort'] = np.arange(len(_cat_frame))
 
 _mixed_frame = _frame.copy()
@@ -48,6 +53,7 @@ def setUp(self):
         self.intframe = _intframe.copy()
         self.tsframe = _tsframe.copy()
         self.mixed_frame = _mixed_frame.copy()
+        self.categorical = _cat_frame.copy()
 
     def tearDown(self):
         del self.dirpath
@@ -128,8 +134,22 @@ def _check(df):
 
     def test_frame_from_json_to_json(self):
         def _check_orient(df, orient, dtype=None, numpy=False,
-                          convert_axes=True, check_dtype=True, raise_ok=None):
-            df = df.sort()
+                          convert_axes=True, check_dtype=True, raise_ok=None,
+                          sort=None):
+            if sort is not None:
+                df = df.sort(sort)
+            else:
+                df = df.sort()
+
+            # if we are not unique, then check that we are raising ValueError
+            # for the appropriate orients
+            if not df.index.is_unique and orient in ['index','columns']:
+                self.assertRaises(ValueError, lambda : df.to_json(orient=orient))
+                return
+            if not df.columns.is_unique and orient in ['index','columns','records']:
+                self.assertRaises(ValueError, lambda : df.to_json(orient=orient))
+                return
+
             dfjson = df.to_json(orient=orient)
 
             try:
@@ -141,7 +161,10 @@ def _check_orient(df, orient, dtype=None, numpy=False,
                         return
                 raise
 
-            unser = unser.sort()
+            if sort is not None and sort in unser.columns:
+                unser = unser.sort(sort)
+            else:
+                unser = unser.sort()
 
             if dtype is False:
                 check_dtype=False
@@ -160,7 +183,9 @@ def _check_orient(df, orient, dtype=None, numpy=False,
                 # index and col labels might not be strings
                 unser.index = [str(i) for i in unser.index]
                 unser.columns = [str(i) for i in unser.columns]
-                unser = unser.sort()
+
+                if sort is None:
+                    unser = unser.sort()
                 assert_almost_equal(df.values, unser.values)
             else:
                 if convert_axes:
@@ -169,45 +194,45 @@ def _check_orient(df, orient, dtype=None, numpy=False,
                     assert_frame_equal(df, unser, check_less_precise=False,
                                        check_dtype=check_dtype)
 
-        def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None):
+        def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None, sort=None):
 
             # numpy=False
             if convert_axes:
-                _check_orient(df, "columns", dtype=dtype)
-                _check_orient(df, "records", dtype=dtype)
-                _check_orient(df, "split", dtype=dtype)
-                _check_orient(df, "index", dtype=dtype)
-                _check_orient(df, "values", dtype=dtype)
-
-            _check_orient(df, "columns", dtype=dtype, convert_axes=False)
-            _check_orient(df, "records", dtype=dtype, convert_axes=False)
-            _check_orient(df, "split", dtype=dtype, convert_axes=False)
-            _check_orient(df, "index", dtype=dtype, convert_axes=False)
-            _check_orient(df, "values", dtype=dtype ,convert_axes=False)
+                _check_orient(df, "columns", dtype=dtype, sort=sort)
+                _check_orient(df, "records", dtype=dtype, sort=sort)
+                _check_orient(df, "split", dtype=dtype, sort=sort)
+                _check_orient(df, "index", dtype=dtype, sort=sort)
+                _check_orient(df, "values", dtype=dtype, sort=sort)
+
+            _check_orient(df, "columns", dtype=dtype, convert_axes=False, sort=sort)
+            _check_orient(df, "records", dtype=dtype, convert_axes=False, sort=sort)
+            _check_orient(df, "split", dtype=dtype, convert_axes=False, sort=sort)
+            _check_orient(df, "index", dtype=dtype, convert_axes=False, sort=sort)
+            _check_orient(df, "values", dtype=dtype ,convert_axes=False, sort=sort)
 
             # numpy=True and raise_ok might be not None, so ignore the error
             if convert_axes:
                 _check_orient(df, "columns", dtype=dtype, numpy=True,
-                              raise_ok=raise_ok)
+                              raise_ok=raise_ok, sort=sort)
                 _check_orient(df, "records", dtype=dtype, numpy=True,
-                              raise_ok=raise_ok)
+                              raise_ok=raise_ok, sort=sort)
                 _check_orient(df, "split", dtype=dtype, numpy=True,
-                              raise_ok=raise_ok)
+                              raise_ok=raise_ok, sort=sort)
                 _check_orient(df, "index", dtype=dtype, numpy=True,
-                              raise_ok=raise_ok)
+                              raise_ok=raise_ok, sort=sort)
                 _check_orient(df, "values", dtype=dtype, numpy=True,
-                              raise_ok=raise_ok)
+                              raise_ok=raise_ok, sort=sort)
 
             _check_orient(df, "columns", dtype=dtype, numpy=True,
-                          convert_axes=False, raise_ok=raise_ok)
+                          convert_axes=False, raise_ok=raise_ok, sort=sort)
             _check_orient(df, "records", dtype=dtype, numpy=True,
-                          convert_axes=False, raise_ok=raise_ok)
+                          convert_axes=False, raise_ok=raise_ok, sort=sort)
             _check_orient(df, "split", dtype=dtype, numpy=True,
-                          convert_axes=False, raise_ok=raise_ok)
+                          convert_axes=False, raise_ok=raise_ok, sort=sort)
             _check_orient(df, "index", dtype=dtype, numpy=True,
-                          convert_axes=False, raise_ok=raise_ok)
+                          convert_axes=False, raise_ok=raise_ok, sort=sort)
             _check_orient(df, "values", dtype=dtype, numpy=True,
-                          convert_axes=False, raise_ok=raise_ok)
+                          convert_axes=False, raise_ok=raise_ok, sort=sort)
 
         # basic
         _check_all_orients(self.frame)
@@ -233,6 +258,9 @@ def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None):
         _check_all_orients(DataFrame(biggie, dtype='U3'), dtype='U3',
                            convert_axes=False, raise_ok=ValueError)
 
+        # categorical
+        _check_all_orients(self.categorical, sort='sort', raise_ok=ValueError)
+
         # empty
         _check_all_orients(self.empty_frame)
 
diff --git a/pandas/src/ujson/python/objToJSON.c b/pandas/src/ujson/python/objToJSON.c
index dcf107d7965e6..ac415f4d5f195 100644
--- a/pandas/src/ujson/python/objToJSON.c
+++ b/pandas/src/ujson/python/objToJSON.c
@@ -1814,7 +1814,7 @@ char** NpyArr_encodeLabels(PyArrayObject* labels, JSONObjectEncoder* enc, npy_in
 
 void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc)
 {
-  PyObject *obj, *exc, *toDictFunc, *tmpObj;
+  PyObject *obj, *exc, *toDictFunc, *tmpObj, *getValuesFunc;
   TypeContext *pc;
   PyObjectEncoder *enc;
   double val;
@@ -2082,14 +2082,25 @@ void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc)
       return;
     }
 
-    PRINTMARK();
-    tc->type = JT_ARRAY;
-    pc->newObj = PyObject_GetAttrString(obj, "values");
-    pc->iterBegin = NpyArr_iterBegin;
-    pc->iterEnd = NpyArr_iterEnd;
-    pc->iterNext = NpyArr_iterNext;
-    pc->iterGetValue = NpyArr_iterGetValue;
-    pc->iterGetName = NpyArr_iterGetName;
+    getValuesFunc = PyObject_GetAttrString(obj, "get_values");
+    if (getValuesFunc)
+    {
+      PRINTMARK();
+      tc->type = JT_ARRAY;
+      pc->newObj = PyObject_CallObject(getValuesFunc, NULL);
+      pc->iterBegin = NpyArr_iterBegin;
+      pc->iterEnd = NpyArr_iterEnd;
+      pc->iterNext = NpyArr_iterNext;
+      pc->iterGetValue = NpyArr_iterGetValue;
+      pc->iterGetName = NpyArr_iterGetName;
+
+      Py_DECREF(getValuesFunc);
+    }
+    else
+    {
+      goto INVALID;
+    }
+
     return;
   }
   else
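
For context (not part of the patch): a minimal sketch of the situation the new ``_cat_frame`` fixture and the ``get_values()`` change address. The frame shape, column names and category labels below are illustrative only; what mirrors the patch is serializing a frame whose index is a ``CategoricalIndex``, which previously could crash because the encoder iterated the raw ``.values`` of the index as if it were an ndarray.

```python
import numpy as np
import pandas as pd

# A small DataFrame indexed by a CategoricalIndex with repeated labels,
# loosely modelled on the _cat_frame fixture above (names are made up).
cat = ['bah', 'bah', 'bar', 'bar', 'baz', 'baz']
df = pd.DataFrame({'A': np.arange(6), 'B': list('abcdef')},
                  index=pd.CategoricalIndex(cat, name='E'))

# Round-trip through to_json/read_json; with a CategoricalIndex some of
# these orients used to segfault before the encoder switched to
# get_values(), which materializes the labels as a plain ndarray.
for orient in ['split', 'records', 'values']:
    pd.read_json(df.to_json(orient=orient), orient=orient)

# With duplicate index labels, the 'index' and 'columns' orients are now
# rejected with a ValueError instead of producing ambiguous output, which
# is what the non-unique checks added to _check_orient assert.
try:
    df.to_json(orient='index')
except ValueError:
    pass
```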