diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index d9071a80b5db7..b74abc965f7fa 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -15,6 +15,16 @@ from pandas import DataFrame, DatetimeIndex, Series, Timestamp, read_json import pandas._testing as tm +_seriesd = tm.getSeriesData() + +_frame = DataFrame(_seriesd) + +_cat_frame = _frame.copy() +cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * (len(_cat_frame) - 15) +_cat_frame.index = pd.CategoricalIndex(cat, name="E") +_cat_frame["E"] = list(reversed(cat)) +_cat_frame["sort"] = np.arange(len(_cat_frame), dtype="int64") + def assert_json_roundtrip_equal(result, expected, orient): if orient == "records" or orient == "values": @@ -26,6 +36,12 @@ def assert_json_roundtrip_equal(result, expected, orient): @pytest.mark.filterwarnings("ignore:the 'numpy' keyword is deprecated:FutureWarning") class TestPandasContainer: + @pytest.fixture(autouse=True) + def setup(self): + self.categorical = _cat_frame.copy() + + yield + def test_frame_double_encoded_labels(self, orient): df = DataFrame( [["a", "b"], ["c", "d"]], @@ -167,21 +183,25 @@ def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype): @pytest.mark.parametrize("convert_axes", [True, False]) @pytest.mark.parametrize("numpy", [True, False]) def test_roundtrip_categorical(self, orient, convert_axes, numpy): - cats = ["a", "b"] - df = pd.DataFrame( - pd.Categorical(cats), index=pd.CategoricalIndex(cats), columns=["cat"] - ) + # TODO: create a better frame to test with and improve coverage + if orient in ("index", "columns"): + pytest.xfail(f"Can't have duplicate index values for orient '{orient}')") - data = df.to_json(orient=orient) - if numpy and orient != "split": + data = self.categorical.to_json(orient=orient) + if numpy and orient in ("records", "values"): pytest.xfail(f"Orient {orient} is broken with numpy=True") result = pd.read_json( data, orient=orient, convert_axes=convert_axes, numpy=numpy ) - # Categorical dtypes are not preserved on round trip - expected = pd.DataFrame(cats, index=cats, columns=["cat"]) + expected = self.categorical.copy() + expected.index = expected.index.astype(str) # Categorical not preserved + expected.index.name = None # index names aren't preserved in JSON + + if not numpy and orient == "index": + expected = expected.sort_index() + assert_json_roundtrip_equal(result, expected, orient) @pytest.mark.parametrize("convert_axes", [True, False])