Skip to content

Clean Up Categorical Test for JSON #33228

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 3, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 8 additions & 28 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,6 @@
from pandas import DataFrame, DatetimeIndex, Series, Timestamp, read_json
import pandas._testing as tm

_seriesd = tm.getSeriesData()

_frame = DataFrame(_seriesd)

_cat_frame = _frame.copy()
cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * (len(_cat_frame) - 15)
_cat_frame.index = pd.CategoricalIndex(cat, name="E")
_cat_frame["E"] = list(reversed(cat))
_cat_frame["sort"] = np.arange(len(_cat_frame), dtype="int64")


def assert_json_roundtrip_equal(result, expected, orient):
if orient == "records" or orient == "values":
Expand All @@ -36,12 +26,6 @@ def assert_json_roundtrip_equal(result, expected, orient):

@pytest.mark.filterwarnings("ignore:the 'numpy' keyword is deprecated:FutureWarning")
class TestPandasContainer:
@pytest.fixture(autouse=True)
def setup(self):
self.categorical = _cat_frame.copy()

yield

def test_frame_double_encoded_labels(self, orient):
df = DataFrame(
[["a", "b"], ["c", "d"]],
Expand Down Expand Up @@ -183,25 +167,21 @@ def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype):
@pytest.mark.parametrize("convert_axes", [True, False])
@pytest.mark.parametrize("numpy", [True, False])
def test_roundtrip_categorical(self, orient, convert_axes, numpy):
# TODO: create a better frame to test with and improve coverage
if orient in ("index", "columns"):
pytest.xfail(f"Can't have duplicate index values for orient '{orient}')")
cats = ["a", "b"]
df = pd.DataFrame(
pd.Categorical(cats), index=pd.CategoricalIndex(cats), columns=["cat"]
)

data = self.categorical.to_json(orient=orient)
if numpy and orient in ("records", "values"):
data = df.to_json(orient=orient)
if numpy and orient != "split":
Copy link
Member Author

@WillAyd WillAyd Apr 2, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed the xfail preceding this because it was no longer applicable, but doing so unearthed a new issue that occurs when numpy=True is combined with any orient other than split.

The numpy keyword was deprecated in #28512 and is scheduled for removal in 2.0, so I figured it was not worth investing much effort in fixing this.

pytest.xfail(f"Orient {orient} is broken with numpy=True")

result = pd.read_json(
data, orient=orient, convert_axes=convert_axes, numpy=numpy
)

expected = self.categorical.copy()
expected.index = expected.index.astype(str) # Categorical not preserved
expected.index.name = None # index names aren't preserved in JSON

if not numpy and orient == "index":
expected = expected.sort_index()

# Categorical dtypes are not preserved on round trip
expected = pd.DataFrame(cats, index=cats, columns=["cat"])
assert_json_roundtrip_equal(result, expected, orient)

@pytest.mark.parametrize("convert_axes", [True, False])
Expand Down