Skip to content

Commit 88d81cc

Browse files
authored
TST add test for dtype consistency with pd replace #23305 (#35234)
1 parent fe1ea14 commit 88d81cc

File tree

1 file changed

+80
-0
lines changed

1 file changed

+80
-0
lines changed

pandas/tests/frame/methods/test_replace.py

+80
Original file line numberDiff line numberDiff line change
@@ -1493,3 +1493,83 @@ def test_replace_period_ignore_float(self):
14931493
result = df.replace(1.0, 0.0)
14941494
expected = pd.DataFrame({"Per": [pd.Period("2020-01")] * 3})
14951495
tm.assert_frame_equal(expected, result)
1496+
1497+
def test_replace_value_category_type(self):
1498+
"""
1499+
Test for #23305: to ensure category dtypes are maintained
1500+
after replace with direct values
1501+
"""
1502+
1503+
# create input data
1504+
input_dict = {
1505+
"col1": [1, 2, 3, 4],
1506+
"col2": ["a", "b", "c", "d"],
1507+
"col3": [1.5, 2.5, 3.5, 4.5],
1508+
"col4": ["cat1", "cat2", "cat3", "cat4"],
1509+
"col5": ["obj1", "obj2", "obj3", "obj4"],
1510+
}
1511+
# explicitly cast columns as category and order them
1512+
input_df = pd.DataFrame(data=input_dict).astype(
1513+
{"col2": "category", "col4": "category"}
1514+
)
1515+
input_df["col2"] = input_df["col2"].cat.reorder_categories(
1516+
["a", "b", "c", "d"], ordered=True
1517+
)
1518+
input_df["col4"] = input_df["col4"].cat.reorder_categories(
1519+
["cat1", "cat2", "cat3", "cat4"], ordered=True
1520+
)
1521+
1522+
# create expected dataframe
1523+
expected_dict = {
1524+
"col1": [1, 2, 3, 4],
1525+
"col2": ["a", "b", "c", "z"],
1526+
"col3": [1.5, 2.5, 3.5, 4.5],
1527+
"col4": ["cat1", "catX", "cat3", "cat4"],
1528+
"col5": ["obj9", "obj2", "obj3", "obj4"],
1529+
}
1530+
# explicitly cast columns as category and order them
1531+
expected = pd.DataFrame(data=expected_dict).astype(
1532+
{"col2": "category", "col4": "category"}
1533+
)
1534+
expected["col2"] = expected["col2"].cat.reorder_categories(
1535+
["a", "b", "c", "z"], ordered=True
1536+
)
1537+
expected["col4"] = expected["col4"].cat.reorder_categories(
1538+
["cat1", "catX", "cat3", "cat4"], ordered=True
1539+
)
1540+
1541+
# replace values in input dataframe
1542+
input_df = input_df.replace("d", "z")
1543+
input_df = input_df.replace("obj1", "obj9")
1544+
result = input_df.replace("cat2", "catX")
1545+
1546+
tm.assert_frame_equal(result, expected)
1547+
1548+
@pytest.mark.xfail(
    reason="category dtype gets changed to object type after replace, see #35268",
    strict=True,
)
def test_replace_dict_category_type(self, input_category_df, expected_category_df):
    """
    Test that category dtypes are kept when values are replaced
    through a single dict mapping.

    Marked as a strict xfail; see the reason given in the decorator.
    """
    # NOTE(review): input_category_df / expected_category_df are requested
    # but never used below -- confirm these fixtures exist; if they do not,
    # the xfail may be hiding a fixture lookup error rather than the dtype
    # issue this test is meant to track.

    # every column of both frames is cast to category
    all_category = {"col1": "category", "col2": "category", "col3": "category"}

    # single-row frame before replacement
    source = pd.DataFrame(data={"col1": ["a"], "col2": ["obj1"], "col3": ["cat1"]})
    source = source.astype(all_category)

    # single-row frame expected after replacement
    expected = pd.DataFrame(data={"col1": ["z"], "col2": ["obj9"], "col3": ["catX"]})
    expected = expected.astype(all_category)

    # replace all three values in one call using a dict
    mapping = {"a": "z", "obj1": "obj9", "cat1": "catX"}
    result = source.replace(mapping)

    tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)