Skip to content

Commit b693f2d

Browse files
committed
TST: Check merging on equivalent CategoricalDtype
Closes gh-22501
1 parent 882961d commit b693f2d

File tree

2 files changed

+64
-0
lines changed

2 files changed

+64
-0
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ Reshaping
358358
- :func:`to_records` now accepts dtypes to its `column_dtypes` parameter (:issue:`24895`)
359359
- Bug in :func:`concat` where order of ``OrderedDict`` (and ``dict`` in Python 3.6+) is not respected, when passed in as ``objs`` argument (:issue:`21510`)
360360
- Bug in :func:`concat` where the resulting ``freq`` of two :class:`DatetimeIndex` with the same ``freq`` would be dropped (:issue:`3232`).
361+
- Bug in :func:`merge` where merging with equivalent Categorical dtypes was raising an error (:issue:`22501`)
361362

362363
Sparse
363364
^^^^^^

pandas/tests/reshape/merge/test_merge.py

+63
Original file line numberDiff line numberDiff line change
@@ -1666,3 +1666,66 @@ def test_merge_suffix_none_error(col1, col2, suffixes):
16661666
msg = "iterable"
16671667
with pytest.raises(TypeError, match=msg):
16681668
pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes)
1669+
1670+
1671+
@pytest.mark.parametrize("cat_dtype", ["one", "two"])
@pytest.mark.parametrize("reverse", [True, False])
def test_merge_equal_cat_dtypes(cat_dtype, reverse):
    # see gh-22501
    # Index-on-index merge of two frames whose categorical indexes use
    # dtypes built from identical categories. "one" and "two" are separate
    # dtype instances, so the right frame is tried both with the very same
    # dtype object as the left frame and with a distinct one.
    cat_dtypes = {
        key: CategoricalDtype(categories=["a", "b", "c"], ordered=False)
        for key in ("one", "two")
    }

    def indexed_frame(labels, dtype, column, values):
        # One data column, indexed by the categorical "foo" column.
        foo = Series(labels).astype(dtype)
        return DataFrame({"foo": foo, column: values}).set_index("foo")

    df1 = indexed_frame(["a", "b", "c"], cat_dtypes["one"], "left", [1, 2, 3])

    # Optionally feed the right-hand rows in reverse order; labels and
    # values are flipped together so each label keeps its value.
    step = -1 if reverse else 1
    right_labels = ["a", "b", "c"][::step]
    right_values = [1, 2, 3][::step]
    df2 = indexed_frame(
        right_labels, cat_dtypes[cat_dtype], "right", right_values
    )

    result = df1.merge(df2, left_index=True, right_index=True)

    # The expected rows follow the left frame's order in every case.
    expected_index = Series(["a", "b", "c"]).astype(cat_dtypes["one"])
    expected = DataFrame(
        {"left": [1, 2, 3], "right": [1, 2, 3], "foo": expected_index}
    ).set_index("foo")

    tm.assert_frame_equal(result, expected)
1706+
1707+
1708+
def test_merge_equal_cat_dtypes2():
    # see gh-22501
    # Both indexes share a single dtype object, but the left frame only
    # uses two of the three categories that appear on the right.
    cat_dtype = CategoricalDtype(categories=["a", "b", "c"], ordered=False)

    def foo_column(labels):
        # Categorical column that becomes the "foo" index of each frame.
        return Series(labels).astype(cat_dtype)

    # Test Data
    df1 = DataFrame(
        {"foo": foo_column(["a", "b"]), "left": [1, 2]}
    ).set_index("foo")
    df2 = DataFrame(
        {"foo": foo_column(["a", "b", "c"]), "right": [3, 2, 1]}
    ).set_index("foo")

    result = df1.merge(df2, left_index=True, right_index=True)

    # Only the labels present in df1 are expected back, paired with the
    # matching "right" values from df2.
    expected = DataFrame(
        {"left": [1, 2], "right": [3, 2], "foo": foo_column(["a", "b"])}
    ).set_index("foo")

    tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)