Skip to content

Commit 8b9f933

Browse files
gfyoung authored and jreback committed
TST: Check merging on equivalent CategoricalDtype (pandas-dev#25898)
Closes pandas-devgh-22501
1 parent 96a128e commit 8b9f933

File tree

2 files changed

+66
-0
lines changed

2 files changed

+66
-0
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,7 @@ Reshaping
382382
- :func:`to_records` now accepts dtypes to its `column_dtypes` parameter (:issue:`24895`)
383383
- Bug in :func:`concat` where order of ``OrderedDict`` (and ``dict`` in Python 3.6+) is not respected, when passed in as ``objs`` argument (:issue:`21510`)
384384
- Bug in :func:`concat` where the resulting ``freq`` of two :class:`DatetimeIndex` with the same ``freq`` would be dropped (:issue:`3232`).
385+
- Bug in :func:`merge` where merging with equivalent Categorical dtypes was raising an error (:issue:`22501`)
385386

386387
Sparse
387388
^^^^^^

pandas/tests/reshape/merge/test_merge.py

+65
Original file line numberDiff line numberDiff line change
@@ -1666,3 +1666,68 @@ def test_merge_suffix_none_error(col1, col2, suffixes):
16661666
msg = "iterable"
16671667
with pytest.raises(TypeError, match=msg):
16681668
pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes)
1669+
1670+
1671+
@pytest.mark.parametrize("cat_dtype", ["one", "two"])
@pytest.mark.parametrize("reverse", [True, False])
def test_merge_equal_cat_dtypes(cat_dtype, reverse):
    """Merge on categorical indexes whose dtypes are equal (see gh-22501).

    Parameters
    ----------
    cat_dtype : str
        Key ("one" or "two") selecting which dtype object the right-hand
        frame's index is cast to. Both keys map to separately constructed
        ``CategoricalDtype`` objects with the same categories and
        ``ordered=False``; the left-hand frame always uses "one".
    reverse : bool
        When True, the right-hand frame's rows are supplied in reverse
        order before merging.
    """
    # see gh-22501
    cat_dtypes = {
        "one": CategoricalDtype(categories=["a", "b", "c"], ordered=False),
        "two": CategoricalDtype(categories=["a", "b", "c"], ordered=False),
    }
    left_dtype = cat_dtypes["one"]
    right_dtype = cat_dtypes[cat_dtype]

    left_frame = DataFrame({
        "foo": Series(["a", "b", "c"]).astype(left_dtype),
        "left": [1, 2, 3],
    })
    left_frame = left_frame.set_index("foo")

    # Keys and values are flipped together, so each key keeps its value.
    step = -1 if reverse else 1
    right_keys = ["a", "b", "c"][::step]
    right_values = [1, 2, 3][::step]

    right_frame = DataFrame({
        "foo": Series(right_keys).astype(right_dtype),
        "right": right_values,
    })
    right_frame = right_frame.set_index("foo")

    result = left_frame.merge(right_frame, left_index=True, right_index=True)

    expected = DataFrame({
        "left": [1, 2, 3],
        "right": [1, 2, 3],
        "foo": Series(["a", "b", "c"]).astype(left_dtype),
    })
    expected = expected.set_index("foo")

    # Categorical is unordered, so don't check ordering.
    tm.assert_frame_equal(result, expected, check_categorical=False)
1707+
1708+
1709+
def test_merge_equal_cat_dtypes2():
    """Merge frames sharing one categorical index dtype (see gh-22501).

    The left frame's index holds only "a" and "b" while the right frame's
    index holds "a", "b" and "c"; both are cast to the same
    ``CategoricalDtype``. The merged frame is expected to contain the rows
    for "a" and "b" only.
    """
    # see gh-22501
    shared_dtype = CategoricalDtype(categories=["a", "b", "c"], ordered=False)

    # Test Data
    left_frame = DataFrame({
        "foo": Series(["a", "b"]).astype(shared_dtype),
        "left": [1, 2],
    })
    left_frame = left_frame.set_index("foo")

    right_frame = DataFrame({
        "foo": Series(["a", "b", "c"]).astype(shared_dtype),
        "right": [3, 2, 1],
    })
    right_frame = right_frame.set_index("foo")

    result = left_frame.merge(right_frame, left_index=True, right_index=True)

    expected = DataFrame({
        "left": [1, 2],
        "right": [3, 2],
        "foo": Series(["a", "b"]).astype(shared_dtype),
    })
    expected = expected.set_index("foo")

    # Categorical is unordered, so don't check ordering.
    tm.assert_frame_equal(result, expected, check_categorical=False)

0 commit comments

Comments
 (0)