Skip to content

Commit f89c29c

Browse files
committed
ENH: clearer error msg for unequal categoricals in merge_asof (GH#26136)
1 parent 9feb3ad commit f89c29c

File tree

3 files changed

+25
-4
lines changed

3 files changed

+25
-4
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ Other Enhancements
3737
- :class:`RangeIndex` has gained :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop`, and :attr:`~RangeIndex.step` attributes (:issue:`25710`)
3838
- :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`)
3939
- :meth:`DataFrame.query` and :meth:`DataFrame.eval` now support quoting column names with backticks to refer to names with spaces (:issue:`6508`)
40+
- :func:`merge_asof` now gives a clearer error message when merge keys are categoricals that are not equal (:issue:`26136`)
4041

4142
.. _whatsnew_0250.api_breaking:
4243

pandas/core/reshape/merge.py

+12-4
Original file line number | Diff line number | Diff line change
@@ -1446,10 +1446,18 @@ def _get_merge_keys(self):
14461446
# validate index types are the same
14471447
for i, (lk, rk) in enumerate(zip(left_join_keys, right_join_keys)):
14481448
if not is_dtype_equal(lk.dtype, rk.dtype):
1449-
raise MergeError("incompatible merge keys [{i}] {lkdtype} and "
1450-
"{rkdtype}, must be the same type"
1451-
.format(i=i, lkdtype=lk.dtype,
1452-
rkdtype=rk.dtype))
1449+
if (is_categorical_dtype(lk.dtype) and
1450+
is_categorical_dtype(rk.dtype)):
1451+
# The generic error message is confusing for categoricals.
1452+
msg = ("incompatible merge keys [{i}] both sides "
1453+
"category, but not equal ones"
1454+
.format(i=i))
1455+
else:
1456+
msg = ("incompatible merge keys [{i}] {lkdtype} and "
1457+
"{rkdtype}, must be the same type"
1458+
.format(i=i, lkdtype=lk.dtype,
1459+
rkdtype=rk.dtype))
1460+
raise MergeError(msg)
14531461

14541462
# validate tolerance; must be a Timedelta if we have a DTI
14551463
if self.tolerance is not None:

pandas/tests/reshape/merge/test_merge_asof.py

+12
Original file line numberDiff line numberDiff line change
@@ -1006,6 +1006,18 @@ def test_merge_datatype_error(self):
10061006
with pytest.raises(MergeError, match=msg):
10071007
merge_asof(left, right, on='a')
10081008

1009+
def test_merge_datatype_categorical_error(self):
1010+
""" Tests merge datatype mismatch error """
1011+
msg = r'merge keys \[0\] both sides category, but not equal ones'
1012+
1013+
left = pd.DataFrame({'left_val': [1, 5, 10],
1014+
'a': pd.Categorical(['a', 'b', 'c'])})
1015+
right = pd.DataFrame({'right_val': [1, 2, 3, 6, 7],
1016+
'a': pd.Categorical(['a', 'X', 'c', 'X', 'b'])})
1017+
1018+
with pytest.raises(MergeError, match=msg):
1019+
merge_asof(left, right, on='a')
1020+
10091021
@pytest.mark.parametrize('func', [lambda x: x, lambda x: to_datetime(x)],
10101022
ids=['numeric', 'datetime'])
10111023
@pytest.mark.parametrize('side', ['left', 'right'])

0 commit comments

Comments
 (0)