Skip to content

Commit cd0224d

Browse files
authored
BUG: Raise ValueError for non numerical join columns in merge_asof (#34488)
1 parent 3f39bce commit cd0224d

File tree

3 files changed

+55
-3
lines changed

3 files changed

+55
-3
lines changed

doc/source/whatsnew/v1.3.0.rst

+1-2
Original file line numberDiff line numberDiff line change
@@ -309,8 +309,7 @@ Reshaping
309309
- Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`)
310310
- Bug in :func:`join` over :class:`MultiIndex` returned wrong result when either of the indexes had only one level (:issue:`36909`)
311311
- Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`, :issue:`38907`)
312-
-
313-
312+
- :func:`merge_asof` raises ``ValueError`` instead of a cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`)
314313

315314
Sparse
316315
^^^^^^

pandas/core/reshape/merge.py

+17
Original file line numberDiff line numberDiff line change
@@ -1708,6 +1708,23 @@ def _validate_specification(self):
17081708
if self.left_by is not None and self.right_by is None:
17091709
raise MergeError("missing right_by")
17101710

1711+
# GH#29130 Check that merge keys do not have dtype object
1712+
lo_dtype = (
1713+
self.left[self.left_on[0]].dtype
1714+
if not self.left_index
1715+
else self.left.index.dtype
1716+
)
1717+
ro_dtype = (
1718+
self.right[self.right_on[0]].dtype
1719+
if not self.right_index
1720+
else self.right.index.dtype
1721+
)
1722+
if is_object_dtype(lo_dtype) or is_object_dtype(ro_dtype):
1723+
raise MergeError(
1724+
f"Incompatible merge dtype, {repr(ro_dtype)} and "
1725+
f"{repr(lo_dtype)}, both sides must have numeric dtype"
1726+
)
1727+
17111728
# add 'by' to our key-list so we can have it in the
17121729
# output as a key
17131730
if self.left_by is not None:

pandas/tests/reshape/merge/test_merge_asof.py

+37-1
Original file line numberDiff line numberDiff line change
@@ -1168,7 +1168,7 @@ def test_on_float_by_int(self):
11681168
tm.assert_frame_equal(result, expected)
11691169

11701170
def test_merge_datatype_error_raises(self):
1171-
msg = r"incompatible merge keys \[0\] .*, must be the same type"
1171+
msg = r"Incompatible merge dtype, .*, both sides must have numeric dtype"
11721172

11731173
left = pd.DataFrame({"left_val": [1, 5, 10], "a": ["a", "b", "c"]})
11741174
right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7], "a": [1, 2, 3, 6, 7]})
@@ -1373,3 +1373,39 @@ def test_left_index_right_index_tolerance(self):
13731373
tolerance=Timedelta(seconds=0.5),
13741374
)
13751375
tm.assert_frame_equal(result, expected)
1376+
1377+
1378+
@pytest.mark.parametrize(
    "kwargs", [{"on": "x"}, {"left_index": True, "right_index": True}]
)
@pytest.mark.parametrize(
    "data",
    [["2019-06-01 00:09:12", "2019-06-01 00:10:29"], [1.0, "2019-06-01 00:10:29"]],
)
def test_merge_asof_non_numerical_dtype(kwargs, data):
    """Check that ``merge_asof`` rejects non-numerical join keys.

    Parameters
    ----------
    kwargs : dict
        Join specification forwarded to ``pd.merge_asof``: either a join on
        column ``"x"`` or an index-on-index join.
    data : list
        Values used both as column ``"x"`` and as the index of each frame;
        every parametrized case contains at least one string.

    The call must raise ``MergeError`` with the "Incompatible merge dtype"
    message.
    """
    # GH#29130
    # The same values feed the column and the index so that both the ``on``
    # and the ``left_index``/``right_index`` variants hit a non-numerical key.
    left = pd.DataFrame({"x": data}, index=data)
    right = pd.DataFrame({"x": data}, index=data)
    with pytest.raises(
        MergeError,
        match=r"Incompatible merge dtype, .*, both sides must have numeric dtype",
    ):
        pd.merge_asof(left, right, **kwargs)
1394+
1395+
1396+
def test_merge_asof_non_numerical_dtype_object():
    """Check that ``merge_asof`` rejects string ``left_on``/``right_on`` keys.

    Both frames hold only string columns, and the call also passes
    ``left_by``/``right_by``. The call must raise ``MergeError`` with the
    "Incompatible merge dtype" message.
    """
    # GH#29130
    left = pd.DataFrame({"a": ["12", "13", "15"], "left_val1": ["a", "b", "c"]})
    right = pd.DataFrame({"a": ["a", "b", "c"], "left_val": ["d", "e", "f"]})
    with pytest.raises(
        MergeError,
        match=r"Incompatible merge dtype, .*, both sides must have numeric dtype",
    ):
        pd.merge_asof(
            left,
            right,
            left_on="left_val1",
            right_on="a",
            left_by="a",
            right_by="left_val",
        )

0 commit comments

Comments
 (0)