diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index b4b98ec0403a8..84f1fa6ad8086 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -302,8 +302,7 @@ Reshaping - Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`) - Bug in :func:`join` over :class:`MultiIndex` returned wrong result, when one of both indexes had only one level (:issue:`36909`) - Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`, :issue:`38907`) -- - +- :func:`merge_asof` raises ``ValueError`` instead of cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index ac5fc7cddf82a..1caf1a2a023da 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1708,6 +1708,23 @@ def _validate_specification(self): if self.left_by is not None and self.right_by is None: raise MergeError("missing right_by") + # GH#29130 Check that merge keys do not have dtype object + lo_dtype = ( + self.left[self.left_on[0]].dtype + if not self.left_index + else self.left.index.dtype + ) + ro_dtype = ( + self.right[self.right_on[0]].dtype + if not self.right_index + else self.right.index.dtype + ) + if is_object_dtype(lo_dtype) or is_object_dtype(ro_dtype): + raise MergeError( + f"Incompatible merge dtype, {repr(ro_dtype)} and " + f"{repr(lo_dtype)}, both sides must have numeric dtype" + ) + # add 'by' to our key-list so we can have it in the # output as a key if self.left_by is not None: diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 5cb7bdd603517..ecff63b495fbb 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1168,7 +1168,7 @@ def test_on_float_by_int(self): 
tm.assert_frame_equal(result, expected) def test_merge_datatype_error_raises(self): - msg = r"incompatible merge keys \[0\] .*, must be the same type" + msg = r"Incompatible merge dtype, .*, both sides must have numeric dtype" left = pd.DataFrame({"left_val": [1, 5, 10], "a": ["a", "b", "c"]}) right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7], "a": [1, 2, 3, 6, 7]}) @@ -1373,3 +1373,39 @@ def test_left_index_right_index_tolerance(self): tolerance=Timedelta(seconds=0.5), ) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs", [{"on": "x"}, {"left_index": True, "right_index": True}] +) +@pytest.mark.parametrize( + "data", + [["2019-06-01 00:09:12", "2019-06-01 00:10:29"], [1.0, "2019-06-01 00:10:29"]], +) +def test_merge_asof_non_numerical_dtype(kwargs, data): + # GH#29130 + left = pd.DataFrame({"x": data}, index=data) + right = pd.DataFrame({"x": data}, index=data) + with pytest.raises( + MergeError, + match=r"Incompatible merge dtype, .*, both sides must have numeric dtype", + ): + pd.merge_asof(left, right, **kwargs) + + +def test_merge_asof_non_numerical_dtype_object(): + # GH#29130 + left = pd.DataFrame({"a": ["12", "13", "15"], "left_val1": ["a", "b", "c"]}) + right = pd.DataFrame({"a": ["a", "b", "c"], "left_val": ["d", "e", "f"]}) + with pytest.raises( + MergeError, + match=r"Incompatible merge dtype, .*, both sides must have numeric dtype", + ): + pd.merge_asof( + left, + right, + left_on="left_val1", + right_on="a", + left_by="a", + right_by="left_val", + )