diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 7e8403c94ceef..b3ea6fbd4ae2d 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -32,7 +32,8 @@ Other enhancements - :meth:`MultiIndex.sortlevel` and :meth:`Index.sortlevel` gained a new keyword ``na_position`` (:issue:`51612`) - Improve error message when setting :class:`DataFrame` with wrong number of columns through :meth:`DataFrame.isetitem` (:issue:`51701`) - Let :meth:`DataFrame.to_feather` accept a non-default :class:`Index` and non-string column names (:issue:`51787`) -- :class:`api.extensions.ExtensionArray` now has a :meth:`~api.extensions.ExtensionArray.map` method (:issue:`51809`). +- :class:`api.extensions.ExtensionArray` now has a :meth:`~api.extensions.ExtensionArray.map` method (:issue:`51809`) +- Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`) .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 2752efd850933..21ce1d3c96379 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1297,8 +1297,8 @@ def _maybe_coerce_merge_keys(self) -> None: continue msg = ( - f"You are trying to merge on {lk.dtype} and " - f"{rk.dtype} columns. If you wish to proceed you should use pd.concat" + f"You are trying to merge on {lk.dtype} and {rk.dtype} columns " + f"for key '{name}'. If you wish to proceed you should use pd.concat" ) # if we are numeric, then allow differing diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 93bca0739298f..d5b0ad6b2d56d 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -141,10 +141,9 @@ def test_join_on(self, target_source): # overlap source_copy = source.copy() - source_copy["A"] = 0 msg = ( - "You are trying to merge on float64 and object columns. If " - "you wish to proceed you should use pd.concat" + "You are trying to merge on float64 and object columns for key 'A'. " + "If you wish to proceed you should use pd.concat" ) with pytest.raises(ValueError, match=msg): target.join(source_copy, on="A") diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index ad90d5ae147c8..6f2b327c37067 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1630,9 +1630,8 @@ def test_merge_incompat_dtypes_error(self, df1_vals, df2_vals): df2 = DataFrame({"A": df2_vals}) msg = ( - f"You are trying to merge on {df1['A'].dtype} and " - f"{df2['A'].dtype} columns. If you wish to proceed " - "you should use pd.concat" + f"You are trying to merge on {df1['A'].dtype} and {df2['A'].dtype} " + "columns for key 'A'. If you wish to proceed you should use pd.concat" ) msg = re.escape(msg) with pytest.raises(ValueError, match=msg): @@ -1640,14 +1639,41 @@ def test_merge_incompat_dtypes_error(self, df1_vals, df2_vals): # Check that error still raised when swapping order of dataframes msg = ( - f"You are trying to merge on {df2['A'].dtype} and " - f"{df1['A'].dtype} columns. If you wish to proceed " - "you should use pd.concat" + f"You are trying to merge on {df2['A'].dtype} and {df1['A'].dtype} " + "columns for key 'A'. If you wish to proceed you should use pd.concat" ) msg = re.escape(msg) with pytest.raises(ValueError, match=msg): merge(df2, df1, on=["A"]) + # Check that error still raised when merging on multiple columns + # The error message should mention the first incompatible column + if len(df1_vals) == len(df2_vals): + # Column A in df1 and df2 is of compatible (the same) dtype + # Columns B and C in df1 and df2 are of incompatible dtypes + df3 = DataFrame({"A": df2_vals, "B": df1_vals, "C": df1_vals}) + df4 = DataFrame({"A": df2_vals, "B": df2_vals, "C": df2_vals}) + + # Check that error raised correctly when merging all columns A, B, and C + # The error message should mention key 'B' + msg = ( + f"You are trying to merge on {df3['B'].dtype} and {df4['B'].dtype} " + "columns for key 'B'. If you wish to proceed you should use pd.concat" + ) + msg = re.escape(msg) + with pytest.raises(ValueError, match=msg): + merge(df3, df4) + + # Check that error raised correctly when merging columns A and C + # The error message should mention key 'C' + msg = ( + f"You are trying to merge on {df3['C'].dtype} and {df4['C'].dtype} " + "columns for key 'C'. If you wish to proceed you should use pd.concat" + ) + msg = re.escape(msg) + with pytest.raises(ValueError, match=msg): + merge(df3, df4, on=["A", "C"]) + @pytest.mark.parametrize( "expected_data, how", [