Skip to content

Commit 693fb71

Browse files
authored
ENH: When merging with incompatible keys, include the first incompatible key in the error message (#51947)
* Modified merge error message to indicate first incompatible column
* Added corresponding entry in v2.1.0.rst
1 parent 5053b56 commit 693fb71

File tree

4 files changed

+38
-12
lines changed

4 files changed

+38
-12
lines changed

doc/source/whatsnew/v2.1.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ Other enhancements
3232
- :meth:`MultiIndex.sortlevel` and :meth:`Index.sortlevel` gained a new keyword ``na_position`` (:issue:`51612`)
3333
- Improve error message when setting :class:`DataFrame` with wrong number of columns through :meth:`DataFrame.isetitem` (:issue:`51701`)
3434
- Let :meth:`DataFrame.to_feather` accept a non-default :class:`Index` and non-string column names (:issue:`51787`)
35-
- :class:`api.extensions.ExtensionArray` now has a :meth:`~api.extensions.ExtensionArray.map` method (:issue:`51809`).
35+
- :class:`api.extensions.ExtensionArray` now has a :meth:`~api.extensions.ExtensionArray.map` method (:issue:`51809`)
36+
- Improve error message when merging on incompatible columns using :meth:`DataFrame.merge`; the message now names the first incompatible key (:issue:`51861`)
3637

3738
.. ---------------------------------------------------------------------------
3839
.. _whatsnew_210.notable_bug_fixes:

pandas/core/reshape/merge.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1297,8 +1297,8 @@ def _maybe_coerce_merge_keys(self) -> None:
12971297
continue
12981298

12991299
msg = (
1300-
f"You are trying to merge on {lk.dtype} and "
1301-
f"{rk.dtype} columns. If you wish to proceed you should use pd.concat"
1300+
f"You are trying to merge on {lk.dtype} and {rk.dtype} columns "
1301+
f"for key '{name}'. If you wish to proceed you should use pd.concat"
13021302
)
13031303

13041304
# if we are numeric, then allow differing

pandas/tests/reshape/merge/test_join.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -141,10 +141,9 @@ def test_join_on(self, target_source):
141141

142142
# overlap
143143
source_copy = source.copy()
144-
source_copy["A"] = 0
145144
msg = (
146-
"You are trying to merge on float64 and object columns. If "
147-
"you wish to proceed you should use pd.concat"
145+
"You are trying to merge on float64 and object columns for key 'A'. "
146+
"If you wish to proceed you should use pd.concat"
148147
)
149148
with pytest.raises(ValueError, match=msg):
150149
target.join(source_copy, on="A")

pandas/tests/reshape/merge/test_merge.py

+32-6
Original file line numberDiff line numberDiff line change
@@ -1630,24 +1630,50 @@ def test_merge_incompat_dtypes_error(self, df1_vals, df2_vals):
16301630
df2 = DataFrame({"A": df2_vals})
16311631

16321632
msg = (
1633-
f"You are trying to merge on {df1['A'].dtype} and "
1634-
f"{df2['A'].dtype} columns. If you wish to proceed "
1635-
"you should use pd.concat"
1633+
f"You are trying to merge on {df1['A'].dtype} and {df2['A'].dtype} "
1634+
"columns for key 'A'. If you wish to proceed you should use pd.concat"
16361635
)
16371636
msg = re.escape(msg)
16381637
with pytest.raises(ValueError, match=msg):
16391638
merge(df1, df2, on=["A"])
16401639

16411640
# Check that error still raised when swapping order of dataframes
16421641
msg = (
1643-
f"You are trying to merge on {df2['A'].dtype} and "
1644-
f"{df1['A'].dtype} columns. If you wish to proceed "
1645-
"you should use pd.concat"
1642+
f"You are trying to merge on {df2['A'].dtype} and {df1['A'].dtype} "
1643+
"columns for key 'A'. If you wish to proceed you should use pd.concat"
16461644
)
16471645
msg = re.escape(msg)
16481646
with pytest.raises(ValueError, match=msg):
16491647
merge(df2, df1, on=["A"])
16501648

1649+
# Check that error still raised when merging on multiple columns
1650+
# The error message should mention the first incompatible column
1651+
if len(df1_vals) == len(df2_vals):
1652+
# Column A in df3 and df4 is of compatible (the same) dtype
1653+
# Columns B and C in df3 and df4 are of incompatible dtypes
1654+
df3 = DataFrame({"A": df2_vals, "B": df1_vals, "C": df1_vals})
1655+
df4 = DataFrame({"A": df2_vals, "B": df2_vals, "C": df2_vals})
1656+
1657+
# Check that error raised correctly when merging all columns A, B, and C
1658+
# The error message should mention key 'B'
1659+
msg = (
1660+
f"You are trying to merge on {df3['B'].dtype} and {df4['B'].dtype} "
1661+
"columns for key 'B'. If you wish to proceed you should use pd.concat"
1662+
)
1663+
msg = re.escape(msg)
1664+
with pytest.raises(ValueError, match=msg):
1665+
merge(df3, df4)
1666+
1667+
# Check that error raised correctly when merging columns A and C
1668+
# The error message should mention key 'C'
1669+
msg = (
1670+
f"You are trying to merge on {df3['C'].dtype} and {df4['C'].dtype} "
1671+
"columns for key 'C'. If you wish to proceed you should use pd.concat"
1672+
)
1673+
msg = re.escape(msg)
1674+
with pytest.raises(ValueError, match=msg):
1675+
merge(df3, df4, on=["A", "C"])
1676+
16511677
@pytest.mark.parametrize(
16521678
"expected_data, how",
16531679
[

0 commit comments

Comments
 (0)