Skip to content

Commit 0755915

Browse files
authored
ENH: Improve error reporting for wrong merge cols (#37547)
1 parent 5fd478d commit 0755915

File tree

3 files changed

+74
-4
lines changed

3 files changed

+74
-4
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ Other enhancements
231231
- :class:`Window` now supports all Scipy window types in ``win_type`` with flexible keyword argument support (:issue:`34556`)
232232
- :meth:`testing.assert_index_equal` now has a ``check_order`` parameter that allows indexes to be checked in an order-insensitive manner (:issue:`37478`)
233233
- :func:`read_csv` supports memory-mapping for compressed files (:issue:`37621`)
234+
- Improved error reporting for :meth:`DataFrame.merge` when invalid merge column definitions are given (:issue:`16228`)
234235

235236
.. _whatsnew_120.api_breaking.python:
236237

pandas/core/reshape/merge.py

+19-4
Original file line numberDiff line numberDiff line change
@@ -1203,11 +1203,9 @@ def _validate_specification(self):
12031203
if self.left_index and self.right_index:
12041204
self.left_on, self.right_on = (), ()
12051205
elif self.left_index:
1206-
if self.right_on is None:
1207-
raise MergeError("Must pass right_on or right_index=True")
1206+
raise MergeError("Must pass right_on or right_index=True")
12081207
elif self.right_index:
1209-
if self.left_on is None:
1210-
raise MergeError("Must pass left_on or left_index=True")
1208+
raise MergeError("Must pass left_on or left_index=True")
12111209
else:
12121210
# use the common columns
12131211
common_cols = self.left.columns.intersection(self.right.columns)
@@ -1228,8 +1226,19 @@ def _validate_specification(self):
12281226
'Can only pass argument "on" OR "left_on" '
12291227
'and "right_on", not a combination of both.'
12301228
)
1229+
if self.left_index or self.right_index:
1230+
raise MergeError(
1231+
'Can only pass argument "on" OR "left_index" '
1232+
'and "right_index", not a combination of both.'
1233+
)
12311234
self.left_on = self.right_on = self.on
12321235
elif self.left_on is not None:
1236+
if self.left_index:
1237+
raise MergeError(
1238+
'Can only pass argument "left_on" OR "left_index" not both.'
1239+
)
1240+
if not self.right_index and self.right_on is None:
1241+
raise MergeError('Must pass "right_on" OR "right_index".')
12331242
n = len(self.left_on)
12341243
if self.right_index:
12351244
if len(self.left_on) != self.right.index.nlevels:
@@ -1239,6 +1248,12 @@ def _validate_specification(self):
12391248
)
12401249
self.right_on = [None] * n
12411250
elif self.right_on is not None:
1251+
if self.right_index:
1252+
raise MergeError(
1253+
'Can only pass argument "right_on" OR "right_index" not both.'
1254+
)
1255+
if not self.left_index and self.left_on is None:
1256+
raise MergeError('Must pass "left_on" OR "left_index".')
12421257
n = len(self.right_on)
12431258
if self.left_index:
12441259
if len(self.right_on) != self.left.index.nlevels:

pandas/tests/reshape/merge/test_merge.py

+54
Original file line numberDiff line numberDiff line change
@@ -2283,3 +2283,57 @@ def test_merge_join_categorical_multiindex():
22832283
expected = expected.drop(["Cat", "Int"], axis=1)
22842284
result = a.join(b, on=["Cat1", "Int1"])
22852285
tm.assert_frame_equal(expected, result)
2286+
2287+
2288+
@pytest.mark.parametrize("func", ["merge", "merge_asof"])
2289+
@pytest.mark.parametrize(
2290+
("kwargs", "err_msg"),
2291+
[
2292+
({"left_on": "a", "left_index": True}, ["left_on", "left_index"]),
2293+
({"right_on": "a", "right_index": True}, ["right_on", "right_index"]),
2294+
],
2295+
)
2296+
def test_merge_join_cols_error_reporting_duplicates(func, kwargs, err_msg):
2297+
# GH: 16228
# Passing both the "<side>_on" key and the "<side>_index" flag for the same
# side must raise MergeError; err_msg holds the two argument names that the
# expected message is built from.
2298+
left = DataFrame({"a": [1, 2], "b": [3, 4]})
2299+
right = DataFrame({"a": [1, 1], "c": [5, 6]})
2300+
msg = rf'Can only pass argument "{err_msg[0]}" OR "{err_msg[1]}" not both\.'
2301+
with pytest.raises(MergeError, match=msg):
2302+
getattr(pd, func)(left, right, **kwargs)
2303+
2304+
2305+
@pytest.mark.parametrize("func", ["merge", "merge_asof"])
2306+
@pytest.mark.parametrize(
2307+
("kwargs", "err_msg"),
2308+
[
2309+
({"left_on": "a"}, ["right_on", "right_index"]),
2310+
({"right_on": "a"}, ["left_on", "left_index"]),
2311+
],
2312+
)
2313+
def test_merge_join_cols_error_reporting_missing(func, kwargs, err_msg):
2314+
# GH: 16228
# Only one side's key is passed (kwargs), with neither a key nor an index
# flag for the other side; the expected MergeError message is built from the
# two missing argument names in err_msg.
2315+
left = DataFrame({"a": [1, 2], "b": [3, 4]})
2316+
right = DataFrame({"a": [1, 1], "c": [5, 6]})
2317+
msg = rf'Must pass "{err_msg[0]}" OR "{err_msg[1]}"\.'
2318+
with pytest.raises(MergeError, match=msg):
2319+
getattr(pd, func)(left, right, **kwargs)
2320+
2321+
2322+
@pytest.mark.parametrize("func", ["merge", "merge_asof"])
2323+
@pytest.mark.parametrize(
2324+
"kwargs",
2325+
[
2326+
{"right_index": True},
2327+
{"left_index": True},
2328+
],
2329+
)
2330+
def test_merge_join_cols_error_reporting_on_and_index(func, kwargs):
2331+
# GH: 16228
# Passing on="a" together with either index flag from kwargs must raise
# MergeError with the '"on" OR "left_index" and "right_index"' message.
2332+
left = DataFrame({"a": [1, 2], "b": [3, 4]})
2333+
right = DataFrame({"a": [1, 1], "c": [5, 6]})
2334+
msg = (
2335+
r'Can only pass argument "on" OR "left_index" '
2336+
r'and "right_index", not a combination of both\.'
2337+
)
2338+
with pytest.raises(MergeError, match=msg):
2339+
getattr(pd, func)(left, right, on="a", **kwargs)

0 commit comments

Comments
 (0)