Skip to content

Commit 2748365

Browse files
KevsterAmp, pre-commit-ci[bot], and mroeschke
authored
ENH: Add merge type validation on pandas.merge (#59435)
* add merge type validation on pandas.merge
* add ENH to latest whatsnew doc
* move merge type validation to _MergeOperation class
* add tests on merge validation; add asof as valid mergetype
* change merge_type from tuple to dict
* change ValueError message to updated change
* change Error message on merge type
* update test error messages for merge type errors
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
---------
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 385ce23 commit 2748365

File tree

4 files changed

+24
-1
lines changed

4 files changed

+24
-1
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ Other enhancements
3131
- :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`)
3232
- :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`)
3333
- :func:`DataFrame.to_excel` now raises a ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
34+
- :func:`pandas.merge` now validates the ``how`` parameter input (merge type) (:issue:`59435`)
3435
- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
3536
- :meth:`DataFrame.agg` called with ``axis=1`` and a ``func`` which relabels the result index now raises a ``NotImplementedError`` (:issue:`58807`).
3637
- :meth:`Index.get_loc` now also accepts subclasses of ``tuple`` as keys (:issue:`57922`)

pandas/core/reshape/merge.py

+8
Original file line numberDiff line numberDiff line change
@@ -982,6 +982,14 @@ def __init__(
982982
)
983983
raise MergeError(msg)
984984

985+
# GH 59435: raise when "how" is not a valid Merge type
986+
merge_type = {"left", "right", "inner", "outer", "cross", "asof"}
987+
if how not in merge_type:
988+
raise ValueError(
989+
f"'{how}' is not a valid Merge type: "
990+
f"left, right, inner, outer, cross, asof"
991+
)
992+
985993
self.left_on, self.right_on = self._validate_left_right_on(left_on, right_on)
986994

987995
(

pandas/tests/frame/methods/test_join.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from datetime import datetime
2+
import re
23
import zoneinfo
34

45
import numpy as np
@@ -276,7 +277,8 @@ def test_join_index(float_frame):
276277
tm.assert_index_equal(joined.index, float_frame.index.sort_values())
277278
tm.assert_index_equal(joined.columns, expected_columns)
278279

279-
with pytest.raises(ValueError, match="join method"):
280+
join_msg = "'foo' is not a valid Merge type: left, right, inner, outer, cross, asof"
281+
with pytest.raises(ValueError, match=re.escape(join_msg)):
280282
f.join(f2, how="foo")
281283

282284
# corner case - overlapping columns

pandas/tests/reshape/merge/test_merge.py

+12
Original file line numberDiff line numberDiff line change
@@ -1456,6 +1456,18 @@ def test_merge_readonly(self):
14561456

14571457
data1.merge(data2) # no error
14581458

1459+
def test_merge_how_validation(self):
1460+
# https://github.com/pandas-dev/pandas/issues/59422
1461+
data1 = DataFrame(
1462+
np.arange(20).reshape((4, 5)) + 1, columns=["a", "b", "c", "d", "e"]
1463+
)
1464+
data2 = DataFrame(
1465+
np.arange(20).reshape((5, 4)) + 1, columns=["a", "b", "x", "y"]
1466+
)
1467+
msg = "'full' is not a valid Merge type: left, right, inner, outer, cross, asof"
1468+
with pytest.raises(ValueError, match=re.escape(msg)):
1469+
data1.merge(data2, how="full")
1470+
14591471

14601472
def _check_merge(x, y):
14611473
for how in ["inner", "left", "outer"]:

0 commit comments

Comments
 (0)