diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 124ec8f4ab92c..5815d53d59868 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -21,6 +21,7 @@ Other Enhancements - Indexing of ``DataFrame`` and ``Series`` now accepts zerodim ``np.ndarray`` (:issue:`24919`) - :meth:`Timestamp.replace` now supports the ``fold`` argument to disambiguate DST transition times (:issue:`25017`) +- :func:`pandas.merge` now accepts ``None`` as the ``suffixes`` argument (:issue:`25242`) - :meth:`DataFrame.at_time` and :meth:`Series.at_time` now support :meth:`datetime.time` objects with timezones (:issue:`24043`) - ``Series.str`` has gained :meth:`Series.str.casefold` method to removes all case distinctions present in a string (:issue:`25405`) - :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6b4d95055d06d..513c4bbefd4e1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -273,14 +273,6 @@ 4 bar 2 bar 6 5 baz 3 baz 7 -Merge DataFrames df1 and df2, but raise an exception if the DataFrames have -any overlapping columns. - ->>> df1.merge(df2, left_on='lkey', right_on='rkey', suffixes=(False, False)) -Traceback (most recent call last): -... 
-ValueError: columns overlap but no suffix specified: - Index(['value'], dtype='object') """ # ----------------------------------------------------------------------- diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 407db772d73e8..68fa870d1cc3d 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1963,9 +1963,6 @@ def items_overlap_with_suffix(left, lsuffix, right, rsuffix): if len(to_rename) == 0: return left, right else: - if not lsuffix and not rsuffix: - raise ValueError('columns overlap but no suffix specified: ' - '{rename}'.format(rename=to_rename)) def renamer(x, suffix): """Rename the left and right indices. diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index fb50a3c60f705..ce4693d743800 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -159,13 +159,13 @@ def merge_ordered(left, right, on=None, left DataFrame fill_method : {'ffill', None}, default None Interpolation method for data - suffixes : Sequence, default is ("_x", "_y") + suffixes : Sequence or None, default is ("_x", "_y") A length-2 sequence where each element is optionally a string indicating the suffix to add to overlapping column names in `left` and `right` respectively. Pass a value of `None` instead of a string to indicate that the column name from `left` or - `right` should be left as-is, with no suffix. At least one of the - values must not be None. + `right` should be left as-is, with no suffix. If `suffixes` + itself is `None`, both overlapping column names are kept as-is. .. 
versionchanged:: 0.25.0 how : {'left', 'right', 'outer', 'inner'}, default 'outer' @@ -495,6 +495,8 @@ def __init__(self, left, right, how='inner', on=None, self.copy = copy self.suffixes = suffixes + if self.suffixes is None: + self.suffixes = (None, None) self.sort = sort self.left_index = left_index diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index 0508658766cd3..fd4d3a3daa252 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -97,12 +97,6 @@ def test_join_index(frame): with pytest.raises(ValueError, match='join method'): f.join(f2, how='foo') - # corner case - overlapping columns - msg = 'columns overlap but no suffix' - for how in ('outer', 'left', 'inner'): - with pytest.raises(ValueError, match=msg): - frame.join(frame, how=how) - def test_join_index_more(frame): af = frame.loc[:, ['A', 'B']] diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 7a97368504fd6..0923800afc7f7 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1618,7 +1618,18 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm): ("a", "a", dict(suffixes=[None, "_x"]), ["a", "a_x"]), (0, 0, dict(suffixes=["_a", None]), ["0_a", 0]), ("a", "a", dict(), ["a_x", "a_y"]), - (0, 0, dict(), ["0_x", "0_y"]) + (0, 0, dict(), ["0_x", "0_y"]), + # accept 2-length None alike suffixes input + (0, 0, dict(suffixes=[None, None]), [0, 0]), + (0, 0, dict(suffixes=(None, '')), [0, '0']), + (0, 0, dict(suffixes=['', '']), ['0', '0']), + ("a", "a", dict(suffixes=[None, None]), ["a", "a"]), + ("a", "a", dict(suffixes=["", None]), ["a", "a"]), + ("a", "a", dict(suffixes=(None, None)), ["a", "a"]), + ("a", "a", dict(suffixes=('', '')), ["a", "a"]), + # accept None as suffixes + (0, 0, dict(suffixes=None), [0, 0]), + ("a", "a", dict(suffixes=None), ["a", "a"]) ]) def test_merge_suffix(col1, col2, kwargs, expected_cols): # 
issue: 24782 @@ -1633,36 +1644,3 @@ def test_merge_suffix(col1, col2, kwargs, expected_cols): result = pd.merge(a, b, left_index=True, right_index=True, **kwargs) tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("col1, col2, suffixes", [ - ("a", "a", [None, None]), - ("a", "a", (None, None)), - ("a", "a", ("", None)), - (0, 0, [None, None]), - (0, 0, (None, "")) -]) -def test_merge_suffix_error(col1, col2, suffixes): - # issue: 24782 - a = pd.DataFrame({col1: [1, 2, 3]}) - b = pd.DataFrame({col2: [3, 4, 5]}) - - # TODO: might reconsider current raise behaviour, see issue 24782 - msg = "columns overlap but no suffix specified" - with pytest.raises(ValueError, match=msg): - pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes) - - -@pytest.mark.parametrize("col1, col2, suffixes", [ - ("a", "a", None), - (0, 0, None) -]) -def test_merge_suffix_none_error(col1, col2, suffixes): - # issue: 24782 - a = pd.DataFrame({col1: [1, 2, 3]}) - b = pd.DataFrame({col2: [3, 4, 5]}) - - # TODO: might reconsider current raise behaviour, see GH24782 - msg = "iterable" - with pytest.raises(TypeError, match=msg): - pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes) diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 7e8b5b1120bc6..9d4c2e5b305b6 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -508,12 +508,6 @@ def test_join_multi_levels(self): with pytest.raises(ValueError): household.join(portfolio, how='inner') - portfolio2 = portfolio.copy() - portfolio2.index.set_names(['household_id', 'foo']) - - with pytest.raises(ValueError): - portfolio2.join(portfolio, how='inner') - def test_join_multi_levels2(self): # some more advanced merges