Skip to content

Commit eb48e10

Browse files
API: Allow non-tuples in pandas.merge (#34810)
1 parent c7bc342 commit eb48e10

File tree

4 files changed

+23
-20
lines changed

4 files changed

+23
-20
lines changed

doc/source/whatsnew/v1.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -693,7 +693,6 @@ Other API changes
693693
- :func:`pandas.api.types.is_string_dtype` no longer incorrectly identifies categorical series as string.
694694
- :func:`read_excel` no longer takes ``**kwds`` arguments. This means that passing in keyword ``chunksize`` now raises a ``TypeError``
695695
(previously raised a ``NotImplementedError``), while passing in keyword ``encoding`` now raises a ``TypeError`` (:issue:`34464`)
696-
- :func: `merge` now checks ``suffixes`` parameter type to be ``tuple`` and raises ``TypeError``, whereas before a ``list`` or ``set`` were accepted and that the ``set`` could produce unexpected results (:issue:`33740`)
697696
- :class:`Period` no longer accepts tuples for the ``freq`` argument (:issue:`34658`)
698697
- :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` now raise a ``ValueError`` if ``limit_direction`` is 'forward' or 'both' and ``method`` is 'backfill' or 'bfill' or ``limit_direction`` is 'backward' or 'both' and ``method`` is 'pad' or 'ffill' (:issue:`34746`)
699698
- The :class:`DataFrame` constructor no longer accepts a list of ``DataFrame`` objects. Because of changes to NumPy, ``DataFrame`` objects are now consistently treated as 2D objects, so a list of ``DataFrames`` is considered 3D, and no longer acceptable for the ``DataFrame`` constructor (:issue:`32289`).
@@ -787,6 +786,7 @@ Deprecations
787786
- :meth:`DataFrame.to_dict` has deprecated accepting short names for ``orient`` and will raise in a future version (:issue:`32515`)
788787
- :meth:`Categorical.to_dense` is deprecated and will be removed in a future version, use ``np.asarray(cat)`` instead (:issue:`32639`)
789788
- The ``fastpath`` keyword in the ``SingleBlockManager`` constructor is deprecated and will be removed in a future version (:issue:`33092`)
789+
- Providing ``suffixes`` as a ``set`` in :func:`pandas.merge` is deprecated. Provide a tuple instead (:issue:`33740`, :issue:`34741`).
790790
- :meth:`Index.is_mixed` is deprecated and will be removed in a future version, check ``index.inferred_type`` directly instead (:issue:`32922`)
791791

792792
- Passing any arguments but the first one to :func:`read_html` as

pandas/core/frame.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -227,10 +227,13 @@
227227
sort : bool, default False
228228
Sort the join keys lexicographically in the result DataFrame. If False,
229229
the order of the join keys depends on the join type (how keyword).
230-
suffixes : tuple of (str, str), default ('_x', '_y')
231-
Suffix to apply to overlapping column names in the left and right
232-
side, respectively. To raise an exception on overlapping columns use
233-
(False, False).
230+
suffixes : list-like, default is ("_x", "_y")
231+
A length-2 sequence where each element is optionally a string
232+
indicating the suffix to add to overlapping column names in
233+
`left` and `right` respectively. Pass a value of `None` instead
234+
of a string to indicate that the column name from `left` or
235+
`right` should be left as-is, with no suffix. At least one of the
236+
values must not be None.
234237
copy : bool, default True
235238
If False, avoid copy if possible.
236239
indicator : bool or str, default False

pandas/core/reshape/merge.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ def merge_ordered(
194194
left DataFrame.
195195
fill_method : {'ffill', None}, default None
196196
Interpolation method for data.
197-
suffixes : Sequence, default is ("_x", "_y")
197+
suffixes : list-like, default is ("_x", "_y")
198198
A length-2 sequence where each element is optionally a string
199199
indicating the suffix to add to overlapping column names in
200200
`left` and `right` respectively. Pass a value of `None` instead
@@ -2072,9 +2072,13 @@ def _items_overlap_with_suffix(left: Index, right: Index, suffixes: Tuple[str, s
20722072
If corresponding suffix is empty, the entry is simply converted to string.
20732073
20742074
"""
2075-
if not isinstance(suffixes, tuple):
2076-
raise TypeError(
2077-
f"suffixes should be tuple of (str, str). But got {type(suffixes).__name__}"
2075+
if not is_list_like(suffixes, allow_sets=False):
2076+
warnings.warn(
2077+
f"Passing 'suffixes' as a {type(suffixes)}, is not supported and may give "
2078+
"unexpected results. Provide 'suffixes' as a tuple instead. In the "
2079+
"future a 'TypeError' will be raised.",
2080+
FutureWarning,
2081+
stacklevel=4,
20782082
)
20792083

20802084
to_rename = left.intersection(right)

pandas/tests/reshape/merge/test_merge.py

+7-11
Original file line numberDiff line numberDiff line change
@@ -1999,6 +1999,7 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm):
19991999
(0, 0, dict(suffixes=("", "_dup")), ["0", "0_dup"]),
20002000
(0, 0, dict(suffixes=(None, "_dup")), [0, "0_dup"]),
20012001
(0, 0, dict(suffixes=("_x", "_y")), ["0_x", "0_y"]),
2002+
(0, 0, dict(suffixes=["_x", "_y"]), ["0_x", "0_y"]),
20022003
("a", 0, dict(suffixes=(None, "_y")), ["a", 0]),
20032004
(0.0, 0.0, dict(suffixes=("_x", None)), ["0.0_x", 0.0]),
20042005
("b", "b", dict(suffixes=(None, "_y")), ["b", "b_y"]),
@@ -2069,18 +2070,13 @@ def test_merge_suffix_error(col1, col2, suffixes):
20692070
pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes)
20702071

20712072

2072-
@pytest.mark.parametrize(
2073-
"col1, col2, suffixes", [("a", "a", {"a", "b"}), ("a", "a", None), (0, 0, None)],
2074-
)
2075-
def test_merge_suffix_type_error(col1, col2, suffixes):
2076-
a = pd.DataFrame({col1: [1, 2, 3]})
2077-
b = pd.DataFrame({col2: [3, 4, 5]})
2073+
@pytest.mark.parametrize("suffixes", [{"left", "right"}, {"left": 0, "right": 0}])
2074+
def test_merge_suffix_warns(suffixes):
2075+
a = pd.DataFrame({"a": [1, 2, 3]})
2076+
b = pd.DataFrame({"b": [3, 4, 5]})
20782077

2079-
msg = (
2080-
f"suffixes should be tuple of \\(str, str\\). But got {type(suffixes).__name__}"
2081-
)
2082-
with pytest.raises(TypeError, match=msg):
2083-
pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes)
2078+
with tm.assert_produces_warning(FutureWarning):
2079+
pd.merge(a, b, left_index=True, right_index=True, suffixes={"left", "right"})
20842080

20852081

20862082
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)