Skip to content

Commit 67a6135

Browse files
authored
DEPR: Enforce disallowed merging scenarios (#49429)
* Enforce merge suffixes tuples * Duplicate merge columns * enforce disallowing different levels * Fix test
1 parent b858de0 commit 67a6135

File tree

5 files changed

+54
-68
lines changed

5 files changed

+54
-68
lines changed

doc/source/whatsnew/v2.0.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,9 @@ Removal of prior version deprecations/changes
273273
- Enforced disallowing a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`)
274274
- Enforced disallowing a tuple of column labels into :meth:`.DataFrameGroupBy.__getitem__` (:issue:`30546`)
275275
- Enforced disallowing setting values with ``.loc`` using a positional slice. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`)
276+
- Enforced disallowing ``dict`` or ``set`` objects in ``suffixes`` in :func:`merge` (:issue:`34810`)
277+
- Enforced disallowing :func:`merge` from producing duplicated columns when the column names created by the ``suffixes`` keyword collide with already existing columns (:issue:`22818`)
278+
- Enforced disallowing :func:`merge` or :meth:`DataFrame.join` between objects with a different number of column levels (:issue:`34862`)
276279
- Removed setting Categorical._codes directly (:issue:`41429`)
277280
- Removed setting Categorical.categories directly (:issue:`47834`)
278281
- Removed argument ``inplace`` from :meth:`Categorical.add_categories`, :meth:`Categorical.remove_categories`, :meth:`Categorical.set_categories`, :meth:`Categorical.rename_categories`, :meth:`Categorical.reorder_categories`, :meth:`Categorical.set_ordered`, :meth:`Categorical.as_ordered`, :meth:`Categorical.as_unordered` (:issue:`37981`, :issue:`41118`, :issue:`41133`, :issue:`47834`)

pandas/core/reshape/merge.py

+11-18
Original file line numberDiff line numberDiff line change
@@ -676,16 +676,14 @@ def __init__(
676676
f"right_index parameter must be of type bool, not {type(right_index)}"
677677
)
678678

679-
# warn user when merging between different levels
679+
# GH 40993: raise when merging between different levels; enforced in 2.0
680680
if _left.columns.nlevels != _right.columns.nlevels:
681681
msg = (
682-
"merging between different levels is deprecated and will be removed "
683-
f"in a future version. ({_left.columns.nlevels} levels on the left, "
682+
"Not allowed to merge between different levels. "
683+
f"({_left.columns.nlevels} levels on the left, "
684684
f"{_right.columns.nlevels} on the right)"
685685
)
686-
# stacklevel chosen to be correct when this is reached via pd.merge
687-
# (and not DataFrame.join)
688-
warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
686+
raise MergeError(msg)
689687

690688
self.left_on, self.right_on = self._validate_left_right_on(left_on, right_on)
691689

@@ -2475,13 +2473,10 @@ def _items_overlap_with_suffix(
24752473
If corresponding suffix is empty, the entry is simply converted to string.
24762474
24772475
"""
2478-
if not is_list_like(suffixes, allow_sets=False):
2479-
warnings.warn(
2480-
f"Passing 'suffixes' as a {type(suffixes)}, is not supported and may give "
2481-
"unexpected results. Provide 'suffixes' as a tuple instead. In the "
2482-
"future a 'TypeError' will be raised.",
2483-
FutureWarning,
2484-
stacklevel=find_stack_level(),
2476+
if not is_list_like(suffixes, allow_sets=False) or isinstance(suffixes, dict):
2477+
raise TypeError(
2478+
f"Passing 'suffixes' as a {type(suffixes)}, is not supported. "
2479+
"Provide 'suffixes' as a tuple instead."
24852480
)
24862481

24872482
to_rename = left.intersection(right)
@@ -2527,11 +2522,9 @@ def renamer(x, suffix):
25272522
if not rlabels.is_unique:
25282523
dups.extend(rlabels[(rlabels.duplicated()) & (~right.duplicated())].tolist())
25292524
if dups:
2530-
warnings.warn(
2531-
f"Passing 'suffixes' which cause duplicate columns {set(dups)} in the "
2532-
f"result is deprecated and will raise a MergeError in a future version.",
2533-
FutureWarning,
2534-
stacklevel=find_stack_level(),
2525+
raise MergeError(
2526+
f"Passing 'suffixes' which cause duplicate columns {set(dups)} is "
2527+
f"not allowed.",
25352528
)
25362529

25372530
return llabels, rlabels

pandas/tests/frame/methods/test_join.py

+10-13
Original file line numberDiff line numberDiff line change
@@ -516,8 +516,9 @@ def test_join_multiindex_dates(self):
516516

517517
tm.assert_equal(result, expected)
518518

519-
def test_merge_join_different_levels(self):
519+
def test_merge_join_different_levels_raises(self):
520520
# GH#9455
521+
# GH 40993: For raising, enforced in 2.0
521522

522523
# first dataframe
523524
df1 = DataFrame(columns=["a", "b"], data=[[1, 11], [0, 22]])
@@ -527,20 +528,16 @@ def test_merge_join_different_levels(self):
527528
df2 = DataFrame(columns=columns, data=[[1, 33], [0, 44]])
528529

529530
# merge
530-
columns = ["a", "b", ("c", "c1")]
531-
expected = DataFrame(columns=columns, data=[[1, 11, 33], [0, 22, 44]])
532-
with tm.assert_produces_warning(FutureWarning):
533-
result = pd.merge(df1, df2, on="a")
534-
tm.assert_frame_equal(result, expected)
531+
with pytest.raises(
532+
MergeError, match="Not allowed to merge between different levels"
533+
):
534+
pd.merge(df1, df2, on="a")
535535

536536
# join, see discussion in GH#12219
537-
columns = ["a", "b", ("a", ""), ("c", "c1")]
538-
expected = DataFrame(columns=columns, data=[[1, 11, 0, 44], [0, 22, 1, 33]])
539-
msg = "merging between different levels is deprecated"
540-
with tm.assert_produces_warning(FutureWarning, match=msg):
541-
# stacklevel is chosen to be correct for pd.merge, not DataFrame.join
542-
result = df1.join(df2, on="a")
543-
tm.assert_frame_equal(result, expected)
537+
with pytest.raises(
538+
MergeError, match="Not allowed to merge between different levels"
539+
):
540+
df1.join(df2, on="a")
544541

545542
def test_frame_join_tzaware(self):
546543
test1 = DataFrame(

pandas/tests/reshape/merge/test_join.py

+12-11
Original file line numberDiff line numberDiff line change
@@ -420,18 +420,18 @@ def test_join_inner_multiindex(self, lexsorted_two_level_string_multiindex):
420420

421421
# _assert_same_contents(expected, expected2.loc[:, expected.columns])
422422

423-
def test_join_hierarchical_mixed(self):
423+
def test_join_hierarchical_mixed_raises(self):
424424
# GH 2024
425+
# GH 40993: For raising, enforced in 2.0
425426
df = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "c"])
426427
new_df = df.groupby(["a"]).agg({"b": [np.mean, np.sum]})
427428
other_df = DataFrame([(1, 2, 3), (7, 10, 6)], columns=["a", "b", "d"])
428429
other_df.set_index("a", inplace=True)
429430
# GH 9455, 12219
430-
msg = "merging between different levels is deprecated"
431-
with tm.assert_produces_warning(FutureWarning, match=msg):
432-
result = merge(new_df, other_df, left_index=True, right_index=True)
433-
assert ("b", "mean") in result
434-
assert "b" in result
431+
with pytest.raises(
432+
pd.errors.MergeError, match="Not allowed to merge between different levels"
433+
):
434+
merge(new_df, other_df, left_index=True, right_index=True)
435435

436436
def test_join_float64_float32(self):
437437

@@ -642,11 +642,12 @@ def test_join_dups(self):
642642
dta = x.merge(y, left_index=True, right_index=True).merge(
643643
z, left_index=True, right_index=True, how="outer"
644644
)
645-
with tm.assert_produces_warning(FutureWarning):
646-
dta = dta.merge(w, left_index=True, right_index=True)
647-
expected = concat([x, y, z, w], axis=1)
648-
expected.columns = ["x_x", "y_x", "x_y", "y_y", "x_x", "y_x", "x_y", "y_y"]
649-
tm.assert_frame_equal(dta, expected)
645+
# GH 40991: As of 2.0, raises because the suffixes cause duplicate columns
646+
with pytest.raises(
647+
pd.errors.MergeError,
648+
match="Passing 'suffixes' which cause duplicate columns",
649+
):
650+
dta.merge(w, left_index=True, right_index=True)
650651

651652
def test_join_multi_to_multi(self, join_type):
652653
# GH 20475

pandas/tests/reshape/merge/test_merge.py

+18-26
Original file line numberDiff line numberDiff line change
@@ -2207,6 +2207,7 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm):
22072207

22082208
def test_merge_series_multilevel():
22092209
# GH#47946
2210+
# GH 40993: For raising, enforced in 2.0
22102211
a = DataFrame(
22112212
{"A": [1, 2, 3, 4]},
22122213
index=MultiIndex.from_product([["a", "b"], [0, 1]], names=["outer", "inner"]),
@@ -2216,13 +2217,10 @@ def test_merge_series_multilevel():
22162217
index=MultiIndex.from_product([["a", "b"], [1, 2]], names=["outer", "inner"]),
22172218
name=("B", "C"),
22182219
)
2219-
expected = DataFrame(
2220-
{"A": [2, 4], ("B", "C"): [1, 3]},
2221-
index=MultiIndex.from_product([["a", "b"], [1]], names=["outer", "inner"]),
2222-
)
2223-
with tm.assert_produces_warning(FutureWarning):
2224-
result = merge(a, b, on=["outer", "inner"])
2225-
tm.assert_frame_equal(result, expected)
2220+
with pytest.raises(
2221+
MergeError, match="Not allowed to merge between different levels"
2222+
):
2223+
merge(a, b, on=["outer", "inner"])
22262224

22272225

22282226
@pytest.mark.parametrize(
@@ -2303,12 +2301,12 @@ def test_merge_suffix_error(col1, col2, suffixes):
23032301

23042302

23052303
@pytest.mark.parametrize("suffixes", [{"left", "right"}, {"left": 0, "right": 0}])
2306-
def test_merge_suffix_warns(suffixes):
2304+
def test_merge_suffix_raises(suffixes):
23072305
a = DataFrame({"a": [1, 2, 3]})
23082306
b = DataFrame({"b": [3, 4, 5]})
23092307

2310-
with tm.assert_produces_warning(FutureWarning):
2311-
merge(a, b, left_index=True, right_index=True, suffixes={"left", "right"})
2308+
with pytest.raises(TypeError, match="Passing 'suffixes' as a"):
2309+
merge(a, b, left_index=True, right_index=True, suffixes=suffixes)
23122310

23132311

23142312
@pytest.mark.parametrize(
@@ -2609,20 +2607,16 @@ def test_merge_result_empty_index_and_on():
26092607
tm.assert_frame_equal(result, expected)
26102608

26112609

2612-
def test_merge_suffixes_produce_dup_columns_warns():
2613-
# GH#22818
2610+
def test_merge_suffixes_produce_dup_columns_raises():
2611+
# GH#22818; Enforced in 2.0
26142612
left = DataFrame({"a": [1, 2, 3], "b": 1, "b_x": 2})
26152613
right = DataFrame({"a": [1, 2, 3], "b": 2})
2616-
expected = DataFrame(
2617-
[[1, 1, 2, 2], [2, 1, 2, 2], [3, 1, 2, 2]], columns=["a", "b_x", "b_x", "b_y"]
2618-
)
2619-
with tm.assert_produces_warning(FutureWarning):
2620-
result = merge(left, right, on="a")
2621-
tm.assert_frame_equal(result, expected)
26222614

2623-
with tm.assert_produces_warning(FutureWarning):
2615+
with pytest.raises(MergeError, match="Passing 'suffixes' which cause duplicate"):
2616+
merge(left, right, on="a")
2617+
2618+
with pytest.raises(MergeError, match="Passing 'suffixes' which cause duplicate"):
26242619
merge(right, left, on="a", suffixes=("_y", "_x"))
2625-
tm.assert_frame_equal(result, expected)
26262620

26272621

26282622
def test_merge_duplicate_columns_with_suffix_no_warning():
@@ -2635,15 +2629,13 @@ def test_merge_duplicate_columns_with_suffix_no_warning():
26352629
tm.assert_frame_equal(result, expected)
26362630

26372631

2638-
def test_merge_duplicate_columns_with_suffix_causing_another_duplicate():
2639-
# GH#22818
2632+
def test_merge_duplicate_columns_with_suffix_causing_another_duplicate_raises():
2633+
# GH#22818, Enforced in 2.0
26402634
# This should raise MergeError because suffixes cause another collision
26412635
left = DataFrame([[1, 1, 1, 1], [2, 2, 2, 2]], columns=["a", "b", "b", "b_x"])
26422636
right = DataFrame({"a": [1, 3], "b": 2})
2643-
with tm.assert_produces_warning(FutureWarning):
2644-
result = merge(left, right, on="a")
2645-
expected = DataFrame([[1, 1, 1, 1, 2]], columns=["a", "b_x", "b_x", "b_x", "b_y"])
2646-
tm.assert_frame_equal(result, expected)
2637+
with pytest.raises(MergeError, match="Passing 'suffixes' which cause duplicate"):
2638+
merge(left, right, on="a")
26472639

26482640

26492641
def test_merge_string_float_column_result():

0 commit comments

Comments
 (0)