diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index b7efec8fd2e89..b3aa2ac95880d 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -545,6 +545,7 @@ Reshaping - Bug in :func:`crosstab` when inputs are are categorical Series, there are categories that are not present in one or both of the Series, and ``margins=True``. Previously the margin value for missing categories was ``NaN``. It is now correctly reported as 0 (:issue:`43505`) - Bug in :func:`concat` would fail when the ``objs`` argument all had the same index and the ``keys`` argument contained duplicates (:issue:`43595`) - Bug in :func:`concat` which ignored the ``sort`` parameter (:issue:`43375`) +- Bug in :func:`merge` with :class:`MultiIndex` as column index for the ``on`` argument raising an error when assigning a column internally (:issue:`43734`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index fa09f003bc7b8..a88d1dce693f6 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1280,10 +1280,12 @@ def _maybe_coerce_merge_keys(self) -> None: # incompatible dtypes. See GH 16900. 
if name in self.left.columns: typ = lk.categories.dtype if lk_is_cat else object - self.left = self.left.assign(**{name: self.left[name].astype(typ)}) + self.left = self.left.copy() + self.left[name] = self.left[name].astype(typ) if name in self.right.columns: typ = rk.categories.dtype if rk_is_cat else object - self.right = self.right.assign(**{name: self.right[name].astype(typ)}) + self.right = self.right.copy() + self.right[name] = self.right[name].astype(typ) def _create_cross_configuration( self, left: DataFrame, right: DataFrame diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 3462fa486d936..e42dcfbe38931 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1695,6 +1695,39 @@ def test_merge_bool_dtype(self, how, expected_data): expected = DataFrame(expected_data, columns=["A", "B", "C"]) tm.assert_frame_equal(result, expected) + def test_merge_ea_with_string(self, join_type, string_dtype): + # GH 43734 Avoid the use of `assign` with multi-index + df1 = DataFrame( + data={ + ("lvl0", "lvl1-a"): ["1", "2", "3", "4", None], + ("lvl0", "lvl1-b"): ["4", "5", "6", "7", "8"], + }, + dtype=pd.StringDtype(), + ) + df1_copy = df1.copy() + df2 = DataFrame( + data={ + ("lvl0", "lvl1-a"): ["1", "2", "3", pd.NA, "5"], + ("lvl0", "lvl1-c"): ["7", "8", "9", pd.NA, "11"], + }, + dtype=string_dtype, + ) + df2_copy = df2.copy() + merged = merge(left=df1, right=df2, on=[("lvl0", "lvl1-a")], how=join_type) + + # No change in df1 and df2 + tm.assert_frame_equal(df1, df1_copy) + tm.assert_frame_equal(df2, df2_copy) + + # Check the expected types for the merged data frame + expected = Series( + [np.dtype("O"), pd.StringDtype(), np.dtype("O")], + index=MultiIndex.from_tuples( + [("lvl0", "lvl1-a"), ("lvl0", "lvl1-b"), ("lvl0", "lvl1-c")] + ), + ) + tm.assert_series_equal(merged.dtypes, expected) + @pytest.fixture def left():