Skip to content

Commit 6c35a62

Browse files
authored
BUG: Merge with str/StringDtype keys and multiindex (#43734) (#43785)
1 parent 20433be commit 6c35a62

File tree

3 files changed

+38
-2
lines changed

3 files changed

+38
-2
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,7 @@ Reshaping
547547
- Bug in :func:`crosstab` when inputs are categorical Series, there are categories that are not present in one or both of the Series, and ``margins=True``. Previously the margin value for missing categories was ``NaN``. It is now correctly reported as 0 (:issue:`43505`)
548548
- Bug in :func:`concat` would fail when the ``objs`` argument all had the same index and the ``keys`` argument contained duplicates (:issue:`43595`)
549549
- Bug in :func:`concat` which ignored the ``sort`` parameter (:issue:`43375`)
550+
- Bug in :func:`merge` with :class:`MultiIndex` as column index for the ``on`` argument returning an error when assigning a column internally (:issue:`43734`)
550551

551552
Sparse
552553
^^^^^^

pandas/core/reshape/merge.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1280,10 +1280,12 @@ def _maybe_coerce_merge_keys(self) -> None:
12801280
# incompatible dtypes. See GH 16900.
12811281
if name in self.left.columns:
12821282
typ = lk.categories.dtype if lk_is_cat else object
1283-
self.left = self.left.assign(**{name: self.left[name].astype(typ)})
1283+
self.left = self.left.copy()
1284+
self.left[name] = self.left[name].astype(typ)
12841285
if name in self.right.columns:
12851286
typ = rk.categories.dtype if rk_is_cat else object
1286-
self.right = self.right.assign(**{name: self.right[name].astype(typ)})
1287+
self.right = self.right.copy()
1288+
self.right[name] = self.right[name].astype(typ)
12871289

12881290
def _create_cross_configuration(
12891291
self, left: DataFrame, right: DataFrame

pandas/tests/reshape/merge/test_merge.py

+33
Original file line numberDiff line numberDiff line change
@@ -1695,6 +1695,39 @@ def test_merge_bool_dtype(self, how, expected_data):
16951695
expected = DataFrame(expected_data, columns=["A", "B", "C"])
16961696
tm.assert_frame_equal(result, expected)
16971697

1698+
def test_merge_ea_with_string(self, join_type, string_dtype):
1699+
# GH 43734 Avoid the use of `assign` with multi-index
1700+
df1 = DataFrame(
1701+
data={
1702+
("lvl0", "lvl1-a"): ["1", "2", "3", "4", None],
1703+
("lvl0", "lvl1-b"): ["4", "5", "6", "7", "8"],
1704+
},
1705+
dtype=pd.StringDtype(),
1706+
)
1707+
df1_copy = df1.copy()
1708+
df2 = DataFrame(
1709+
data={
1710+
("lvl0", "lvl1-a"): ["1", "2", "3", pd.NA, "5"],
1711+
("lvl0", "lvl1-c"): ["7", "8", "9", pd.NA, "11"],
1712+
},
1713+
dtype=string_dtype,
1714+
)
1715+
df2_copy = df2.copy()
1716+
merged = merge(left=df1, right=df2, on=[("lvl0", "lvl1-a")], how=join_type)
1717+
1718+
# No change in df1 and df2
1719+
tm.assert_frame_equal(df1, df1_copy)
1720+
tm.assert_frame_equal(df2, df2_copy)
1721+
1722+
# Check the expected types for the merged data frame
1723+
expected = Series(
1724+
[np.dtype("O"), pd.StringDtype(), np.dtype("O")],
1725+
index=MultiIndex.from_tuples(
1726+
[("lvl0", "lvl1-a"), ("lvl0", "lvl1-b"), ("lvl0", "lvl1-c")]
1727+
),
1728+
)
1729+
tm.assert_series_equal(merged.dtypes, expected)
1730+
16981731

16991732
@pytest.fixture
17001733
def left():

0 commit comments

Comments
 (0)