Skip to content

Commit 611cc94

Browse files
committed
BUG: On Join, with a list containing MultiIndexes check uniqueness of index to join (pandas-dev#57676)
1 parent 59f6a33 commit 611cc94

File tree

3 files changed

+32
-3
lines changed

3 files changed

+32
-3
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,7 @@ Other
477477
- Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
478478
- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
479479
- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`)
480+
- Bug in :meth:`DataFrame.join` when joining with a list containing objects with a :class:`MultiIndex`, where the choice between concat and merge checked uniqueness of the whole index instead of the individual index levels (:issue:`57676`)
480481
- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
481482
- Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
482483
- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)

pandas/core/frame.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -10679,7 +10679,19 @@ def join(
1067910679
# "Iterable[Union[DataFrame, Series]]" due to the if statements
1068010680
frames = [cast("DataFrame | Series", self)] + list(other)
1068110681

10682-
can_concat = all(df.index.is_unique for df in frames)
10682+
# We might need to get indexes out of MultiIndexes, checking only the
10683+
# common indexes between the inserted frames
10684+
indexes = (set(df.index.names) for df in frames)
10685+
common_indexes = set.intersection(*indexes)
10686+
10687+
if not common_indexes:
10688+
raise ValueError("cannot join with no overlapping index names")
10689+
10690+
can_concat = False
10691+
for idx in common_indexes:
10692+
can_concat = all(
10693+
df.index.get_level_values(idx).is_unique for df in frames
10694+
)
1068310695

1068410696
# join indexes only using concat
1068510697
if can_concat:

pandas/tests/frame/methods/test_join.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,6 @@ def test_suffix_on_list_join():
107107

108108
# check proper errors are raised
109109
msg = "Suffixes not supported when joining multiple DataFrames"
110-
with pytest.raises(ValueError, match=msg):
111-
first.join([second], lsuffix="y")
112110
with pytest.raises(ValueError, match=msg):
113111
first.join([second, third], rsuffix="x")
114112
with pytest.raises(ValueError, match=msg):
@@ -562,3 +560,21 @@ def test_frame_join_tzaware(self):
562560

563561
tm.assert_index_equal(result.index, expected)
564562
assert result.index.tz.zone == "US/Central"
563+
564+
def test_join_lists_index_with_multiindex(self):
565+
test1 = DataFrame(
566+
{"cat": pd.Categorical(["a", "v", "d"])},
567+
index=Index(["a", "b", "c"], name="y"),
568+
)
569+
test2 = DataFrame(
570+
{"foo": np.arange(6)},
571+
index=MultiIndex.from_tuples(
572+
[(0, "a"), (0, "b"), (0, "c"), (1, "a"), (1, "b"), (1, "c")],
573+
names=("x", "y"),
574+
),
575+
)
576+
577+
result = test2.join([test1])
578+
expected = test2.join(test1)
579+
580+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)