Skip to content

Commit 02d7aa9

Browse files
committed
BUG: On Join, with a list containing MultiIndexes check uniqueness of index to join (pandas-dev#57676)
1 parent b162331 commit 02d7aa9

File tree

4 files changed

+33
-4
lines changed

4 files changed

+33
-4
lines changed

doc/source/user_guide/merging.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -911,7 +911,7 @@ to join them together on their indexes.
911911

912912
.. ipython:: python
913913
914-
right2 = pd.DataFrame({"v": [7, 8, 9]}, index=["K1", "K1", "K2"])
914+
right2 = pd.DataFrame({"v": [7, 8, 9]}, index=pd.Index(["K1", "K1", "K2"], name="key1"))
915915
result = left.join([right, right2])
916916
917917
.. ipython:: python

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,7 @@ Other
501501
- Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
502502
- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
503503
- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`)
504+
- Bug in :meth:`DataFrame.join` when joining a list of frames containing a :class:`MultiIndex`, where the choice between concat and merge did not check the uniqueness of the individual index levels being joined (:issue:`57676`)
504505
- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
505506
- Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
506507
- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)

pandas/core/frame.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -10713,7 +10713,19 @@ def join(
1071310713
# "Iterable[Union[DataFrame, Series]]" due to the if statements
1071410714
frames = [cast("DataFrame | Series", self)] + list(other)
1071510715

10716-
can_concat = all(df.index.is_unique for df in frames)
10716+
# We might need to get indexes out of MultiIndexes, checking only the
10717+
# common indexes between the inserted frames
10718+
indexes = (set(df.index.names) for df in frames)
10719+
common_indexes = set.intersection(*indexes)
10720+
10721+
if not common_indexes:
10722+
raise ValueError("cannot join with no overlapping index names")
10723+
10724+
can_concat = False
10725+
for idx in common_indexes:
10726+
can_concat = all(
10727+
df.index.get_level_values(idx).is_unique for df in frames
10728+
)
1071710729

1071810730
# join indexes only using concat
1071910731
if can_concat:

pandas/tests/frame/methods/test_join.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,6 @@ def test_suffix_on_list_join():
107107

108108
# check proper errors are raised
109109
msg = "Suffixes not supported when joining multiple DataFrames"
110-
with pytest.raises(ValueError, match=msg):
111-
first.join([second], lsuffix="y")
112110
with pytest.raises(ValueError, match=msg):
113111
first.join([second, third], rsuffix="x")
114112
with pytest.raises(ValueError, match=msg):
@@ -562,3 +560,21 @@ def test_frame_join_tzaware(self):
562560

563561
tm.assert_index_equal(result.index, expected)
564562
assert result.index.tz.zone == "US/Central"
563+
564+
def test_join_lists_index_with_multiindex(self):
# GH#57676: joining a one-element list must give the same frame as joining
# the object directly when the caller has a MultiIndex and the other frame
# has a single-level Index that shares one level name with it.
565+
# test1: single-level index named "y" with unique labels.
test1 = DataFrame(
566+
{"cat": pd.Categorical(["a", "v", "d"])},
567+
index=Index(["a", "b", "c"], name="y"),
568+
)
569+
# test2: two-level index ("x", "y"); each "y" label appears once per "x"
# value, so the "y" level on its own is not unique.
test2 = DataFrame(
570+
{"foo": np.arange(6)},
571+
index=MultiIndex.from_tuples(
572+
[(0, "a"), (0, "b"), (0, "c"), (1, "a"), (1, "b"), (1, "c")],
573+
names=("x", "y"),
574+
),
575+
)
576+
577+
# List form (the path under test) versus the plain single-frame join.
result = test2.join([test1])
578+
expected = test2.join(test1)
579+
580+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)