diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index f3b849dc6de45..49ea2f39adb1a 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -911,7 +911,7 @@ to join them together on their indexes. .. ipython:: python - right2 = pd.DataFrame({"v": [7, 8, 9]}, index=["K1", "K1", "K2"]) + right2 = pd.DataFrame({"v": [7, 8, 9]}, index=pd.Index(["K1", "K1", "K2"], name="key1")) result = left.join([right, right2]) .. ipython:: python diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 6a6abcf2d48fe..1062d6f9bc5bd 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -501,6 +501,7 @@ Other - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`) - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`) - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`) +- Bug in :meth:`DataFrame.join` when joining a list of objects where at least one has a :class:`MultiIndex`, where the decision to concatenate or merge did not check the uniqueness of the individual index levels shared by the objects (:issue:`57676`) - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`) - Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. 
(:issue:`57069`) - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 01ac5a2be3d79..ac3ba08189086 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10713,7 +10713,19 @@ def join( # "Iterable[Union[DataFrame, Series]]" due to the if statements frames = [cast("DataFrame | Series", self)] + list(other) - can_concat = all(df.index.is_unique for df in frames) + # We might need to get indexes out of MultiIndexes, checking only the + # common indexes between the inserted frames + indexes = (set(df.index.names) for df in frames) + common_indexes = set.intersection(*indexes) + + if not common_indexes: + raise ValueError("cannot join with no overlapping index names") + + can_concat = False + for idx in common_indexes: + can_concat = all( + df.index.get_level_values(idx).is_unique for df in frames + ) # join indexes only using concat if can_concat: diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index 82802dd6e99eb..fa1176a6985c9 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -107,8 +107,6 @@ def test_suffix_on_list_join(): # check proper errors are raised msg = "Suffixes not supported when joining multiple DataFrames" - with pytest.raises(ValueError, match=msg): - first.join([second], lsuffix="y") with pytest.raises(ValueError, match=msg): first.join([second, third], rsuffix="x") with pytest.raises(ValueError, match=msg): @@ -562,3 +560,21 @@ def test_frame_join_tzaware(self): tm.assert_index_equal(result.index, expected) assert result.index.tz.zone == "US/Central" + + def test_join_lists_index_with_multiindex(self): + test1 = DataFrame( + {"cat": pd.Categorical(["a", "v", "d"])}, + index=Index(["a", "b", "c"], name="y"), + ) + test2 = DataFrame( + {"foo": np.arange(6)}, + 
index=MultiIndex.from_tuples( + [(0, "a"), (0, "b"), (0, "c"), (1, "a"), (1, "b"), (1, "c")], + names=("x", "y"), + ), + ) + + result = test2.join([test1]) + expected = test2.join(test1) + + tm.assert_frame_equal(result, expected)