Skip to content

Commit 142faf0

Browse files
phoflKevin D Smith
authored and
Kevin D Smith
committed
BUG: Non deterministic level order in MultiIndex with join (pandas-dev#37199)
1 parent 4cfd5d8 commit 142faf0

File tree

3 files changed

+26
-4
lines changed

3 files changed

+26
-4
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,7 @@ Reshaping
505505
- Bug in :func:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`)
506506
- Bug in :meth:`DataFrame.agg` with ``func={'name':<FUNC>}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`)
507507
- Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was a dictionary (:issue:`35811`)
508+
- Bug in :func:`join` returned a non-deterministic level order for the resulting :class:`MultiIndex` (:issue:`36910`)
508509
-
509510

510511
Sparse

pandas/core/indexes/base.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -3597,8 +3597,12 @@ def _join_multi(self, other, how, return_indexers=True):
35973597
from pandas.core.reshape.merge import restore_dropped_levels_multijoin
35983598

35993599
# figure out join names
3600-
self_names = set(com.not_none(*self.names))
3601-
other_names = set(com.not_none(*other.names))
3600+
self_names_list = list(com.not_none(*self.names))
3601+
other_names_list = list(com.not_none(*other.names))
3602+
self_names_order = self_names_list.index
3603+
other_names_order = other_names_list.index
3604+
self_names = set(self_names_list)
3605+
other_names = set(other_names_list)
36023606
overlap = self_names & other_names
36033607

36043608
# need at least 1 in common
@@ -3608,8 +3612,8 @@ def _join_multi(self, other, how, return_indexers=True):
36083612
if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
36093613

36103614
# Drop the non-matching levels from left and right respectively
3611-
ldrop_names = list(self_names - overlap)
3612-
rdrop_names = list(other_names - overlap)
3615+
ldrop_names = sorted(self_names - overlap, key=self_names_order)
3616+
rdrop_names = sorted(other_names - overlap, key=other_names_order)
36133617

36143618
# if only the order differs
36153619
if not len(ldrop_names + rdrop_names):

pandas/tests/reshape/merge/test_join.py

+17
Original file line numberDiff line numberDiff line change
@@ -787,3 +787,20 @@ def _join_by_hand(a, b, how="left"):
787787
for col, s in b_re.items():
788788
a_re[col] = s
789789
return a_re.reindex(columns=result_columns)
790+
791+
792+
def test_join_inner_multiindex_deterministic_order():
793+
# GH: 36910
794+
left = pd.DataFrame(
795+
data={"e": 5},
796+
index=pd.MultiIndex.from_tuples([(1, 2, 4)], names=("a", "b", "d")),
797+
)
798+
right = pd.DataFrame(
799+
data={"f": 6}, index=pd.MultiIndex.from_tuples([(2, 3)], names=("b", "c"))
800+
)
801+
result = left.join(right, how="inner")
802+
expected = pd.DataFrame(
803+
{"e": [5], "f": [6]},
804+
index=pd.MultiIndex.from_tuples([(2, 1, 4, 3)], names=("b", "a", "d", "c")),
805+
)
806+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)