From 83d9ab752d2abd5708e1331b3c75dc177546f4b7 Mon Sep 17 00:00:00 2001 From: Johannes Mueller Date: Tue, 23 Nov 2021 13:22:16 +0100 Subject: [PATCH] BUG: join MultiIndex with overlapping IntervalIndex level (#44096) `BaseMultiIndexCodesEngine._extract_level_codes` relies on the assumption that `get_indexer()` is always usable because every index level `is_unique`. However, that assumption does not hold for overlapping `IntervalIndex` levels: `get_indexer()` cannot be used on an `IntervalIndex` that `is_overlapping`, even if it `is_unique`. This patch uses `get_indexer_for()` instead. --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/_libs/index.pyx | 2 +- pandas/tests/indexes/multi/test_join.py | 43 +++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index ed5275333fda1..32a66be8b3b75 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -712,6 +712,7 @@ Reshaping - Fixed metadata propagation in :meth:`Dataframe.apply` method, consequently fixing the same issue for :meth:`Dataframe.transform`, :meth:`Dataframe.nunique` and :meth:`Dataframe.mode` (:issue:`28283`) - Bug in :meth:`DataFrame.stack` with ``ExtensionDtype`` columns incorrectly raising (:issue:`43561`) - Bug in :meth:`Series.unstack` with object doing unwanted type inference on resulting columns (:issue:`44595`) +- Bug in :class:`MultiIndex` failing join operations with overlapping ``IntervalIndex`` levels (:issue:`44096`) - Sparse diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 92837a43e2b69..670034c4dc4c1 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -649,7 +649,7 @@ cdef class BaseMultiIndexCodesEngine: Integers representing one combination each """ zt = [target._get_level_values(i) for i in range(target.nlevels)] - level_codes = [lev.get_indexer(codes) + 1 for lev, codes + level_codes = [lev.get_indexer_for(codes) + 1 for lev, codes in 
zip(self.levels, zt)] return self._codes_to_ints(np.array(level_codes, dtype='uint64').T) diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index 3aa0ac1676acc..e6bec97aedb38 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -3,6 +3,7 @@ from pandas import ( Index, + Interval, MultiIndex, ) import pandas._testing as tm @@ -115,3 +116,45 @@ def test_join_multi_return_indexers(): result = midx1.join(midx2, return_indexers=False) tm.assert_index_equal(result, midx1) + + +def test_join_overlapping_interval_level(): + # GH 44096 + idx_1 = MultiIndex.from_tuples( + [ + (1, Interval(0.0, 1.0)), + (1, Interval(1.0, 2.0)), + (1, Interval(2.0, 5.0)), + (2, Interval(0.0, 1.0)), + (2, Interval(1.0, 3.0)), # interval limit is here at 3.0, not at 2.0 + (2, Interval(3.0, 5.0)), + ], + names=["num", "interval"], + ) + + idx_2 = MultiIndex.from_tuples( + [ + (1, Interval(2.0, 5.0)), + (1, Interval(0.0, 1.0)), + (1, Interval(1.0, 2.0)), + (2, Interval(3.0, 5.0)), + (2, Interval(0.0, 1.0)), + (2, Interval(1.0, 3.0)), + ], + names=["num", "interval"], + ) + + expected = MultiIndex.from_tuples( + [ + (1, Interval(0.0, 1.0)), + (1, Interval(1.0, 2.0)), + (1, Interval(2.0, 5.0)), + (2, Interval(0.0, 1.0)), + (2, Interval(1.0, 3.0)), + (2, Interval(3.0, 5.0)), + ], + names=["num", "interval"], + ) + result = idx_1.join(idx_2, how="outer") + + tm.assert_index_equal(result, expected)