From ee7b3f6b733cf06a881aeba26de772f139e06a0b Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Fri, 1 Sep 2023 21:46:14 -0400 Subject: [PATCH 1/2] fix regression in MultiIndex.append for overlapping IntervalIndex levels --- doc/source/whatsnew/v2.1.1.rst | 1 + pandas/core/arrays/categorical.py | 8 +++++--- pandas/tests/indexes/multi/test_reshape.py | 22 ++++++++++++++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.1.1.rst b/doc/source/whatsnew/v2.1.1.rst index 8b4833f6ce043..daca33e11f513 100644 --- a/doc/source/whatsnew/v2.1.1.rst +++ b/doc/source/whatsnew/v2.1.1.rst @@ -17,6 +17,7 @@ Fixed regressions - Fixed regression in :func:`read_csv` when ``usecols`` is given and ``dtypes`` is a dict for ``engine="python"`` (:issue:`54868`) - Fixed regression in :meth:`.GroupBy.get_group` raising for ``axis=1`` (:issue:`54858`) - Fixed regression in :meth:`DataFrame.__setitem__` raising ``AssertionError`` when setting a :class:`Series` with a partial :class:`MultiIndex` (:issue:`54875`) +- Fixed regression in :meth:`MultiIndex.append` raising when appending overlapping :class:`IntervalIndex` levels (:issue:`54934`) - Fixed regression in :meth:`Series.value_counts` raising for numeric data if ``bins`` was specified (:issue:`54857`) - Fixed regression when comparing a :class:`Series` with ``datetime64`` dtype with ``None`` (:issue:`54870`) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 9f63d1f97c54f..b6f14d9b002bc 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2947,9 +2947,11 @@ def recode_for_categories( return codes.copy() return codes - indexer = coerce_indexer_dtype( - new_categories.get_indexer(old_categories), new_categories - ) + if new_categories._index_as_unique: + indexer = new_categories.get_indexer(old_categories) + else: + indexer = new_categories.get_indexer_non_unique(old_categories)[0] + indexer = coerce_indexer_dtype(indexer, new_categories) new_codes = take_nd(indexer, codes, fill_value=-1) return new_codes diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py index da9838d4a2ed3..06dbb33aadf97 100644 --- a/pandas/tests/indexes/multi/test_reshape.py +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -169,6 +169,28 @@ def test_append_names_dont_match(): tm.assert_index_equal(result, expected) +def test_append_overlapping_interval_levels(): + # GH 54934 + ivl1 = pd.IntervalIndex.from_breaks([0.0, 1.0, 2.0]) + ivl2 = pd.IntervalIndex.from_breaks([0.5, 1.5, 2.5]) + mi1 = MultiIndex.from_product([ivl1, ivl1]) + mi2 = MultiIndex.from_product([ivl2, ivl2]) + result = mi1.append(mi2) + expected = MultiIndex.from_tuples( + [ + (pd.Interval(0.0, 1.0), pd.Interval(0.0, 1.0)), + (pd.Interval(0.0, 1.0), pd.Interval(1.0, 2.0)), + (pd.Interval(1.0, 2.0), pd.Interval(0.0, 1.0)), + (pd.Interval(1.0, 2.0), pd.Interval(1.0, 2.0)), + (pd.Interval(0.5, 1.5), pd.Interval(0.5, 1.5)), + (pd.Interval(0.5, 1.5), pd.Interval(1.5, 2.5)), + (pd.Interval(1.5, 2.5), pd.Interval(0.5, 1.5)), + (pd.Interval(1.5, 2.5), pd.Interval(1.5, 2.5)), + ] + ) + tm.assert_index_equal(result, expected) + + def test_repeat(): reps = 2 numbers = [1, 2, 3] From 28e9065351f5bddc4c5f631ce65acd0ad9387906 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sat, 2 Sep 2023 06:11:28 -0400 Subject: [PATCH 2/2] use get_indexer_for --- pandas/core/arrays/categorical.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index b6f14d9b002bc..da4bf987d0386 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2947,11 +2947,9 @@ def recode_for_categories( return codes.copy() return codes - if new_categories._index_as_unique: - indexer = new_categories.get_indexer(old_categories) - else: - indexer = new_categories.get_indexer_non_unique(old_categories)[0] - indexer = coerce_indexer_dtype(indexer, new_categories) + indexer = coerce_indexer_dtype( + new_categories.get_indexer_for(old_categories), new_categories + ) new_codes = take_nd(indexer, codes, fill_value=-1) return new_codes