From 919556382f332b75e9c7225b48cc02a66fa91147 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 8 Sep 2022 08:03:25 -0700 Subject: [PATCH 1/2] BUG: merge with CategoricalIndex for left_on/right_on --- doc/source/whatsnew/v1.6.0.rst | 2 +- pandas/core/reshape/merge.py | 3 +++ pandas/tests/reshape/merge/test_merge.py | 12 ++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 42d3ce8069322..93c158cad97d5 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -190,7 +190,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ -- +- Bug in :func:`merge` when ``left_on`` or ``right_on`` is or includes a :class:`CategoricalIndex` incorrectly raising ``AttributeError`` (:issue:`?`) - Sparse diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 5052c27ea47f3..523283b4c07ec 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1250,6 +1250,9 @@ def _maybe_coerce_merge_keys(self) -> None: if (len(lk) and not len(rk)) or (not len(lk) and len(rk)): continue + lk = extract_array(lk, extract_numpy=True) + rk = extract_array(rk, extract_numpy=True) + lk_is_cat = is_categorical_dtype(lk.dtype) rk_is_cat = is_categorical_dtype(rk.dtype) lk_is_object = is_object_dtype(lk.dtype) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index c7d7d1b0daa50..c49510aa6fc66 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2655,6 +2655,18 @@ def test_mergeerror_on_left_index_mismatched_dtypes(): merge(df_1, df_2, on=["C"], left_index=True) +def test_merge_on_left_categoricalindex(): + # don't raise when left_on is a CategoricalIndex + ci = CategoricalIndex(range(3)) + + right = DataFrame({"A": ci, "B": range(3)}) + left = DataFrame({"C": range(3, 6)}) + + res = merge(left, right, left_on=ci, right_on="A") + expected = merge(left, right, 
left_on=ci._data, right_on="A") + tm.assert_frame_equal(res, expected) + + @pytest.mark.parametrize("dtype", [None, "Int64"]) def test_merge_outer_with_NaN(dtype): # GH#43550 From 5cee3f5742715d85e5974a575c027ea1827c4b82 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 8 Sep 2022 08:05:24 -0700 Subject: [PATCH 2/2] GH refs --- doc/source/whatsnew/v1.6.0.rst | 2 +- pandas/tests/reshape/merge/test_merge.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 93c158cad97d5..790f7f882322a 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -190,7 +190,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ -- Bug in :func:`merge` when ``left_on`` or ``right_on`` is or includes a :class:`CategoricalIndex` incorrectly raising ``AttributeError`` (:issue:`?`) +- Bug in :func:`merge` when ``left_on`` or ``right_on`` is or includes a :class:`CategoricalIndex` incorrectly raising ``AttributeError`` (:issue:`48464`) - Sparse diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index c49510aa6fc66..9edc73797bd6c 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2656,7 +2656,7 @@ def test_mergeerror_on_left_index_mismatched_dtypes(): def test_merge_on_left_categoricalindex(): - # don't raise when left_on is a CategoricalIndex + # GH#48464 don't raise when left_on is a CategoricalIndex ci = CategoricalIndex(range(3)) right = DataFrame({"A": ci, "B": range(3)})