diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 1d76c9d296255..d90ccd57be16a 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -192,6 +192,7 @@ Categorical - Bug in ``CategoricalIndex.reindex`` failed when ``Index`` passed with elements all in category (:issue:`28690`) - Bug where constructing a :class:`Categorical` from an object-dtype array of ``date`` objects did not round-trip correctly with ``astype`` (:issue:`38552`) - Bug in constructing a :class:`DataFrame` from an ``ndarray`` and a :class:`CategoricalDtype` (:issue:`38857`) +- Bug in :meth:`DataFrame.reindex` raising ``IndexError`` when the new index contained duplicates and the old index was an empty :class:`CategoricalIndex` (:issue:`38906`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 92a4d0a125195..a2e9737f305ba 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3730,8 +3730,13 @@ def _reindex_non_unique(self, target): new_labels[cur_indexer] = cur_labels new_labels[missing_indexer] = missing_labels + # GH#38906 + if not len(self): + + new_indexer = np.arange(0) + # a unique indexer - if target.is_unique: + elif target.is_unique: # see GH5553, make sure we use the right indexer new_indexer = np.arange(len(indexer)) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 4cbdf61ff8dae..2b80e06861a00 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1717,6 +1717,34 @@ def test_setitem(self, uint64_frame): ) +@pytest.mark.parametrize( + "src_idx", + [ + Index([]), + pd.CategoricalIndex([]), + ], +) +@pytest.mark.parametrize( + "cat_idx", + [ + # No duplicates + Index([]), + pd.CategoricalIndex([]), + Index(["A", "B"]), + pd.CategoricalIndex(["A", "B"]), + # Duplicates: GH#38906 + Index(["A", "A"]), + pd.CategoricalIndex(["A", "A"]), + ], +) +def 
test_reindex_empty(src_idx, cat_idx): + df = DataFrame(columns=src_idx, index=["K"], dtype="f8") + + result = df.reindex(columns=cat_idx) + expected = DataFrame(index=["K"], columns=cat_idx, dtype="f8") + tm.assert_frame_equal(result, expected) + + def test_object_casting_indexing_wraps_datetimelike(): # GH#31649, check the indexing methods all the way down the stack df = DataFrame(