Skip to content

Commit 5f3073e

Browse files
batterseapower authored and luckyvs1 committed
BUG: reindexing empty CategoricalIndex would fail if target had duplicates (pandas-dev#39046)
1 parent 2726065 commit 5f3073e

File tree

3 files changed

+35
-1
lines changed

3 files changed

+35
-1
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ Categorical
192192
- Bug in ``CategoricalIndex.reindex`` failed when ``Index`` passed with elements all in category (:issue:`28690`)
193193
- Bug where constructing a :class:`Categorical` from an object-dtype array of ``date`` objects did not round-trip correctly with ``astype`` (:issue:`38552`)
194194
- Bug in constructing a :class:`DataFrame` from an ``ndarray`` and a :class:`CategoricalDtype` (:issue:`38857`)
195+
- Bug in :meth:`DataFrame.reindex` raised ``IndexError`` when the new index contained duplicates and the old index was an empty :class:`CategoricalIndex` (:issue:`38906`)
195196

196197
Datetimelike
197198
^^^^^^^^^^^^

pandas/core/indexes/base.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -3730,8 +3730,13 @@ def _reindex_non_unique(self, target):
37303730
new_labels[cur_indexer] = cur_labels
37313731
new_labels[missing_indexer] = missing_labels
37323732

3733+
# GH#38906
3734+
if not len(self):
3735+
3736+
new_indexer = np.arange(0)
3737+
37333738
# a unique indexer
3734-
if target.is_unique:
3739+
elif target.is_unique:
37353740

37363741
# see GH5553, make sure we use the right indexer
37373742
new_indexer = np.arange(len(indexer))

pandas/tests/frame/indexing/test_indexing.py

+28
Original file line numberDiff line numberDiff line change
@@ -1717,6 +1717,34 @@ def test_setitem(self, uint64_frame):
17171717
)
17181718

17191719

1720+
@pytest.mark.parametrize(
1721+
"src_idx",
1722+
[
1723+
Index([]),
1724+
pd.CategoricalIndex([]),
1725+
],
1726+
)
1727+
@pytest.mark.parametrize(
1728+
"cat_idx",
1729+
[
1730+
# No duplicates
1731+
Index([]),
1732+
pd.CategoricalIndex([]),
1733+
Index(["A", "B"]),
1734+
pd.CategoricalIndex(["A", "B"]),
1735+
# Duplicates: GH#38906
1736+
Index(["A", "A"]),
1737+
pd.CategoricalIndex(["A", "A"]),
1738+
],
1739+
)
1740+
def test_reindex_empty(src_idx, cat_idx):
1741+
df = DataFrame(columns=src_idx, index=["K"], dtype="f8")
1742+
1743+
result = df.reindex(columns=cat_idx)
1744+
expected = DataFrame(index=["K"], columns=cat_idx, dtype="f8")
1745+
tm.assert_frame_equal(result, expected)
1746+
1747+
17201748
def test_object_casting_indexing_wraps_datetimelike():
17211749
# GH#31649, check the indexing methods all the way down the stack
17221750
df = DataFrame(

0 commit comments

Comments
 (0)