Skip to content

Commit 6450827

Browse files
committed
BUG: CategoricalIndex allows reindexing with non-unique CategoricalIndex
1 parent 0e7cf48 commit 6450827

File tree

4 files changed

+30
-4
lines changed

4 files changed

+30
-4
lines changed

doc/source/whatsnew/v0.24.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -1018,6 +1018,8 @@ Other API Changes
10181018
- :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather than a ``ValueError``, if a searched-for key is not found in its categories (:issue:`23466`).
10191019
- :meth:`Index.hasnans` and :meth:`Series.hasnans` now always return a python boolean. Previously, a python or a numpy boolean could be returned, depending on circumstances (:issue:`23294`).
10201020
- The order of the arguments of :func:`DataFrame.to_html` and :func:`DataFrame.to_string` is rearranged to be consistent with each other. (:issue:`23614`)
1021+
- :meth:`CategoricalIndex.reindex` now raises a ``ValueError`` in all cases if the target index is non-unique. It previously only raised if the target index was not of a categorical dtype (:issue:`23963`).
1022+
- Calls such as :meth:`DataFrame.round` with a non-unique :class:`CategoricalIndex` now return expected data. Previously, data would be improperly duplicated (:issue:`21809`).
10211023

10221024
.. _whatsnew_0240.deprecations:
10231025

pandas/core/indexes/category.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -522,12 +522,16 @@ def reindex(self, target, method=None, level=None, limit=None,
522522

523523
target = ibase.ensure_index(target)
524524

525-
if not is_categorical_dtype(target) and not target.is_unique:
526-
raise ValueError("cannot reindex with a non-unique indexer")
525+
if self.equals(target):
526+
indexer = None
527+
missing = []
528+
else:
529+
if not target.is_unique:
530+
raise ValueError("cannot reindex with a non-unique indexer")
527531

528-
indexer, missing = self.get_indexer_non_unique(np.array(target))
532+
indexer, missing = self.get_indexer_non_unique(np.array(target))
529533

530-
if len(self.codes):
534+
if len(self.codes) and indexer is not None:
531535
new_target = self.take(indexer)
532536
else:
533537
new_target = target

pandas/tests/frame/test_analytics.py

+9
Original file line numberDiff line numberDiff line change
@@ -1805,6 +1805,15 @@ def test_built_in_round(self):
18051805
{'col1': [1., 2., 3.], 'col2': [1., 2., 3.]})
18061806
tm.assert_frame_equal(round(df), expected_rounded)
18071807

1808+
def test_round_nonunique_categorical(self):
1809+
# See GH21809
1810+
idx = pd.CategoricalIndex(['low'] * 3 + ['hi'] * 3)
1811+
dfb = pd.DataFrame(np.random.rand(6, 3), columns=list('abc'),
1812+
index=idx)
1813+
assert dfb.shape == (6, 3)
1814+
dfb_round = dfb.round(3)
1815+
assert dfb_round.shape == (6, 3)
1816+
18081817
def test_pct_change(self):
18091818
# GH 11150
18101819
pnl = DataFrame([np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange(

pandas/tests/indexes/test_category.py

+11
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,17 @@ def test_reindex_dtype(self):
540540
tm.assert_numpy_array_equal(indexer,
541541
np.array([0, 3, 2], dtype=np.intp))
542542

543+
def test_reindex_duplicate_target(self):
544+
# See GH23963
545+
c = CategoricalIndex(['a', 'b', 'c', 'a'],
546+
categories=['a', 'b', 'c', 'd'])
547+
with pytest.raises(ValueError, match='non-unique indexer'):
548+
c.reindex(['a', 'a', 'c'])
549+
550+
with pytest.raises(ValueError, match='non-unique indexer'):
551+
c.reindex(CategoricalIndex(['a', 'a', 'c'],
552+
categories=['a', 'b', 'c', 'd']))
553+
543554
def test_reindex_empty_index(self):
544555
# See GH16770
545556
c = CategoricalIndex([])

0 commit comments

Comments
 (0)