Skip to content

Commit 3bd8aee

Browse files
qwhelan authored and Pingviinituutti committed
BUG: CategoricalIndex allows reindexing with non-unique CategoricalIndex (pandas-dev#23963)
1 parent 85c5e56 commit 3bd8aee

File tree

4 files changed

+36
-4
lines changed

4 files changed

+36
-4
lines changed

doc/source/whatsnew/v0.24.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -1091,6 +1091,7 @@ Other API Changes
10911091
- :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather than a ``ValueError``, if a searched for key is not found in its categories (:issue:`23466`).
10921092
- :meth:`Index.hasnans` and :meth:`Series.hasnans` now always return a python boolean. Previously, a python or a numpy boolean could be returned, depending on circumstances (:issue:`23294`).
10931093
- The order of the arguments of :func:`DataFrame.to_html` and :func:`DataFrame.to_string` is rearranged to be consistent with each other. (:issue:`23614`)
1094+
- :meth:`CategoricalIndex.reindex` now raises a ``ValueError`` if the target index is non-unique and not equal to the current index. It previously only raised if the target index was not of a categorical dtype (:issue:`23963`).
10941095

10951096
.. _whatsnew_0240.deprecations:
10961097

@@ -1359,6 +1360,7 @@ Numeric
13591360
- Bug in :meth:`Series.rpow` with object dtype returning ``NaN`` for ``1 ** NA`` instead of ``1`` (:issue:`22922`).
13601361
- :meth:`Series.agg` can now handle numpy NaN-aware methods like :func:`numpy.nansum` (:issue:`19629`)
13611362
- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``pct=True`` and more than 2\ :sup:`24` rows are present resulted in percentages greater than 1.0 (:issue:`18271`)
1363+
- Calls such as :meth:`DataFrame.round` with a non-unique :class:`CategoricalIndex` now return expected data. Previously, data would be improperly duplicated (:issue:`21809`).
13621364

13631365
Strings
13641366
^^^^^^^

pandas/core/indexes/category.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -534,12 +534,16 @@ def reindex(self, target, method=None, level=None, limit=None,
534534

535535
target = ibase.ensure_index(target)
536536

537-
if not is_categorical_dtype(target) and not target.is_unique:
538-
raise ValueError("cannot reindex with a non-unique indexer")
537+
if self.equals(target):
538+
indexer = None
539+
missing = []
540+
else:
541+
if not target.is_unique:
542+
raise ValueError("cannot reindex with a non-unique indexer")
539543

540-
indexer, missing = self.get_indexer_non_unique(np.array(target))
544+
indexer, missing = self.get_indexer_non_unique(np.array(target))
541545

542-
if len(self.codes):
546+
if len(self.codes) and indexer is not None:
543547
new_target = self.take(indexer)
544548
else:
545549
new_target = target

pandas/tests/frame/test_analytics.py

+15
Original file line numberDiff line numberDiff line change
@@ -1805,6 +1805,21 @@ def test_built_in_round(self):
18051805
{'col1': [1., 2., 3.], 'col2': [1., 2., 3.]})
18061806
tm.assert_frame_equal(round(df), expected_rounded)
18071807

1808+
def test_round_nonunique_categorical(self):
1809+
# See GH21809
1810+
idx = pd.CategoricalIndex(['low'] * 3 + ['hi'] * 3)
1811+
df = pd.DataFrame(np.random.rand(6, 3), columns=list('abc'))
1812+
1813+
expected = df.round(3)
1814+
expected.index = idx
1815+
1816+
df_categorical = df.copy().set_index(idx)
1817+
assert df_categorical.shape == (6, 3)
1818+
result = df_categorical.round(3)
1819+
assert result.shape == (6, 3)
1820+
1821+
tm.assert_frame_equal(result, expected)
1822+
18081823
def test_pct_change(self):
18091824
# GH 11150
18101825
pnl = DataFrame([np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange(

pandas/tests/indexes/test_category.py

+11
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,17 @@ def test_reindex_dtype(self):
540540
tm.assert_numpy_array_equal(indexer,
541541
np.array([0, 3, 2], dtype=np.intp))
542542

543+
def test_reindex_duplicate_target(self):
544+
# See GH23963
545+
c = CategoricalIndex(['a', 'b', 'c', 'a'],
546+
categories=['a', 'b', 'c', 'd'])
547+
with pytest.raises(ValueError, match='non-unique indexer'):
548+
c.reindex(['a', 'a', 'c'])
549+
550+
with pytest.raises(ValueError, match='non-unique indexer'):
551+
c.reindex(CategoricalIndex(['a', 'a', 'c'],
552+
categories=['a', 'b', 'c', 'd']))
553+
543554
def test_reindex_empty_index(self):
544555
# See GH16770
545556
c = CategoricalIndex([])

0 commit comments

Comments
 (0)