diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index f888648a9363e..a6171d39ae27f 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1090,6 +1090,7 @@ Other API Changes - :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather that a ``ValueError``, if a searched for key is not found in its categories (:issue:`23466`). - :meth:`Index.hasnans` and :meth:`Series.hasnans` now always return a python boolean. Previously, a python or a numpy boolean could be returned, depending on circumstances (:issue:`23294`). - The order of the arguments of :func:`DataFrame.to_html` and :func:`DataFrame.to_string` is rearranged to be consistent with each other. (:issue:`23614`) +- :meth:`CategoricalIndex.reindex` now raises a ``ValueError`` if the target index is non-unique and not equal to the current index. It previously only raised if the target index was not of a categorical dtype (:issue:`23963`). .. _whatsnew_0240.deprecations: @@ -1356,6 +1357,7 @@ Numeric - Bug in :meth:`Series.rpow` with object dtype ``NaN`` for ``1 ** NA`` instead of ``1`` (:issue:`22922`). - :meth:`Series.agg` can now handle numpy NaN-aware methods like :func:`numpy.nansum` (:issue:`19629`) - Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``pct=True`` and more than 2:sup:`24` rows are present resulted in percentages greater than 1.0 (:issue:`18271`) +- Calls such as :meth:`DataFrame.round` with a non-unique :class:`CategoricalIndex` now return expected data. Previously, data would be improperly duplicated (:issue:`21809`). 
Strings ^^^^^^^ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 6b84e8deea493..1ad9f285b755f 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -532,12 +532,16 @@ def reindex(self, target, method=None, level=None, limit=None, target = ibase.ensure_index(target) - if not is_categorical_dtype(target) and not target.is_unique: - raise ValueError("cannot reindex with a non-unique indexer") + if self.equals(target): + indexer = None + missing = [] + else: + if not target.is_unique: + raise ValueError("cannot reindex with a non-unique indexer") - indexer, missing = self.get_indexer_non_unique(np.array(target)) + indexer, missing = self.get_indexer_non_unique(np.array(target)) - if len(self.codes): + if len(self.codes) and indexer is not None: new_target = self.take(indexer) else: new_target = target diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index c9481fef4aa36..2bf2dd593184f 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1805,6 +1805,21 @@ def test_built_in_round(self): {'col1': [1., 2., 3.], 'col2': [1., 2., 3.]}) tm.assert_frame_equal(round(df), expected_rounded) + def test_round_nonunique_categorical(self): + # See GH21809 + idx = pd.CategoricalIndex(['low'] * 3 + ['hi'] * 3) + df = pd.DataFrame(np.random.rand(6, 3), columns=list('abc')) + + expected = df.round(3) + expected.index = idx + + df_categorical = df.copy().set_index(idx) + assert df_categorical.shape == (6, 3) + result = df_categorical.round(3) + assert result.shape == (6, 3) + + tm.assert_frame_equal(result, expected) + def test_pct_change(self): # GH 11150 pnl = DataFrame([np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange( diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 6c5a70d76e3b5..937e5e5a6af51 100644 --- a/pandas/tests/indexes/test_category.py +++ 
b/pandas/tests/indexes/test_category.py @@ -540,6 +540,17 @@ def test_reindex_dtype(self): tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + def test_reindex_duplicate_target(self): + # See GH23963 + c = CategoricalIndex(['a', 'b', 'c', 'a'], + categories=['a', 'b', 'c', 'd']) + with pytest.raises(ValueError, match='non-unique indexer'): + c.reindex(['a', 'a', 'c']) + + with pytest.raises(ValueError, match='non-unique indexer'): + c.reindex(CategoricalIndex(['a', 'a', 'c'], + categories=['a', 'b', 'c', 'd'])) + def test_reindex_empty_index(self): # See GH16770 c = CategoricalIndex([])