diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 0cb46a5164674..25bd3b43be3ed 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -186,6 +186,7 @@ Categorical - Bug in ``CategoricalIndex.reindex`` failed when ``Index`` passed with elements all in category (:issue:`28690`) - Bug where constructing a :class:`Categorical` from an object-dtype array of ``date`` objects did not round-trip correctly with ``astype`` (:issue:`38552`) - Bug in constructing a :class:`DataFrame` from an ``ndarray`` and a :class:`CategoricalDtype` (:issue:`38857`) +- Bug in setting categorical values into an object-dtype column in a :class:`DataFrame` (:issue:`39136`) - Bug in :meth:`DataFrame.reindex` was raising ``IndexError`` when new index contained duplicates and old index was :class:`CategoricalIndex` (:issue:`38906`) Datetimelike diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 0216334a4c0aa..6f6f17171537f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -959,7 +959,13 @@ def setitem(self, indexer, value): # GH25495 - If the current dtype is not categorical, # we need to create a new categorical block values[indexer] = value - return self.make_block(Categorical(self.values, dtype=arr_value.dtype)) + if values.ndim == 2: + # TODO(EA2D): special case not needed with 2D EAs + if values.shape[-1] != 1: + # shouldn't get here (at least until 2D EAs) + raise NotImplementedError + values = values[:, 0] + return self.make_block(Categorical(values, dtype=arr_value.dtype)) elif exact_match and is_ea_value: # GH#32395 if we're going to replace the values entirely, just diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 7dcb30efb8184..1668123e782ff 100644 --- 
a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -65,6 +65,50 @@ def test_iloc_getitem_list_int(self): class TestiLocBaseIndependent: """Tests Independent Of Base Class""" + @pytest.mark.parametrize( + "key", + [ + slice(None), + slice(3), + range(3), + [0, 1, 2], + Index(range(3)), + np.asarray([0, 1, 2]), + ], + ) + @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) + def test_iloc_setitem_fullcol_categorical(self, indexer, key): + frame = DataFrame({0: range(3)}, dtype=object) + + cat = Categorical(["alpha", "beta", "gamma"]) + expected = DataFrame({0: cat}) + # NB: pending GH#38896, the expected likely should become + # expected = DataFrame({0: cat.astype(object)}) + # and should remain a view on the original values + + assert frame._mgr.blocks[0]._can_hold_element(cat) + + df = frame.copy() + orig_vals = df.values + indexer(df)[key, 0] = cat + + overwrite = not isinstance(key, slice) + + tm.assert_frame_equal(df, expected) + + # TODO: this inconsistency is likely undesired GH#39986 + if overwrite: + # check that we overwrote the underlying values + tm.assert_numpy_array_equal(orig_vals, df.values) + + # but we don't have a view on orig_vals + orig_vals[0, 0] = 19 + assert df.iloc[0, 0] != 19 + + # check we don't have a view on cat (may be undesired GH#39986) + df.iloc[0, 0] = "gamma" + assert cat[0] != "gamma" + @pytest.mark.parametrize("box", [pd_array, Series]) def test_iloc_setitem_ea_inplace(self, frame_or_series, box): # GH#38952 Case with not setting a full column