Skip to content

Commit 8e0acdf

Browse files
authored
BUG: setting categorical values into object dtype DataFrame (#39136)
1 parent 37d9a17 commit 8e0acdf

File tree

3 files changed

+53
-1
lines changed

3 files changed

+53
-1
lines changed

doc/source/whatsnew/v1.3.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,8 @@ Categorical
186186
- Bug in ``CategoricalIndex.reindex`` failed when ``Index`` passed with elements all in category (:issue:`28690`)
187187
- Bug where constructing a :class:`Categorical` from an object-dtype array of ``date`` objects did not round-trip correctly with ``astype`` (:issue:`38552`)
188188
- Bug in constructing a :class:`DataFrame` from an ``ndarray`` and a :class:`CategoricalDtype` (:issue:`38857`)
189+
- Bug in :meth:`DataFrame.reindex` was raising ``IndexError`` when new index contained duplicates and old index was :class:`CategoricalIndex` (:issue:`38906`)
190+
- Bug in setting categorical values into an object-dtype column in a :class:`DataFrame` (:issue:`39136`)
189191
- Bug in :meth:`DataFrame.reindex` was raising ``IndexError`` when new index contained duplicates and old index was :class:`CategoricalIndex` (:issue:`38906`)
190192

191193
Datetimelike

pandas/core/internals/blocks.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -959,7 +959,13 @@ def setitem(self, indexer, value):
959959
# GH25495 - If the current dtype is not categorical,
960960
# we need to create a new categorical block
961961
values[indexer] = value
962-
return self.make_block(Categorical(self.values, dtype=arr_value.dtype))
962+
if values.ndim == 2:
963+
# TODO(EA2D): special case not needed with 2D EAs
964+
if values.shape[-1] != 1:
965+
# shouldn't get here (at least until 2D EAs)
966+
raise NotImplementedError
967+
values = values[:, 0]
968+
return self.make_block(Categorical(values, dtype=arr_value.dtype))
963969

964970
elif exact_match and is_ea_value:
965971
# GH#32395 if we're going to replace the values entirely, just

pandas/tests/indexing/test_iloc.py

+44
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,50 @@ def test_iloc_getitem_list_int(self):
6565
class TestiLocBaseIndependent:
6666
"""Tests Independent Of Base Class"""
6767

68+
@pytest.mark.parametrize(
69+
"key",
70+
[
71+
slice(None),
72+
slice(3),
73+
range(3),
74+
[0, 1, 2],
75+
Index(range(3)),
76+
np.asarray([0, 1, 2]),
77+
],
78+
)
79+
@pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
80+
def test_iloc_setitem_fullcol_categorical(self, indexer, key):
81+
frame = DataFrame({0: range(3)}, dtype=object)
82+
83+
cat = Categorical(["alpha", "beta", "gamma"])
84+
expected = DataFrame({0: cat})
85+
# NB: pending GH#38896, the expected likely should become
86+
# expected = DataFrame({0: cat.astype(object)})
87+
# and should remain a view on the original values
88+
89+
assert frame._mgr.blocks[0]._can_hold_element(cat)
90+
91+
df = frame.copy()
92+
orig_vals = df.values
93+
indexer(df)[key, 0] = cat
94+
95+
overwrite = not isinstance(key, slice)
96+
97+
tm.assert_frame_equal(df, expected)
98+
99+
# TODO: this inconsistency is likely undesired GH#39986
100+
if overwrite:
101+
# check that we overwrote the underlying values
102+
tm.assert_numpy_array_equal(orig_vals, df.values)
103+
104+
# but we don't have a view on orig_vals
105+
orig_vals[0, 0] = 19
106+
assert df.iloc[0, 0] != 19
107+
108+
# check we don't have a view on cat (may be undesired GH#39986)
109+
df.iloc[0, 0] = "gamma"
110+
assert cat[0] != "gamma"
111+
68112
@pytest.mark.parametrize("box", [pd_array, Series])
69113
def test_iloc_setitem_ea_inplace(self, frame_or_series, box):
70114
# GH#38952 Case with not setting a full column

0 commit comments

Comments
 (0)