Skip to content

Commit c37f8df

Browse files
committed
Merge pull request #9470 from jreback/cat
BUG: Bug in Categorical.__getitem__/__setitem__ with listlike input getting incorrect result from indexer coercion (GH9469)
2 parents 2f41991 + f63a8a0 commit c37f8df

File tree

3 files changed

+46
-3
lines changed

3 files changed

+46
-3
lines changed

doc/source/whatsnew/v0.16.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ Bug Fixes
185185
- Fixed compatibility issue in ``DatetimeIndex`` affecting architectures where ``numpy.int_`` defaults to ``numpy.int32`` (:issue:`8943`)
186186
- Bug in Panel indexing with an object-like (:issue:`9140`)
187187
- Bug where the index of the returned ``Series.dt.components`` was reset to the default index (:issue:`9247`)
188-
188+
- Bug in ``Categorical.__getitem__/__setitem__`` with listlike input returning incorrect results due to indexer coercion (:issue:`9469`)
189189

190190
- Fixed bug in ``to_sql`` when mapping a ``Timestamp`` object column (datetime
191191
column with timezone info) to the corresponding sqlalchemy type (:issue:`9085`).

pandas/core/categorical.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -1229,7 +1229,6 @@ def __getitem__(self, key):
12291229
else:
12301230
return self.categories[i]
12311231
else:
1232-
key = self._maybe_coerce_indexer(key)
12331232
return Categorical(values=self._codes[key], categories=self.categories,
12341233
ordered=self.ordered, fastpath=True)
12351234

@@ -1253,6 +1252,7 @@ def __setitem__(self, key, value):
12531252

12541253
rvalue = value if is_list_like(value) else [value]
12551254
to_add = Index(rvalue).difference(self.categories)
1255+
12561256
# no assignments of values not in categories, but it's always ok to set something to np.nan
12571257
if len(to_add) and not isnull(to_add).all():
12581258
raise ValueError("cannot setitem on a Categorical with a new category,"
@@ -1297,7 +1297,6 @@ def __setitem__(self, key, value):
12971297
nan_pos = np.where(isnull(self.categories))[0]
12981298
lindexer[lindexer == -1] = nan_pos
12991299

1300-
key = self._maybe_coerce_indexer(key)
13011300
lindexer = self._maybe_coerce_indexer(lindexer)
13021301
self._codes[key] = lindexer
13031302

pandas/tests/test_categorical.py

+44
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,50 @@ def test_getitem(self):
3434
subf = self.factor[np.asarray(self.factor) == 'c']
3535
tm.assert_almost_equal(subf._codes, [2, 2, 2])
3636

37+
def test_getitem_listlike(self):
38+
39+
# GH 9469
40+
# properly coerce the input indexers
41+
np.random.seed(1)
42+
c = Categorical(np.random.randint(0, 5, size=150000).astype(np.int8))
43+
result = c.codes[np.array([100000]).astype(np.int64)]
44+
expected = c[np.array([100000]).astype(np.int64)].codes
45+
self.assert_numpy_array_equal(result, expected)
46+
47+
def test_setitem(self):
48+
49+
# int/positional
50+
c = self.factor.copy()
51+
c[0] = 'b'
52+
self.assertEqual(c[0], 'b')
53+
c[-1] = 'a'
54+
self.assertEqual(c[-1], 'a')
55+
56+
# boolean
57+
c = self.factor.copy()
58+
indexer = np.zeros(len(c),dtype='bool')
59+
indexer[0] = True
60+
indexer[-1] = True
61+
c[indexer] = 'c'
62+
expected = Categorical.from_array(['c', 'b', 'b', 'a',
63+
'a', 'c', 'c', 'c'])
64+
65+
self.assert_categorical_equal(c, expected)
66+
67+
def test_setitem_listlike(self):
68+
69+
# GH 9469
70+
# properly coerce the input indexers
71+
np.random.seed(1)
72+
c = Categorical(np.random.randint(0, 5, size=150000).astype(np.int8)).add_categories([-1000])
73+
indexer = np.array([100000]).astype(np.int64)
74+
c[indexer] = -1000
75+
76+
# we are asserting the code result here
77+
# which maps to the -1000 category
78+
result = c.codes[np.array([100000]).astype(np.int64)]
79+
self.assertEqual(result, np.array([5], dtype='int8'))
80+
3781
def test_constructor_unsortable(self):
3882

3983
# it works!

0 commit comments

Comments
 (0)