diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index 9395d730d99ee..3fa48c7e9a1fc 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -185,7 +185,7 @@ Bug Fixes - Fixed compatibility issue in ``DatetimeIndex`` affecting architectures where ``numpy.int_`` defaults to ``numpy.int32`` (:issue:`8943`) - Bug in Panel indexing with an object-like (:issue:`9140`) - Bug in the returned ``Series.dt.components`` index was reset to the default index (:issue:`9247`) - +- Bug in ``Categorical.__getitem__/__setitem__`` with listlike input getting incorrect results from indexer coercion (:issue:`9469`) - Fixed bug in ``to_sql`` when mapping a ``Timestamp`` object column (datetime column with timezone info) to the according sqlalchemy type (:issue:`9085`). diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 28c9d096e06d3..4ccbf6cc9f9b5 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1229,7 +1229,6 @@ def __getitem__(self, key): else: return self.categories[i] else: - key = self._maybe_coerce_indexer(key) return Categorical(values=self._codes[key], categories=self.categories, ordered=self.ordered, fastpath=True) @@ -1253,6 +1252,7 @@ def __setitem__(self, key, value): rvalue = value if is_list_like(value) else [value] to_add = Index(rvalue).difference(self.categories) + # no assignments of values not in categories, but it's always ok to set something to np.nan if len(to_add) and not isnull(to_add).all(): raise ValueError("cannot setitem on a Categorical with a new category," @@ -1297,7 +1297,6 @@ def __setitem__(self, key, value): nan_pos = np.where(isnull(self.categories))[0] lindexer[lindexer == -1] = nan_pos - key = self._maybe_coerce_indexer(key) lindexer = self._maybe_coerce_indexer(lindexer) self._codes[key] = lindexer diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 7a8d5e0ac0032..5726ecab4d1a1 100644 --- 
a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -34,6 +34,53 @@ def test_getitem(self):
         subf = self.factor[np.asarray(self.factor) == 'c']
         tm.assert_almost_equal(subf._codes, [2, 2, 2])
 
+    def test_getitem_listlike(self):
+        """Listlike ``__getitem__`` agrees with indexing ``codes`` directly."""
+        # GH 9469
+        # properly coerce the input indexers
+        np.random.seed(1)
+        c = Categorical(np.random.randint(0, 5, size=150000).astype(np.int8))
+        indexer = np.array([100000]).astype(np.int64)
+        result = c.codes[indexer]
+        expected = c[indexer].codes
+        self.assert_numpy_array_equal(result, expected)
+
+    def test_setitem(self):
+        """Scalar assignment through positional and boolean indexers."""
+        # int/positional
+        c = self.factor.copy()
+        c[0] = 'b'
+        self.assertEqual(c[0], 'b')
+        c[-1] = 'a'
+        self.assertEqual(c[-1], 'a')
+
+        # boolean
+        c = self.factor.copy()
+        indexer = np.zeros(len(c), dtype='bool')
+        indexer[0] = True
+        indexer[-1] = True
+        c[indexer] = 'c'
+        expected = Categorical.from_array(['c', 'b', 'b', 'a',
+                                           'a', 'c', 'c', 'c'])
+
+        self.assert_categorical_equal(c, expected)
+
+    def test_setitem_listlike(self):
+        """Listlike ``__setitem__`` stores the assigned category's code."""
+        # GH 9469
+        # properly coerce the input indexers
+        np.random.seed(1)
+        c = Categorical(np.random.randint(0, 5, size=150000).astype(
+            np.int8)).add_categories([-1000])
+        indexer = np.array([100000]).astype(np.int64)
+        c[indexer] = -1000
+
+        # we are asserting the code result here
+        # which maps to the -1000 category
+        result = c.codes[indexer]
+        expected = np.array([5], dtype='int8')
+        self.assert_numpy_array_equal(result, expected)
+
     def test_constructor_unsortable(self):
 
         # it works!