Skip to content

BUG: Bug in Categorical.__getitem__/__setitem__ with listlike input getting incorrect result from indexer coercion (GH9469) #9470

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 12, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.16.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ Bug Fixes
- Fixed compatibility issue in ``DatetimeIndex`` affecting architectures where ``numpy.int_`` defaults to ``numpy.int32`` (:issue:`8943`)
- Bug in Panel indexing with an object-like (:issue:`9140`)
- Bug in the returned ``Series.dt.components`` index was reset to the default index (:issue:`9247`)

- Bug in ``Categorical.__getitem__/__setitem__`` with listlike input getting incorrect results from indexer coercion (:issue:`9469`)

- Fixed bug in ``to_sql`` when mapping a ``Timestamp`` object column (datetime
column with timezone info) to the appropriate sqlalchemy type (:issue:`9085`).
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1229,7 +1229,6 @@ def __getitem__(self, key):
else:
return self.categories[i]
else:
key = self._maybe_coerce_indexer(key)
return Categorical(values=self._codes[key], categories=self.categories,
ordered=self.ordered, fastpath=True)

Expand All @@ -1253,6 +1252,7 @@ def __setitem__(self, key, value):

rvalue = value if is_list_like(value) else [value]
to_add = Index(rvalue).difference(self.categories)

# no assignments of values not in categories, but it's always ok to set something to np.nan
if len(to_add) and not isnull(to_add).all():
raise ValueError("cannot setitem on a Categorical with a new category,"
Expand Down Expand Up @@ -1297,7 +1297,6 @@ def __setitem__(self, key, value):
nan_pos = np.where(isnull(self.categories))[0]
lindexer[lindexer == -1] = nan_pos

key = self._maybe_coerce_indexer(key)
lindexer = self._maybe_coerce_indexer(lindexer)
self._codes[key] = lindexer

Expand Down
44 changes: 44 additions & 0 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,50 @@ def test_getitem(self):
subf = self.factor[np.asarray(self.factor) == 'c']
tm.assert_almost_equal(subf._codes, [2, 2, 2])

def test_getitem_listlike(self):

    # GH 9469
    # A list-like int64 indexer must pick the same element through
    # Categorical.__getitem__ as it does on the raw codes array.
    # Position 100000 is far outside the int8 range, so any narrowing of
    # the indexer would select a different element.
    np.random.seed(1)
    values = np.random.randint(0, 5, size=150000).astype(np.int8)
    cat = Categorical(values)
    indexer = np.array([100000]).astype(np.int64)

    # index the codes directly vs. index the Categorical and read its codes
    result = cat.codes[indexer]
    expected = cat[indexer].codes
    self.assert_numpy_array_equal(result, expected)

def test_setitem(self):

    # scalar positional assignment, from the front and from the back
    cat = self.factor.copy()
    cat[0] = 'b'
    self.assertEqual(cat[0], 'b')
    cat[-1] = 'a'
    self.assertEqual(cat[-1], 'a')

    # boolean-mask assignment: only the first and last positions are
    # selected, and both receive the same category
    cat = self.factor.copy()
    mask = np.zeros(len(cat), dtype='bool')
    mask[[0, -1]] = True
    cat[mask] = 'c'

    expected = Categorical.from_array(
        ['c', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])
    self.assert_categorical_equal(cat, expected)

def test_setitem_listlike(self):

    # GH 9469
    # Assigning through a list-like int64 indexer must write to exactly the
    # position the indexer names. Position 100000 is far outside the int8
    # range, so any narrowing of the indexer would hit a different element.
    np.random.seed(1)
    c = Categorical(np.random.randint(0, 5, size=150000).astype(np.int8)).add_categories([-1000])
    indexer = np.array([100000]).astype(np.int64)
    c[indexer] = -1000

    # we are asserting the code result here
    # which maps to the -1000 category
    result = c.codes[indexer]
    expected = np.array([5], dtype='int8')

    # compare as arrays (like test_getitem_listlike) instead of relying on
    # the truth value of an elementwise ``==`` inside assertEqual
    self.assert_numpy_array_equal(result, expected)

def test_constructor_unsortable(self):

# it works!
Expand Down