Skip to content

Backport PR #41846 on branch 1.3.x (BUG: DataFrame.at with CategoricalIndex) #42038

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1015,6 +1015,7 @@ Indexing
- Bug in :meth:`DataFrame.loc.__getitem__` with :class:`MultiIndex` casting to float when at least one index column has float dtype and we retrieve a scalar (:issue:`41369`)
- Bug in :meth:`DataFrame.loc` incorrectly matching non-Boolean index elements (:issue:`20432`)
- Bug in :meth:`Series.__delitem__` with ``ExtensionDtype`` incorrectly casting to ``ndarray`` (:issue:`40386`)
- Bug in :meth:`DataFrame.at` with a :class:`CategoricalIndex` returning incorrect results when passed integer keys (:issue:`41846`)
- Bug in :meth:`DataFrame.loc` returning a :class:`MultiIndex` in the wrong order if an indexer has duplicates (:issue:`40978`)
- Bug in :meth:`DataFrame.__setitem__` raising a ``TypeError`` when using a ``str`` subclass as the column name with a :class:`DatetimeIndex` (:issue:`37366`)
- Bug in :meth:`PeriodIndex.get_loc` failing to raise a ``KeyError`` when given a :class:`Period` with a mismatched ``freq`` (:issue:`41670`)
Expand Down
29 changes: 18 additions & 11 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@
from pandas.core.indexers import check_key_length
from pandas.core.indexes import base as ibase
from pandas.core.indexes.api import (
CategoricalIndex,
DatetimeIndex,
Index,
PeriodIndex,
Expand Down Expand Up @@ -3553,6 +3554,11 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar:
Returns
-------
scalar

Notes
-----
Assumes that index and columns both have ax._index_as_unique;
caller is responsible for checking.
"""
if takeable:
series = self._ixs(col, axis=1)
Expand All @@ -3561,20 +3567,21 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar:
series = self._get_item_cache(col)
engine = self.index._engine

if isinstance(self.index, CategoricalIndex):
# Trying to use the engine fastpath may give incorrect results
# if our categories are integers that don't match our codes
col = self.columns.get_loc(col)
index = self.index.get_loc(index)
return self._get_value(index, col, takeable=True)

try:
loc = engine.get_loc(index)
return series._values[loc]
except KeyError:
# GH 20629
if self.index.nlevels > 1:
# partial indexing forbidden
raise

# we cannot handle direct indexing
# use positional
col = self.columns.get_loc(col)
index = self.index.get_loc(index)
return self._get_value(index, col, takeable=True)
except AttributeError:
# IntervalTree has no get_loc
col = self.columns.get_loc(col)
index = self.index.get_loc(index)
return self._get_value(index, col, takeable=True)

def __setitem__(self, key, value):
key = com.apply_if_callable(key, self)
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,8 +916,7 @@ def __getitem__(self, key):
key = tuple(list(x) if is_iterator(x) else x for x in key)
key = tuple(com.apply_if_callable(x, self.obj) for x in key)
if self._is_scalar_access(key):
with suppress(KeyError, IndexError, AttributeError):
# AttributeError for IntervalTree get_value
with suppress(KeyError, IndexError):
return self.obj._get_value(*key, takeable=self._takeable)
return self._getitem_tuple(key)
else:
Expand Down Expand Up @@ -1004,7 +1003,7 @@ def _is_scalar_access(self, key: tuple) -> bool:
# should not be considered scalar
return False

if not ax.is_unique:
if not ax._index_as_unique:
return False

return True
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/indexing/test_at.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from pandas import (
CategoricalDtype,
CategoricalIndex,
DataFrame,
Series,
Timestamp,
Expand Down Expand Up @@ -141,3 +142,16 @@ def test_at_getitem_mixed_index_no_fallback(self):
ser.at[0]
with pytest.raises(KeyError, match="^4$"):
ser.at[4]

def test_at_categorical_integers(self):
    """``.at`` must treat integer keys as labels, not as category codes."""
    # The integer categories (3, 4) are chosen so that they differ from the
    # positions 0 and 1 that are used as lookup keys below.
    cat_index = CategoricalIndex([3, 4])
    values = np.arange(4).reshape(2, 2)

    by_rows = DataFrame(values, index=cat_index)
    by_columns = by_rows.T

    # Exercise the categorical axis both as the index and as the columns.
    for df in (by_rows, by_columns):
        for key in (0, 1):
            expected_message = str(key)
            with pytest.raises(KeyError, match=expected_message):
                df.at[key, key]