Skip to content

REF: avoid accessing ._engine in Series/DataFrame #41956

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 17, 2021
32 changes: 13 additions & 19 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,6 @@
from pandas.core.indexers import check_key_length
from pandas.core.indexes import base as ibase
from pandas.core.indexes.api import (
CategoricalIndex,
DatetimeIndex,
Index,
PeriodIndex,
Expand Down Expand Up @@ -3557,8 +3556,8 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar:

Notes
-----
Assumes that index and columns both have ax._index_as_unique;
caller is responsible for checking.
Assumes that both `self.index._index_as_unique` and
`self.columns._index_as_unique`; Caller is responsible for checking.
"""
if takeable:
series = self._ixs(col, axis=1)
Expand All @@ -3567,21 +3566,17 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar:
series = self._get_item_cache(col)
engine = self.index._engine

if isinstance(self.index, CategoricalIndex):
# Trying to use the engine fastpath may give incorrect results
# if our categories are integers that dont match our codes
col = self.columns.get_loc(col)
index = self.index.get_loc(index)
return self._get_value(index, col, takeable=True)
if not isinstance(self.index, MultiIndex):
# CategoricalIndex: Trying to use the engine fastpath may give incorrect
# results if our categories are integers that dont match our codes
# IntervalIndex: IntervalTree has no get_loc
row = self.index.get_loc(index)
return series._values[row]

try:
loc = engine.get_loc(index)
return series._values[loc]
except AttributeError:
# IntervalTree has no get_loc
col = self.columns.get_loc(col)
index = self.index.get_loc(index)
return self._get_value(index, col, takeable=True)
# For MultiIndex going through engine effectively restricts us to
# same-length tuples; see test_get_set_value_no_partial_indexing
loc = engine.get_loc(index)
return series._values[loc]

def __setitem__(self, key, value):
key = com.apply_if_callable(key, self)
Expand Down Expand Up @@ -3816,8 +3811,7 @@ def _set_value(
return

series = self._get_item_cache(col)
engine = self.index._engine
loc = engine.get_loc(index)
loc = self.index.get_loc(index)
validate_numeric_casting(series.dtype, value)

series._values[loc] = value
Expand Down
15 changes: 5 additions & 10 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,8 +916,7 @@ def __getitem__(self, key):
key = tuple(list(x) if is_iterator(x) else x for x in key)
key = tuple(com.apply_if_callable(x, self.obj) for x in key)
if self._is_scalar_access(key):
with suppress(KeyError, IndexError):
return self.obj._get_value(*key, takeable=self._takeable)
return self.obj._get_value(*key, takeable=self._takeable)
return self._getitem_tuple(key)
else:
# we by definition only have the 0th axis
Expand Down Expand Up @@ -2187,7 +2186,7 @@ class _ScalarAccessIndexer(NDFrameIndexerBase):
Access scalars quickly.
"""

def _convert_key(self, key, is_setter: bool = False):
def _convert_key(self, key):
raise AbstractMethodError(self)

def __getitem__(self, key):
Expand All @@ -2211,7 +2210,7 @@ def __setitem__(self, key, value):

if not isinstance(key, tuple):
key = _tuplify(self.ndim, key)
key = list(self._convert_key(key, is_setter=True))
key = list(self._convert_key(key))
if len(key) != self.ndim:
raise ValueError("Not enough indexers for scalar access (setting)!")

Expand All @@ -2222,7 +2221,7 @@ def __setitem__(self, key, value):
class _AtIndexer(_ScalarAccessIndexer):
_takeable = False

def _convert_key(self, key, is_setter: bool = False):
def _convert_key(self, key):
"""
Require they keys to be the same type as the index. (so we don't
fallback)
Expand All @@ -2233,10 +2232,6 @@ def _convert_key(self, key, is_setter: bool = False):
if self.ndim == 1 and len(key) > 1:
key = (key,)

# allow arbitrary setting
if is_setter:
return list(key)

return key

@property
Expand Down Expand Up @@ -2271,7 +2266,7 @@ def __setitem__(self, key, value):
class _iAtIndexer(_ScalarAccessIndexer):
_takeable = True

def _convert_key(self, key, is_setter: bool = False):
def _convert_key(self, key):
"""
Require integer args. (and convert to label arguments)
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@ def test_iloc_getitem_labelled_frame(self):
assert result == exp

# out-of-bounds exception
msg = "single positional indexer is out-of-bounds"
msg = "index 5 is out of bounds for axis 0 with size 4"
with pytest.raises(IndexError, match=msg):
df.iloc[10, 5]

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,7 @@ def test_string_slice_empty(self):
with pytest.raises(KeyError, match="'2011'"):
df["2011"]

with pytest.raises(KeyError, match="'2011'"):
with pytest.raises(KeyError, match="0"):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for KeyErrors can you match with ^ and $

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated+green

df.loc["2011", 0]

def test_astype_assignment(self):
Expand Down