Skip to content

API/BUG: raise only KeyError on failed getitem/loc lookups #31867

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 27, 2020
71 changes: 70 additions & 1 deletion doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,76 @@ Backwards incompatible API changes
now raise a ``TypeError`` if a not-accepted keyword argument is passed into it.
Previously an ``UnsupportedFunctionCall`` was raised (``AssertionError`` if ``min_count`` was passed into :meth:`~DataFrameGroupby.median`) (:issue:`31485`)
- :meth:`DataFrame.at` and :meth:`Series.at` will raise a ``TypeError`` instead of a ``ValueError`` if an incompatible key is passed, and ``KeyError`` if a missing key is passed, matching the behavior of ``.loc[]`` (:issue:`31722`)
-

.. _whatsnew_110.api_breaking.indexing_raises_key_errors:

Failed Label-Based Lookups Always Raise KeyError
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Label lookups ``series[key]``, ``series.loc[key]`` and ``frame.loc[key]``
used to raise either ``KeyError`` or ``TypeError`` depending on the type of
key and type of :class:`Index`. These now consistently raise ``KeyError`` (:issue:`31867`)

.. ipython:: python

ser1 = pd.Series(range(3), index=[0, 1, 2])
ser2 = pd.Series(range(3), index=pd.date_range("2020-02-01", periods=3))

*Previous behavior*:

.. code-block:: ipython

In [3]: ser1[1.5]
...
TypeError: cannot do label indexing on Int64Index with these indexers [1.5] of type float

In [4]: ser1["foo"]
...
KeyError: 'foo'

In [5]: ser1.loc[1.5]
...
TypeError: cannot do label indexing on Int64Index with these indexers [1.5] of type float

In [6]: ser1.loc["foo"]
...
KeyError: 'foo'

In [7]: ser2.loc[1]
...
TypeError: cannot do label indexing on DatetimeIndex with these indexers [1] of type int

In [8]: ser2.loc[pd.Timestamp(0)]
...
KeyError: Timestamp('1970-01-01 00:00:00')

*New behavior*:

.. code-block:: ipython

In [3]: ser1[1.5]
...
KeyError: 1.5

In [4]: ser1["foo"]
...
KeyError: 'foo'

In [5]: ser1.loc[1.5]
...
KeyError: 1.5

In [6]: ser1.loc["foo"]
...
KeyError: 'foo'

In [7]: ser2.loc[1]
...
KeyError: 1

In [8]: ser2.loc[pd.Timestamp(0)]
...
KeyError: Timestamp('1970-01-01 00:00:00')

.. ---------------------------------------------------------------------------

Expand Down
6 changes: 3 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3094,7 +3094,7 @@ def _convert_scalar_indexer(self, key, kind: str_t):

if kind == "getitem" and is_float(key):
if not self.is_floating():
self._invalid_indexer("label", key)
raise KeyError(key)

elif kind == "loc" and is_float(key):

Expand All @@ -3108,11 +3108,11 @@ def _convert_scalar_indexer(self, key, kind: str_t):
"string",
"mixed",
]:
self._invalid_indexer("label", key)
raise KeyError(key)

elif kind == "loc" and is_integer(key):
if not (is_integer_dtype(self.dtype) or is_object_dtype(self.dtype)):
self._invalid_indexer("label", key)
raise KeyError(key)

return key

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,7 @@ def _convert_scalar_indexer(self, key, kind: str):
try:
return self.categories._convert_scalar_indexer(key, kind="loc")
except TypeError:
self._invalid_indexer("label", key)
raise KeyError(key)
return super()._convert_scalar_indexer(key, kind=kind)

@Appender(Index._convert_list_indexer.__doc__)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,9 +396,9 @@ def _convert_scalar_indexer(self, key, kind: str):
is_int = is_integer(key)
is_flt = is_float(key)
if kind == "loc" and (is_int or is_flt):
self._invalid_indexer("label", key)
raise KeyError(key)
elif kind == "getitem" and is_flt:
self._invalid_indexer("label", key)
raise KeyError(key)

return super()._convert_scalar_indexer(key, kind=kind)

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1151,7 +1151,7 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False):
# try to find out correct indexer, if not type correct raise
try:
key = labels._convert_scalar_indexer(key, kind="loc")
except TypeError:
except KeyError:
# but we will allow setting
if not is_setter:
raise
Expand Down
6 changes: 1 addition & 5 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1859,11 +1859,7 @@ def check(df):

# No NaN found -> error
if len(indexer) == 0:
msg = (
"cannot do label indexing on RangeIndex "
r"with these indexers \[nan\] of type float"
)
with pytest.raises(TypeError, match=msg):
with pytest.raises(KeyError, match="^nan$"):
df.loc[:, np.nan]
# single nan should result in Series
elif len(indexer) == 1:
Expand Down
29 changes: 10 additions & 19 deletions pandas/tests/indexing/test_floats.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,9 @@ def test_scalar_non_numeric(self, index_func, klass):
# getting
for idxr, getitem in [(lambda x: x.iloc, False), (lambda x: x, True)]:

# gettitem on a DataFrame is a KeyError as it is indexing
# via labels on the columns
if getitem and isinstance(s, DataFrame):
if getitem:
error = KeyError
msg = r"^3(\.0)?$"
msg = r"^3\.0?$"
else:
error = TypeError
msg = (
Expand All @@ -116,6 +114,9 @@ def test_scalar_non_numeric(self, index_func, klass):
"string",
"unicode",
"mixed",
"period",
"timedelta64",
"datetime64",
}:
error = KeyError
msg = r"^3\.0$"
Expand Down Expand Up @@ -183,12 +184,7 @@ def test_scalar_non_numeric_series_fallback(self, index_func):
i = index_func(5)
s = Series(np.arange(len(i)), index=i)
s[3]
msg = (
r"cannot do (label|positional) indexing "
fr"on {type(i).__name__} with these indexers \[3\.0\] of "
"type float"
)
with pytest.raises(TypeError, match=msg):
with pytest.raises(KeyError, match="^3.0$"):
s[3.0]

def test_scalar_with_mixed(self):
Expand All @@ -199,12 +195,12 @@ def test_scalar_with_mixed(self):
# lookup in a pure stringstr
# with an invalid indexer
msg = (
"cannot do label indexing "
fr"on {Index.__name__} with these indexers \[1\.0\] of "
r"cannot do label indexing "
r"on Index with these indexers \[1\.0\] of "
r"type float|"
"Cannot index by location index with a non-integer key"
)
with pytest.raises(TypeError, match=msg):
with pytest.raises(KeyError, match="^1.0$"):
s2[1.0]
with pytest.raises(TypeError, match=msg):
s2.iloc[1.0]
Expand All @@ -218,12 +214,7 @@ def test_scalar_with_mixed(self):

# mixed index so we have label
# indexing
msg = (
"cannot do label indexing "
fr"on {Index.__name__} with these indexers \[1\.0\] of "
"type float"
)
with pytest.raises(TypeError, match=msg):
with pytest.raises(KeyError, match="^1.0$"):
s3[1.0]

result = s3[1]
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_loc_getitem_label_out_of_range(self):
"loc", 20, typs=["ints", "uints", "mixed"], fails=KeyError,
)
self.check_result("loc", 20, typs=["labels"], fails=KeyError)
self.check_result("loc", 20, typs=["ts"], axes=0, fails=TypeError)
self.check_result("loc", 20, typs=["ts"], axes=0, fails=KeyError)
self.check_result("loc", 20, typs=["floats"], axes=0, fails=KeyError)

def test_loc_getitem_label_list(self):
Expand Down