diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 7449c62a5ad31..3d478df4c0ea8 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -67,7 +67,76 @@ Backwards incompatible API changes now raise a ``TypeError`` if a not-accepted keyword argument is passed into it. Previously a ``UnsupportedFunctionCall`` was raised (``AssertionError`` if ``min_count`` passed into :meth:`~DataFrameGroupby.median``) (:issue:`31485`) - :meth:`DataFrame.at` and :meth:`Series.at` will raise a ``TypeError`` instead of a ``ValueError`` if an incompatible key is passed, and ``KeyError`` if a missing key is passed, matching the behavior of ``.loc[]`` (:issue:`31722`) -- + +.. _whatsnew_110.api_breaking.indexing_raises_key_errors: + +Failed Label-Based Lookups Always Raise KeyError +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Label lookups ``series[key]``, ``series.loc[key]`` and ``frame.loc[key]`` +used to raises either ``KeyError`` or ``TypeError`` depending on the type of +key and type of :class:`Index`. These now consistently raise ``KeyError`` (:issue:`31867`) + +.. ipython:: python + + ser1 = pd.Series(range(3), index=[0, 1, 2]) + ser2 = pd.Series(range(3), index=pd.date_range("2020-02-01", periods=3)) + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: ser1[1.5] + ... + TypeError: cannot do label indexing on Int64Index with these indexers [1.5] of type float + + In [4] ser1["foo"] + ... + KeyError: 'foo' + + In [5]: ser1.loc[1.5] + ... + TypeError: cannot do label indexing on Int64Index with these indexers [1.5] of type float + + In [6]: ser1.loc["foo"] + ... + KeyError: 'foo' + + In [7]: ser2.loc[1] + ... + TypeError: cannot do label indexing on DatetimeIndex with these indexers [1] of type int + + In [8]: ser2.loc[pd.Timestamp(0)] + ... + KeyError: Timestamp('1970-01-01 00:00:00') + +*New behavior*: + +.. code-block:: ipython + + In [3]: ser1[1.5] + ... + KeyError: 1.5 + + In [4] ser1["foo"] + ... + KeyError: 'foo' + + In [5]: ser1.loc[1.5] + ... + KeyError: 1.5 + + In [6]: ser1.loc["foo"] + ... + KeyError: 'foo' + + In [7]: ser2.loc[1] + ... + KeyError: 1 + + In [8]: ser2.loc[pd.Timestamp(0)] + ... + KeyError: Timestamp('1970-01-01 00:00:00') .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c896e68f7a188..cbb43317e962f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3094,7 +3094,7 @@ def _convert_scalar_indexer(self, key, kind: str_t): if kind == "getitem" and is_float(key): if not self.is_floating(): - self._invalid_indexer("label", key) + raise KeyError(key) elif kind == "loc" and is_float(key): @@ -3108,11 +3108,11 @@ def _convert_scalar_indexer(self, key, kind: str_t): "string", "mixed", ]: - self._invalid_indexer("label", key) + raise KeyError(key) elif kind == "loc" and is_integer(key): if not (is_integer_dtype(self.dtype) or is_object_dtype(self.dtype)): - self._invalid_indexer("label", key) + raise KeyError(key) return key diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index caa6a9a93141f..24c50ea4270a8 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -602,7 +602,7 @@ def _convert_scalar_indexer(self, key, kind: str): try: return self.categories._convert_scalar_indexer(key, kind="loc") except TypeError: - self._invalid_indexer("label", key) + raise KeyError(key) return super()._convert_scalar_indexer(key, kind=kind) @Appender(Index._convert_list_indexer.__doc__) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 1b3b6934aa53a..88264303f6ceb 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -396,9 +396,9 @@ def _convert_scalar_indexer(self, key, kind: str): is_int = is_integer(key) is_flt = is_float(key) if kind == "loc" and (is_int or is_flt): - self._invalid_indexer("label", key) + raise KeyError(key) elif kind == "getitem" and is_flt: - self._invalid_indexer("label", key) + raise KeyError(key) return super()._convert_scalar_indexer(key, kind=kind) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1644b4203052b..e302b9ca48f90 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1151,7 +1151,7 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): # try to find out correct indexer, if not type correct raise try: key = labels._convert_scalar_indexer(key, kind="loc") - except TypeError: + except KeyError: # but we will allow setting if not is_setter: raise diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 8c9b7cd060059..1e7732611147f 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1859,11 +1859,7 @@ def check(df): # No NaN found -> error if len(indexer) == 0: - msg = ( - "cannot do label indexing on RangeIndex " - r"with these indexers \[nan\] of type float" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^nan$"): df.loc[:, np.nan] # single nan should result in Series elif len(indexer) == 1: diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 4d3f1b0539aee..a9eb7f94bea80 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -93,11 +93,9 @@ def test_scalar_non_numeric(self, index_func, klass): # getting for idxr, getitem in [(lambda x: x.iloc, False), (lambda x: x, True)]: - # gettitem on a DataFrame is a KeyError as it is indexing - # via labels on the columns - if getitem and isinstance(s, DataFrame): + if getitem: error = KeyError - msg = r"^3(\.0)?$" + msg = r"^3\.0?$" else: error = TypeError msg = ( @@ -116,6 +114,9 @@ def test_scalar_non_numeric(self, index_func, klass): "string", "unicode", "mixed", + "period", + "timedelta64", + "datetime64", }: error = KeyError msg = r"^3\.0$" @@ -183,12 +184,7 @@ def test_scalar_non_numeric_series_fallback(self, index_func): i = index_func(5) s = Series(np.arange(len(i)), index=i) s[3] - msg = ( - r"cannot do (label|positional) indexing " - fr"on {type(i).__name__} with these indexers \[3\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^3.0$"): s[3.0] def test_scalar_with_mixed(self): @@ -199,12 +195,12 @@ def test_scalar_with_mixed(self): # lookup in a pure stringstr # with an invalid indexer msg = ( - "cannot do label indexing " - fr"on {Index.__name__} with these indexers \[1\.0\] of " + r"cannot do label indexing " + r"on Index with these indexers \[1\.0\] of " r"type float|" "Cannot index by location index with a non-integer key" ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^1.0$"): s2[1.0] with pytest.raises(TypeError, match=msg): s2.iloc[1.0] @@ -218,12 +214,7 @@ def test_scalar_with_mixed(self): # mixed index so we have label # indexing - msg = ( - "cannot do label indexing " - fr"on {Index.__name__} with these indexers \[1\.0\] of " - "type float" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match="^1.0$"): s3[1.0] result = s3[1] diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 276d11a67ad18..4d042af8d59b4 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -35,7 +35,7 @@ def test_loc_getitem_label_out_of_range(self): "loc", 20, typs=["ints", "uints", "mixed"], fails=KeyError, ) self.check_result("loc", 20, typs=["labels"], fails=KeyError) - self.check_result("loc", 20, typs=["ts"], axes=0, fails=TypeError) + self.check_result("loc", 20, typs=["ts"], axes=0, fails=KeyError) self.check_result("loc", 20, typs=["floats"], axes=0, fails=KeyError) def test_loc_getitem_label_list(self):