Patch: reject boolean labels and boolean slice bounds in ``.loc`` (GH#20432)

Touches the v1.3.0 whatsnew entry, ``is_bool_dtype`` (now calls
``Index.is_object()`` instead of testing the bound method for truthiness),
``_LocIndexer._validate_key`` and the related tests.  ``_validate_key``
inspects the axis that is actually being indexed (``axis`` argument) rather
than always the row index.  The blob hashes on the ``index`` lines are the
ones recorded in the original patch.

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 5e95cd6e5ee10..21e6f0ea57451 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -674,6 +674,7 @@ Indexing
 - Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`)
 - Bug in setting numeric values into a into a boolean-dtypes :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`)
 - Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contains duplicates (:issue:`40096`)
+- Bug in :meth:`DataFrame.loc` incorrectly matching non-boolean index elements (:issue:`20432`)
 - Bug in :meth:`Series.__delitem__` with ``ExtensionDtype`` incorrectly casting to ``ndarray`` (:issue:`40386`)
 
 Missing
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index 3d8d189046d8a..59550927299fe 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -1386,7 +1386,7 @@ def is_bool_dtype(arr_or_dtype) -> bool:
         # we don't have a boolean Index class
         # so its object, we need to infer to
         # guess this
-        return arr_or_dtype.is_object and arr_or_dtype.inferred_type == "boolean"
+        return arr_or_dtype.is_object() and arr_or_dtype.inferred_type == "boolean"
     elif is_extension_array_dtype(arr_or_dtype):
         return getattr(dtype, "_is_boolean", False)
 
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 4c8a6a200b196..143f7aadc1594 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -961,12 +961,20 @@ class _LocIndexer(_LocationIndexer):
 
     @doc(_LocationIndexer._validate_key)
     def _validate_key(self, key, axis: int):
-
         # valid for a collection of labels (we check their presence later)
         # slice of labels (where start-end in labels)
         # slice of integers (only if in the labels)
-        # boolean
-        pass
+        # boolean not in slice and with boolean index
+        # (inspect the axis being indexed, not unconditionally the row index)
+        if isinstance(key, bool) and not is_bool_dtype(self.obj._get_axis(axis)):
+            raise KeyError(
+                f"{key}: boolean label can not be used without a boolean index"
+            )
+
+        if isinstance(key, slice) and (
+            isinstance(key.start, bool) or isinstance(key.stop, bool)
+        ):
+            raise TypeError(f"{key}: boolean values can not be used in a slice")
 
     def _has_valid_setitem_indexer(self, indexer) -> bool:
         return True
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 928b42b915b18..e2121fa2318eb 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -944,20 +944,17 @@ def test_getitem_ix_boolean_duplicates_multiple(self):
         exp = df[df[0] > 0]
         tm.assert_frame_equal(result, exp)
 
-    def test_getitem_setitem_ix_bool_keyerror(self):
+    @pytest.mark.parametrize("bool_value", [True, False])
+    def test_getitem_setitem_ix_bool_keyerror(self, bool_value):
         # #2199
         df = DataFrame({"a": [1, 2, 3]})
-
-        with pytest.raises(KeyError, match=r"^False$"):
-            df.loc[False]
-        with pytest.raises(KeyError, match=r"^True$"):
-            df.loc[True]
+        message = f"{bool_value}: boolean label can not be used without a boolean index"
+        with pytest.raises(KeyError, match=message):
+            df.loc[bool_value]
 
         msg = "cannot use a single bool to index into setitem"
         with pytest.raises(KeyError, match=msg):
-            df.loc[False] = 0
-        with pytest.raises(KeyError, match=msg):
-            df.loc[True] = 0
+            df.loc[bool_value] = 0
 
     # TODO: rename? remove?
     def test_single_element_ix_dont_upcast(self, float_frame):
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 734cf13289c1f..eac46fb64b65e 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -2116,6 +2116,35 @@ def test_loc_getitem_slice_columns_mixed_dtype(self):
         tm.assert_frame_equal(df.loc[:, 1:], expected)
 
 
+class TestLocBooleanLabelsAndSlices(Base):
+    @pytest.mark.parametrize("bool_value", [True, False])
+    def test_loc_bool_incompatible_index_raises(
+        self, index, frame_or_series, bool_value
+    ):
+        # GH20432
+        message = f"{bool_value}: boolean label can not be used without a boolean index"
+        if index.inferred_type != "boolean":
+            obj = frame_or_series(index=index, dtype="object")
+            with pytest.raises(KeyError, match=message):
+                obj.loc[bool_value]
+
+    @pytest.mark.parametrize("bool_value", [True, False])
+    def test_loc_bool_should_not_raise(self, frame_or_series, bool_value):
+        obj = frame_or_series(
+            index=Index([True, False], dtype="boolean"), dtype="object"
+        )
+        obj.loc[bool_value]
+
+    def test_loc_bool_slice_raises(self, index, frame_or_series):
+        # GH20432
+        message = (
+            r"slice\(True, False, None\): boolean values can not be used in a slice"
+        )
+        obj = frame_or_series(index=index, dtype="object")
+        with pytest.raises(TypeError, match=message):
+            obj.loc[True:False]
+
+
 class TestLocBooleanMask:
     def test_loc_setitem_bool_mask_timedeltaindex(self):
         # GH#14946