Skip to content

BUG: Do not allow boolean values for indexing with .loc without a boolean index. #40726

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Apr 6, 2021
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,7 @@ Indexing
- Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`)
- Bug in setting numeric values into a boolean-dtype :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`)
- Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contains duplicates (:issue:`40096`)
- Bug in :meth:`DataFrame.loc` incorrectly allowing the lookup of boolean labels and slices (:issue:`20432`)

Missing
^^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1386,7 +1386,7 @@ def is_bool_dtype(arr_or_dtype) -> bool:
# we don't have a boolean Index class,
# so it's object dtype and we need to infer
# the type to guess this
return arr_or_dtype.is_object and arr_or_dtype.inferred_type == "boolean"
return arr_or_dtype.is_object() and arr_or_dtype.inferred_type == "boolean"
elif is_extension_array_dtype(arr_or_dtype):
return getattr(dtype, "_is_boolean", False)

Expand Down
11 changes: 8 additions & 3 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -961,12 +961,17 @@ class _LocIndexer(_LocationIndexer):

@doc(_LocationIndexer._validate_key)
def _validate_key(self, key, axis: int):
    # NOTE(review): no docstring is written here because the ``doc``
    # decorator is handed the parent's ``_validate_key`` and presumably
    # supplies the documentation -- confirm before adding one.
    #
    # Accepted key forms:
    #   - a collection of labels (their presence is checked later)
    #   - a slice of labels (where start-end in labels)
    #   - a slice of integers (only if in the labels)
    #   - a boolean scalar, but only when the indexed axis is boolean
    # Boolean values are never accepted as slice bounds.
    #
    # Raises KeyError for a boolean scalar on a non-boolean axis and
    # TypeError for a slice whose start or stop is a boolean.

    # Look at the labels of the axis actually being validated rather than
    # always the row index, so that a boolean column label is judged
    # against the columns and not against the rows.
    axis_labels = self.obj._get_axis(axis)
    if isinstance(key, bool) and not is_bool_dtype(axis_labels):
        raise KeyError("Boolean label can not be used without a boolean index")

    if isinstance(key, slice) and (
        isinstance(key.start, bool) or isinstance(key.stop, bool)
    ):
        raise TypeError("Boolean values can not be used in a slice")

def _has_valid_setitem_indexer(self, indexer) -> bool:
return True
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -947,10 +947,10 @@ def test_getitem_ix_boolean_duplicates_multiple(self):
def test_getitem_setitem_ix_bool_keyerror(self):
# #2199
df = DataFrame({"a": [1, 2, 3]})

with pytest.raises(KeyError, match=r"^False$"):
message = "Boolean label can not be used without a boolean index"
with pytest.raises(KeyError, match=message):
df.loc[False]
with pytest.raises(KeyError, match=r"^True$"):
with pytest.raises(KeyError, match=message):
df.loc[True]

msg = "cannot use a single bool to index into setitem"
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2116,6 +2116,29 @@ def test_loc_getitem_slice_columns_mixed_dtype(self):
tm.assert_frame_equal(df.loc[:, 1:], expected)


class TestLocBooleanLabelsAndSlices(Base):
    """Checks on how ``.loc`` treats boolean labels and boolean slice bounds."""

    def test_loc_bool_incompatible_index_raises(self, index, frame_or_series):
        # GH20432
        # Indexes inferred as boolean are not exercised by this test.
        if index.inferred_type == "boolean":
            return

        msg = "Boolean label can not be used without a boolean index"
        obj = frame_or_series(index=index, dtype="object")
        with pytest.raises(KeyError, match=msg):
            obj.loc[True]

    def test_loc_bool_should_not_raise(self, frame_or_series):
        # A boolean label looked up on a boolean index must go through
        # without an exception; the result itself is not inspected.
        bool_index = Index([True, False], dtype="boolean")
        obj = frame_or_series(index=bool_index, dtype="object")
        obj.loc[True]

    def test_loc_bool_slice_raises(self, index, frame_or_series):
        # GH20432
        # Boolean slice bounds are rejected for every index fixture.
        obj = frame_or_series(index=index, dtype="object")
        expected_msg = "Boolean values can not be used in a slice"
        with pytest.raises(TypeError, match=expected_msg):
            obj.loc[True:False]


class TestLocBooleanMask:
def test_loc_setitem_bool_mask_timedeltaindex(self):
# GH#14946
Expand Down