Skip to content

Commit 62fa51f

Browse files
DriesSchaumont authored and JulianWgs committed
BUG: Do not allow boolean values for indexing with .loc without a boolean index. (pandas-dev#40726)
* Add boolean loc tests.
* BUG: Raise TypeError when using boolean indexer with loc.
* Fix styling issues.
* Adjustments for review.
* Add whatsnew entry.
* Switch boolean statements.
* Switch to KeyError.
* Switch to KeyError pt.2
* Remove stray dot in whatsnew
* Fix DeprecationWarning.
* Fix DeprecationWarning pt.2
* Adjustments to comments.
* Adjustments to comments pt. 2.
1 parent 72b1afa commit 62fa51f

File tree

5 files changed

+47
-13
lines changed

5 files changed

+47
-13
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,7 @@ Indexing
674674
- Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`)
675675
- Bug in setting numeric values into a boolean-dtype :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`)
676676
- Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contains duplicates (:issue:`40096`)
677+
- Bug in :meth:`DataFrame.loc` incorrectly matching non-boolean index elements (:issue:`20432`)
677678
- Bug in :meth:`Series.__delitem__` with ``ExtensionDtype`` incorrectly casting to ``ndarray`` (:issue:`40386`)
678679

679680
Missing

pandas/core/dtypes/common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1386,7 +1386,7 @@ def is_bool_dtype(arr_or_dtype) -> bool:
13861386
# we don't have a boolean Index class
13871387
# so its dtype is object; we need to infer to
13881388
# guess this
1389-
return arr_or_dtype.is_object and arr_or_dtype.inferred_type == "boolean"
1389+
return arr_or_dtype.is_object() and arr_or_dtype.inferred_type == "boolean"
13901390
elif is_extension_array_dtype(arr_or_dtype):
13911391
return getattr(dtype, "_is_boolean", False)
13921392

pandas/core/indexing.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -958,12 +958,19 @@ class _LocIndexer(_LocationIndexer):
958958

959959
@doc(_LocationIndexer._validate_key)
960960
def _validate_key(self, key, axis: int):
961-
962961
# valid for a collection of labels (we check their presence later)
963962
# slice of labels (where start-end in labels)
964963
# slice of integers (only if in the labels)
965-
# boolean
966-
pass
964+
# boolean not in slice and with boolean index
965+
if isinstance(key, bool) and not is_bool_dtype(self.obj.index):
966+
raise KeyError(
967+
f"{key}: boolean label can not be used without a boolean index"
968+
)
969+
970+
if isinstance(key, slice) and (
971+
isinstance(key.start, bool) or isinstance(key.stop, bool)
972+
):
973+
raise TypeError(f"{key}: boolean values can not be used in a slice")
967974

968975
def _has_valid_setitem_indexer(self, indexer) -> bool:
969976
return True

pandas/tests/frame/indexing/test_indexing.py

+6-9
Original file line numberDiff line numberDiff line change
@@ -944,20 +944,17 @@ def test_getitem_ix_boolean_duplicates_multiple(self):
944944
exp = df[df[0] > 0]
945945
tm.assert_frame_equal(result, exp)
946946

947-
def test_getitem_setitem_ix_bool_keyerror(self):
947+
@pytest.mark.parametrize("bool_value", [True, False])
948+
def test_getitem_setitem_ix_bool_keyerror(self, bool_value):
948949
# #2199
949950
df = DataFrame({"a": [1, 2, 3]})
950-
951-
with pytest.raises(KeyError, match=r"^False$"):
952-
df.loc[False]
953-
with pytest.raises(KeyError, match=r"^True$"):
954-
df.loc[True]
951+
message = f"{bool_value}: boolean label can not be used without a boolean index"
952+
with pytest.raises(KeyError, match=message):
953+
df.loc[bool_value]
955954

956955
msg = "cannot use a single bool to index into setitem"
957956
with pytest.raises(KeyError, match=msg):
958-
df.loc[False] = 0
959-
with pytest.raises(KeyError, match=msg):
960-
df.loc[True] = 0
957+
df.loc[bool_value] = 0
961958

962959
# TODO: rename? remove?
963960
def test_single_element_ix_dont_upcast(self, float_frame):

pandas/tests/indexing/test_loc.py

+29
Original file line numberDiff line numberDiff line change
@@ -2116,6 +2116,35 @@ def test_loc_getitem_slice_columns_mixed_dtype(self):
21162116
tm.assert_frame_equal(df.loc[:, 1:], expected)
21172117

21182118

2119+
class TestLocBooleanLabelsAndSlices(Base):
2120+
@pytest.mark.parametrize("bool_value", [True, False])
2121+
def test_loc_bool_incompatible_index_raises(
2122+
self, index, frame_or_series, bool_value
2123+
):
2124+
# GH20432
2125+
message = f"{bool_value}: boolean label can not be used without a boolean index"
2126+
if index.inferred_type != "boolean":
2127+
obj = frame_or_series(index=index, dtype="object")
2128+
with pytest.raises(KeyError, match=message):
2129+
obj.loc[bool_value]
2130+
2131+
@pytest.mark.parametrize("bool_value", [True, False])
2132+
def test_loc_bool_should_not_raise(self, frame_or_series, bool_value):
2133+
obj = frame_or_series(
2134+
index=Index([True, False], dtype="boolean"), dtype="object"
2135+
)
2136+
obj.loc[bool_value]
2137+
2138+
def test_loc_bool_slice_raises(self, index, frame_or_series):
2139+
# GH20432
2140+
message = (
2141+
r"slice\(True, False, None\): boolean values can not be used in a slice"
2142+
)
2143+
obj = frame_or_series(index=index, dtype="object")
2144+
with pytest.raises(TypeError, match=message):
2145+
obj.loc[True:False]
2146+
2147+
21192148
class TestLocBooleanMask:
21202149
def test_loc_setitem_bool_mask_timedeltaindex(self):
21212150
# GH#14946

0 commit comments

Comments
 (0)