From cfdac1ecbae9183b85874c68563aec04ec8cf8ff Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Tue, 30 Mar 2021 16:37:02 +0200 Subject: [PATCH 01/13] Add boolean loc tests. --- pandas/tests/indexing/test_loc.py | 39 +++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 85accac5a8235..b55231c474e24 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -25,16 +25,19 @@ IndexSlice, MultiIndex, Period, + RangeIndex, Series, SparseDtype, Timedelta, Timestamp, + UInt64Index, date_range, timedelta_range, to_datetime, to_timedelta, ) import pandas._testing as tm +from pandas.errors import InvalidIndexError from pandas.api.types import is_scalar from pandas.tests.indexing.common import Base @@ -2084,6 +2087,42 @@ def test_loc_getitem_slice_columns_mixed_dtype(self): tm.assert_frame_equal(df.loc[:, 1:], expected) +class TestLocBooleanLabelsAndSlices(Base): + @pytest.mark.parametrize( + "msg, error_type, index", + [ + (f"", KeyError, pd.RangeIndex(4)), + (f"", KeyError, pd.Int64Index(range(4))), + (f"", KeyError, pd.UInt64Index(range(4))), + (f"", KeyError, pd.Float64Index(range(4))), + (f"", KeyError, pd.CategoricalIndex(range(4))), + (f"", KeyError, pd.date_range(0, periods=4, freq="ns")), + (f"", KeyError, pd.timedelta_range(0, periods=4, freq="ns")), + (f"", InvalidIndexError, pd.interval_range(0, periods=4)), + (f"", KeyError, pd.Index([0, 1, 2, 3], dtype=object)), + (f"", KeyError, pd.MultiIndex.from_product([[0, 1], [0, 1]])), + (f"", KeyError, pd.period_range("2018Q1", freq="Q", periods=4)) + ] + ) + def test_loc_bool_slice_incompatible_index_raises(self, msg, error_type, index): + # GH20432 + df = DataFrame(range(4)) + df.index = index + with pytest.raises(error_type, match=msg): + df.loc[True] + + @pytest.mark.parametrize( + "index", + [ + pd.Index([False, False], dtype=bool) + ] + ) + def test_loc_bool_should_not_raise(self, index): + df = DataFrame(range(2)) + df.index = index + result = df.loc[True] + + class TestLocBooleanMask: def test_loc_setitem_bool_mask_timedeltaindex(self): # GH#14946 From 0df3a10846dc6a767c4895724c91060068e845ba Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Wed, 31 Mar 2021 20:43:18 +0200 Subject: [PATCH 02/13] BUG: Raise TypeError when using boolean indexer with loc. --- pandas/core/dtypes/common.py | 2 +- pandas/core/indexing.py | 14 ++-- pandas/tests/frame/indexing/test_indexing.py | 10 +-- pandas/tests/indexing/test_loc.py | 67 +++++++++++++------- 4 files changed, 60 insertions(+), 33 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index b9e785ff2f887..e9bd30c8b9ce5 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1386,7 +1386,7 @@ def is_bool_dtype(arr_or_dtype) -> bool: # we don't have a boolean Index class # so its object, we need to infer to # guess this - return arr_or_dtype.is_object and arr_or_dtype.inferred_type == "boolean" + return arr_or_dtype.is_object() and arr_or_dtype.inferred_type == "boolean" elif is_extension_array_dtype(arr_or_dtype): return getattr(dtype, "_is_boolean", False) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ae0f853db628e..feed8abaeca17 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -17,7 +17,7 @@ from pandas._config.config import option_context from pandas._libs.indexing import NDFrameIndexerBase -from pandas._libs.lib import item_from_zerodim +from pandas._libs.lib import item_from_zerodim, infer_dtype from pandas.errors import ( AbstractMethodError, InvalidIndexError, @@ -961,12 +961,16 @@ class _LocIndexer(_LocationIndexer): @doc(_LocationIndexer._validate_key) def _validate_key(self, key, axis: int): - # valid for a collection of labels (we check their presence later) # slice of labels (where start-end in labels) # slice of integers (only if in the labels) - # boolean - pass + # boolean not in slice and with boolean index + if not is_bool_dtype(self.obj.index) and isinstance(key, bool): + raise TypeError('Boolean label can not be used without a boolean index') + + if isinstance(key, slice) and \ + (isinstance(key.start, bool) or isinstance(key.stop, bool)): + raise TypeError('Boolean values can not be used in a slice') def _has_valid_setitem_indexer(self, indexer) -> bool: return True @@ -2366,7 +2370,7 @@ def convert_missing_indexer(indexer): indexer = indexer["key"] if isinstance(indexer, bool): - raise KeyError("cannot use a single bool to index into setitem") + raise TypeError("cannot use a single bool to index into setitem") return indexer, True return indexer, False diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 928b42b915b18..da642e06ca0be 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -947,16 +947,16 @@ def test_getitem_ix_boolean_duplicates_multiple(self): def test_getitem_setitem_ix_bool_keyerror(self): # #2199 df = DataFrame({"a": [1, 2, 3]}) - - with pytest.raises(KeyError, match=r"^False$"): + message = "Boolean label can not be used without a boolean index" + with pytest.raises(TypeError, match=message): df.loc[False] - with pytest.raises(KeyError, match=r"^True$"): + with pytest.raises(TypeError, match=message): df.loc[True] msg = "cannot use a single bool to index into setitem" - with pytest.raises(KeyError, match=msg): + with pytest.raises(TypeError, match=msg): df.loc[False] = 0 - with pytest.raises(KeyError, match=msg): + with pytest.raises(TypeError, match=msg): df.loc[True] = 0 # TODO: rename? remove? diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index b55231c474e24..3fd301a298689 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2089,38 +2089,61 @@ def test_loc_getitem_slice_columns_mixed_dtype(self): class TestLocBooleanLabelsAndSlices(Base): @pytest.mark.parametrize( - "msg, error_type, index", + "index", [ - (f"", KeyError, pd.RangeIndex(4)), - (f"", KeyError, pd.Int64Index(range(4))), - (f"", KeyError, pd.UInt64Index(range(4))), - (f"", KeyError, pd.Float64Index(range(4))), - (f"", KeyError, pd.CategoricalIndex(range(4))), - (f"", KeyError, pd.date_range(0, periods=4, freq="ns")), - (f"", KeyError, pd.timedelta_range(0, periods=4, freq="ns")), - (f"", InvalidIndexError, pd.interval_range(0, periods=4)), - (f"", KeyError, pd.Index([0, 1, 2, 3], dtype=object)), - (f"", KeyError, pd.MultiIndex.from_product([[0, 1], [0, 1]])), - (f"", KeyError, pd.period_range("2018Q1", freq="Q", periods=4)) + pd.RangeIndex(4), + pd.Int64Index(range(4)), + pd.UInt64Index(range(4)), + pd.Float64Index(range(4)), + pd.CategoricalIndex(range(4)), + pd.date_range(0, periods=4, freq="ns"), + pd.timedelta_range(0, periods=4, freq="ns"), + pd.interval_range(0, periods=4), + pd.Index([0, 1, 2, 3], dtype=object), + pd.MultiIndex.from_product([[0, 1], [0, 1]]), + pd.period_range("2018Q1", freq="Q", periods=4) ] ) - def test_loc_bool_slice_incompatible_index_raises(self, msg, error_type, index): + def test_loc_bool_incompatible_index_raises(self, index, frame_or_series): # GH20432 - df = DataFrame(range(4)) - df.index = index - with pytest.raises(error_type, match=msg): - df.loc[True] + message = 'Boolean label can not be used without a boolean index' + obj = frame_or_series(range(4), index=index) + with pytest.raises(TypeError, match=message): + obj.loc[True] + + @pytest.mark.parametrize( + "index", + [ + pd.Index([True, False], dtype="boolean") + ] + ) + def test_loc_bool_should_not_raise(self, index, frame_or_series): + obj = frame_or_series(range(2), index=index) + obj.loc[True] @pytest.mark.parametrize( "index", [ - pd.Index([False, False], dtype=bool) + pd.RangeIndex(4), + pd.Int64Index(range(4)), + pd.UInt64Index(range(4)), + pd.Float64Index(range(4)), + pd.CategoricalIndex(range(4)), + pd.date_range(0, periods=4, freq="ns"), + pd.timedelta_range(0, periods=4, freq="ns"), + pd.interval_range(0, periods=4), + pd.Index([0, 1, 2, 3], dtype=object), + pd.Index([True, True, False, False], dtype=object), + pd.MultiIndex.from_product([[0, 1], [0, 1]]), + pd.period_range("2018Q1", freq="Q", periods=4) ] ) - def test_loc_bool_should_not_raise(self, index): - df = DataFrame(range(2)) - df.index = index - result = df.loc[True] + def test_loc_bool_slice_raises(self, index, frame_or_series): + # GH20432 + message = 'Boolean values can not be used in a slice' + obj = frame_or_series(range(4), index=index) + with pytest.raises(TypeError, match=message): + obj.loc[True:False] class TestLocBooleanMask: From ee07ee0296920e30c8f0526270a909ff93f86a17 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Thu, 1 Apr 2021 13:46:42 +0200 Subject: [PATCH 03/13] Fix styling issues. --- pandas/core/indexing.py | 11 ++++--- pandas/tests/indexing/test_loc.py | 55 ++++++++++++++----------------- 2 files changed, 31 insertions(+), 35 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1039a8be12f39..dc04c9b43ddb5 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -17,7 +17,7 @@ from pandas._config.config import option_context from pandas._libs.indexing import NDFrameIndexerBase -from pandas._libs.lib import item_from_zerodim, infer_dtype +from pandas._libs.lib import item_from_zerodim from pandas.errors import ( AbstractMethodError, InvalidIndexError, @@ -966,11 +966,12 @@ def _validate_key(self, key, axis: int): # slice of integers (only if in the labels) # boolean not in slice and with boolean index if not is_bool_dtype(self.obj.index) and isinstance(key, bool): - raise TypeError('Boolean label can not be used without a boolean index') + raise TypeError("Boolean label can not be used without a boolean index") - if isinstance(key, slice) and \ - (isinstance(key.start, bool) or isinstance(key.stop, bool)): - raise TypeError('Boolean values can not be used in a slice') + if isinstance(key, slice) and ( + isinstance(key.start, bool) or isinstance(key.stop, bool) + ): + raise TypeError("Boolean values can not be used in a slice") def _has_valid_setitem_indexer(self, indexer) -> bool: return True diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 67ecc5743e4f7..7fe8e97dad5f0 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -23,6 +23,7 @@ DatetimeIndex, Index, IndexSlice, + Int64Index, MultiIndex, Period, RangeIndex, @@ -37,7 +38,6 @@ to_timedelta, ) import pandas._testing as tm -from pandas.errors import InvalidIndexError from pandas.api.types import is_scalar from pandas.tests.indexing.common import Base @@ -2092,32 +2092,27 @@ class TestLocBooleanLabelsAndSlices(Base): @pytest.mark.parametrize( "index", [ - pd.RangeIndex(4), - pd.Int64Index(range(4)), - pd.UInt64Index(range(4)), + RangeIndex(4), + Int64Index(range(4)), + UInt64Index(range(4)), pd.Float64Index(range(4)), - pd.CategoricalIndex(range(4)), - pd.date_range(0, periods=4, freq="ns"), - pd.timedelta_range(0, periods=4, freq="ns"), + CategoricalIndex(range(4)), + date_range(0, periods=4, freq="ns"), + timedelta_range(0, periods=4, freq="ns"), pd.interval_range(0, periods=4), - pd.Index([0, 1, 2, 3], dtype=object), - pd.MultiIndex.from_product([[0, 1], [0, 1]]), - pd.period_range("2018Q1", freq="Q", periods=4) - ] + Index([0, 1, 2, 3], dtype=object), + MultiIndex.from_product([[0, 1], [0, 1]]), + pd.period_range("2018Q1", freq="Q", periods=4), + ], ) def test_loc_bool_incompatible_index_raises(self, index, frame_or_series): # GH20432 - message = 'Boolean label can not be used without a boolean index' + message = "Boolean label can not be used without a boolean index" obj = frame_or_series(range(4), index=index) with pytest.raises(TypeError, match=message): obj.loc[True] - @pytest.mark.parametrize( - "index", - [ - pd.Index([True, False], dtype="boolean") - ] - ) + @pytest.mark.parametrize("index", [Index([True, False], dtype="boolean")]) def test_loc_bool_should_not_raise(self, index, frame_or_series): obj = frame_or_series(range(2), index=index) obj.loc[True] @@ -2125,23 +2120,23 @@ def test_loc_bool_should_not_raise(self, index, frame_or_series): @pytest.mark.parametrize( "index", [ - pd.RangeIndex(4), - pd.Int64Index(range(4)), - pd.UInt64Index(range(4)), + RangeIndex(4), + Int64Index(range(4)), + UInt64Index(range(4)), pd.Float64Index(range(4)), - pd.CategoricalIndex(range(4)), - pd.date_range(0, periods=4, freq="ns"), - pd.timedelta_range(0, periods=4, freq="ns"), + CategoricalIndex(range(4)), + date_range(0, periods=4, freq="ns"), + timedelta_range(0, periods=4, freq="ns"), pd.interval_range(0, periods=4), - pd.Index([0, 1, 2, 3], dtype=object), - pd.Index([True, True, False, False], dtype=object), - pd.MultiIndex.from_product([[0, 1], [0, 1]]), - pd.period_range("2018Q1", freq="Q", periods=4) - ] + Index([0, 1, 2, 3], dtype=object), + Index([True, True, False, False], dtype=object), + MultiIndex.from_product([[0, 1], [0, 1]]), + pd.period_range("2018Q1", freq="Q", periods=4), + ], ) def test_loc_bool_slice_raises(self, index, frame_or_series): # GH20432 - message = 'Boolean values can not be used in a slice' + message = "Boolean values can not be used in a slice" obj = frame_or_series(range(4), index=index) with pytest.raises(TypeError, match=message): obj.loc[True:False] From 78c9151aedfa3b10a53749c91445941f7721eb75 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Fri, 2 Apr 2021 19:12:32 +0200 Subject: [PATCH 04/13] Adjustments for review. --- pandas/tests/indexing/test_loc.py | 50 +++++-------------------------- 1 file changed, 7 insertions(+), 43 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 7fe8e97dad5f0..b9caf9d785e4a 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -23,15 +23,12 @@ DatetimeIndex, Index, IndexSlice, - Int64Index, MultiIndex, Period, - RangeIndex, Series, SparseDtype, Timedelta, Timestamp, - UInt64Index, date_range, timedelta_range, to_datetime, @@ -2089,55 +2086,22 @@ def test_loc_getitem_slice_columns_mixed_dtype(self): class TestLocBooleanLabelsAndSlices(Base): - @pytest.mark.parametrize( - "index", - [ - RangeIndex(4), - Int64Index(range(4)), - UInt64Index(range(4)), - pd.Float64Index(range(4)), - CategoricalIndex(range(4)), - date_range(0, periods=4, freq="ns"), - timedelta_range(0, periods=4, freq="ns"), - pd.interval_range(0, periods=4), - Index([0, 1, 2, 3], dtype=object), - MultiIndex.from_product([[0, 1], [0, 1]]), - pd.period_range("2018Q1", freq="Q", periods=4), - ], - ) def test_loc_bool_incompatible_index_raises(self, index, frame_or_series): # GH20432 message = "Boolean label can not be used without a boolean index" - obj = frame_or_series(range(4), index=index) - with pytest.raises(TypeError, match=message): - obj.loc[True] + if index.inferred_type != "boolean": + obj = frame_or_series(index=index) + with pytest.raises(TypeError, match=message): + obj.loc[True] - @pytest.mark.parametrize("index", [Index([True, False], dtype="boolean")]) - def test_loc_bool_should_not_raise(self, index, frame_or_series): - obj = frame_or_series(range(2), index=index) + def test_loc_bool_should_not_raise(self, frame_or_series): + obj = frame_or_series(index=Index([True, False], dtype="boolean")) obj.loc[True] - @pytest.mark.parametrize( - "index", - [ - RangeIndex(4), - Int64Index(range(4)), - UInt64Index(range(4)), - pd.Float64Index(range(4)), - CategoricalIndex(range(4)), - date_range(0, periods=4, freq="ns"), - timedelta_range(0, periods=4, freq="ns"), - pd.interval_range(0, periods=4), - Index([0, 1, 2, 3], dtype=object), - Index([True, True, False, False], dtype=object), - MultiIndex.from_product([[0, 1], [0, 1]]), - pd.period_range("2018Q1", freq="Q", periods=4), - ], - ) def test_loc_bool_slice_raises(self, index, frame_or_series): # GH20432 message = "Boolean values can not be used in a slice" - obj = frame_or_series(range(4), index=index) + obj = frame_or_series(index=index) with pytest.raises(TypeError, match=message): obj.loc[True:False] From dccbb94668f7765d272d51d33d9957b7feff8881 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Fri, 2 Apr 2021 20:24:28 +0200 Subject: [PATCH 05/13] Add whatsnew entry. --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 63902b53ea36d..7fafaede85c55 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -614,6 +614,7 @@ Indexing - Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`) - Bug in setting numeric values into a into a boolean-dtypes :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`) - Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contains duplicates (:issue:`40096`) +- Bug in :meth:`DataFrame.loc` incorrectly allowing the lookup of boolean labels and slices (:issue:`20432`). Missing ^^^^^^^ From c8776b8cc0c32b0ad8f7d89b7d1edcd33daf1052 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Fri, 2 Apr 2021 20:27:38 +0200 Subject: [PATCH 06/13] Switch boolean statements. --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index dc04c9b43ddb5..eac3d175e6607 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -965,7 +965,7 @@ def _validate_key(self, key, axis: int): # slice of labels (where start-end in labels) # slice of integers (only if in the labels) # boolean not in slice and with boolean index - if not is_bool_dtype(self.obj.index) and isinstance(key, bool): + if isinstance(key, bool) and not is_bool_dtype(self.obj.index): raise TypeError("Boolean label can not be used without a boolean index") if isinstance(key, slice) and ( From 60c44ffc4080aaf4fa635c2d7c677bbb54a48a61 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Fri, 2 Apr 2021 20:33:46 +0200 Subject: [PATCH 07/13] Switch to KeyError. --- pandas/core/indexing.py | 2 +- pandas/tests/frame/indexing/test_indexing.py | 4 ++-- pandas/tests/indexing/test_loc.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index eac3d175e6607..009698225d35c 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -966,7 +966,7 @@ def _validate_key(self, key, axis: int): # slice of integers (only if in the labels) # boolean not in slice and with boolean index if isinstance(key, bool) and not is_bool_dtype(self.obj.index): - raise TypeError("Boolean label can not be used without a boolean index") + raise KeyError("Boolean label can not be used without a boolean index") if isinstance(key, slice) and ( isinstance(key.start, bool) or isinstance(key.stop, bool) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index da642e06ca0be..98c14c23f2557 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -948,9 +948,9 @@ def test_getitem_setitem_ix_bool_keyerror(self): # #2199 df = DataFrame({"a": [1, 2, 3]}) message = "Boolean label can not be used without a boolean index" - with pytest.raises(TypeError, match=message): + with pytest.raises(KeyError, match=message): df.loc[False] - with pytest.raises(TypeError, match=message): + with pytest.raises(KeyError, match=message): df.loc[True] msg = "cannot use a single bool to index into setitem" diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index b9caf9d785e4a..ee02a95c7d391 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2091,7 +2091,7 @@ def test_loc_bool_incompatible_index_raises(self, index, frame_or_series): message = "Boolean label can not be used without a boolean index" if index.inferred_type != "boolean": obj = frame_or_series(index=index) - with pytest.raises(TypeError, match=message): + with pytest.raises(KeyError, match=message): obj.loc[True] def test_loc_bool_should_not_raise(self, frame_or_series): From c789abd1d48340e5cc486001f154f792ee9c5c93 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Fri, 2 Apr 2021 20:38:05 +0200 Subject: [PATCH 08/13] Switch to KeyError pt.2 --- pandas/core/indexing.py | 2 +- pandas/tests/frame/indexing/test_indexing.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 009698225d35c..f7441ac6db726 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2371,7 +2371,7 @@ def convert_missing_indexer(indexer): indexer = indexer["key"] if isinstance(indexer, bool): - raise TypeError("cannot use a single bool to index into setitem") + raise KeyError("cannot use a single bool to index into setitem") return indexer, True return indexer, False diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 98c14c23f2557..b64e2d94113b5 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -954,9 +954,9 @@ def test_getitem_setitem_ix_bool_keyerror(self): df.loc[True] msg = "cannot use a single bool to index into setitem" - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match=msg): df.loc[False] = 0 - with pytest.raises(TypeError, match=msg): + with pytest.raises(KeyError, match=msg): df.loc[True] = 0 # TODO: rename? remove? From b2823278e85c87dd4be8e196b0e1da27bd70f4e3 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Fri, 2 Apr 2021 20:41:26 +0200 Subject: [PATCH 09/13] Remove stray dot in whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 7fafaede85c55..6001457633ae7 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -614,7 +614,7 @@ Indexing - Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`) - Bug in setting numeric values into a into a boolean-dtypes :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`) - Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contains duplicates (:issue:`40096`) -- Bug in :meth:`DataFrame.loc` incorrectly allowing the lookup of boolean labels and slices (:issue:`20432`). +- Bug in :meth:`DataFrame.loc` incorrectly allowing the lookup of boolean labels and slices (:issue:`20432`) Missing ^^^^^^^ From 938ed73a460c4b0963ff2a69642769814d90678a Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Sat, 3 Apr 2021 16:25:22 +0200 Subject: [PATCH 10/13] Fix DeprecationWarning. --- pandas/tests/indexing/test_loc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 21a35dea50479..7da6bb66d2d95 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2121,7 +2121,7 @@ def test_loc_bool_incompatible_index_raises(self, index, frame_or_series): # GH20432 message = "Boolean label can not be used without a boolean index" if index.inferred_type != "boolean": - obj = frame_or_series(index=index) + obj = frame_or_series(index=index, dtype="object") with pytest.raises(KeyError, match=message): obj.loc[True] @@ -2132,7 +2132,7 @@ def test_loc_bool_should_not_raise(self, frame_or_series): def test_loc_bool_slice_raises(self, index, frame_or_series): # GH20432 message = "Boolean values can not be used in a slice" - obj = frame_or_series(index=index) + obj = frame_or_series(index=index, dtype="object") with pytest.raises(TypeError, match=message): obj.loc[True:False] From 7f35cf8a3bc44d02dfd24860bef9669c7edf8386 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Sun, 4 Apr 2021 13:38:57 +0200 Subject: [PATCH 11/13] Fix DeprecationWarning pt.2 --- pandas/tests/indexing/test_loc.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 7da6bb66d2d95..db7cb2fdbe0b8 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2126,7 +2126,9 @@ def test_loc_bool_incompatible_index_raises(self, index, frame_or_series): obj.loc[True] def test_loc_bool_should_not_raise(self, frame_or_series): - obj = frame_or_series(index=Index([True, False], dtype="boolean")) + obj = frame_or_series( + index=Index([True, False], dtype="boolean"), dtype="object" + ) obj.loc[True] def test_loc_bool_slice_raises(self, index, frame_or_series): From 13bc416240c96c4c1f1ac250194a484c55d371a2 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Tue, 6 Apr 2021 09:22:16 +0200 Subject: [PATCH 12/13] Adjustments to comments. --- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/core/indexing.py | 6 ++++-- pandas/tests/indexing/test_loc.py | 18 ++++++++++++------ 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 48c4e471724e7..f9b34d7ade1bf 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -642,7 +642,7 @@ Indexing - Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`) - Bug in setting numeric values into a into a boolean-dtypes :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`) - Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contains duplicates (:issue:`40096`) -- Bug in :meth:`DataFrame.loc` incorrectly allowing the lookup of boolean labels and slices (:issue:`20432`) +- Bug in :meth:`DataFrame.loc` incorrectly matching non-boolean index elements (:issue:`20432`) Missing ^^^^^^^ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index f7441ac6db726..143f7aadc1594 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -966,12 +966,14 @@ def _validate_key(self, key, axis: int): # slice of integers (only if in the labels) # boolean not in slice and with boolean index if isinstance(key, bool) and not is_bool_dtype(self.obj.index): - raise KeyError("Boolean label can not be used without a boolean index") + raise KeyError( + f"{key}: boolean label can not be used without a boolean index" + ) if isinstance(key, slice) and ( isinstance(key.start, bool) or isinstance(key.stop, bool) ): - raise TypeError("Boolean values can not be used in a slice") + raise TypeError(f"{key}: boolean values can not be used in a slice") def _has_valid_setitem_indexer(self, indexer) -> bool: return True diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index db7cb2fdbe0b8..eac46fb64b65e 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2117,23 +2117,29 @@ def test_loc_getitem_slice_columns_mixed_dtype(self): class TestLocBooleanLabelsAndSlices(Base): - def test_loc_bool_incompatible_index_raises(self, index, frame_or_series): + @pytest.mark.parametrize("bool_value", [True, False]) + def test_loc_bool_incompatible_index_raises( + self, index, frame_or_series, bool_value + ): # GH20432 - message = "Boolean label can not be used without a boolean index" + message = f"{bool_value}: boolean label can not be used without a boolean index" if index.inferred_type != "boolean": obj = frame_or_series(index=index, dtype="object") with pytest.raises(KeyError, match=message): - obj.loc[True] + obj.loc[bool_value] - def test_loc_bool_should_not_raise(self, frame_or_series): + @pytest.mark.parametrize("bool_value", [True, False]) + def test_loc_bool_should_not_raise(self, frame_or_series, bool_value): obj = frame_or_series( index=Index([True, False], dtype="boolean"), dtype="object" ) - obj.loc[True] + obj.loc[bool_value] def test_loc_bool_slice_raises(self, index, frame_or_series): # GH20432 - message = "Boolean values can not be used in a slice" + message = ( + r"slice\(True, False, None\): boolean values can not be used in a slice" + ) obj = frame_or_series(index=index, dtype="object") with pytest.raises(TypeError, match=message): obj.loc[True:False] From 5330d1e7f9c21c0737f1ad986128d2a40bfc0cc4 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Tue, 6 Apr 2021 10:09:10 +0200 Subject: [PATCH 13/13] Adjustments to comments pt. 2. --- pandas/tests/frame/indexing/test_indexing.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index b64e2d94113b5..e2121fa2318eb 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -944,20 +944,17 @@ def test_getitem_ix_boolean_duplicates_multiple(self): exp = df[df[0] > 0] tm.assert_frame_equal(result, exp) - def test_getitem_setitem_ix_bool_keyerror(self): + @pytest.mark.parametrize("bool_value", [True, False]) + def test_getitem_setitem_ix_bool_keyerror(self, bool_value): # #2199 df = DataFrame({"a": [1, 2, 3]}) - message = "Boolean label can not be used without a boolean index" + message = f"{bool_value}: boolean label can not be used without a boolean index" with pytest.raises(KeyError, match=message): - df.loc[False] - with pytest.raises(KeyError, match=message): - df.loc[True] + df.loc[bool_value] msg = "cannot use a single bool to index into setitem" with pytest.raises(KeyError, match=msg): - df.loc[False] = 0 - with pytest.raises(KeyError, match=msg): - df.loc[True] = 0 + df.loc[bool_value] = 0 # TODO: rename? remove? def test_single_element_ix_dont_upcast(self, float_frame):