From 16c712e6110893d68c9bde60ade70395547c3170 Mon Sep 17 00:00:00 2001 From: patrick Date: Thu, 7 Jul 2022 13:07:20 +0200 Subject: [PATCH 1/4] BUG: boolean indexer with NA raising when reindex is necessary --- pandas/core/indexing.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 665333d0d7b4f..3f88128d359cb 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -30,6 +30,7 @@ from pandas.core.dtypes.common import ( is_array_like, is_bool_dtype, + is_extension_array_dtype, is_hashable, is_integer, is_iterator, @@ -2534,12 +2535,22 @@ def check_bool_indexer(index: Index, key) -> np.ndarray: result = result.reindex(index) mask = isna(result._values) if mask.any(): - raise IndexingError( + + msg = ( "Unalignable boolean Series provided as " "indexer (index of the boolean Series and of " "the indexed object do not match)." ) - return result.astype(bool)._values + + if is_extension_array_dtype(key): + _, indexer = key.index.reindex(index) + if any(x == -1 for x in indexer): + raise IndexingError(msg) + + else: + raise IndexingError(msg) + else: + return result.astype(bool)._values if is_object_dtype(key): # key might be object-dtype bool, check_array_indexer needs bool array result = np.asarray(result, dtype=bool) From 7d946d6b5b887308bc1a2a1aa74007f5b72316eb Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 7 Jul 2022 13:36:01 +0200 Subject: [PATCH 2/4] Add tests --- doc/source/whatsnew/v1.5.0.rst | 2 ++ pandas/core/indexing.py | 17 +++++------------ pandas/tests/series/indexing/test_indexing.py | 19 +++++++++++++++++++ 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 0b450fab53137..8d8d892a3d7e1 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -1,3 +1,4 @@ + .. _whatsnew_150: What's new in 1.5.0 (??) @@ -885,6 +886,7 @@ Indexing - Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`) - Bug in :meth:`loc.__setitem__` treating ``range`` keys as positional instead of label-based (:issue:`45479`) - Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of casting to ``boolean`` dtype (:issue:`45462`) +- Bug in :meth:`Series.loc` raising with boolean indexer containing ``NA`` when :class:`Index` did not match (:issue:`46551`) - Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtype :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`) - Bug in :meth:`DataFrame.loc` when setting values to a column and right hand side is a dictionary (:issue:`47216`) - Bug in :meth:`DataFrame.loc` when setting a :class:`DataFrame` not aligning index in some cases (:issue:`47578`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 3f88128d359cb..b24e99c9b04bc 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2532,25 +2532,18 @@ def check_bool_indexer(index: Index, key) -> np.ndarray: """ result = key if isinstance(key, ABCSeries) and not key.index.equals(index): + if is_extension_array_dtype(result): + result[result.values._mask] = False + result = result.reindex(index) mask = isna(result._values) if mask.any(): - - msg = ( + raise IndexingError( "Unalignable boolean Series provided as " "indexer (index of the boolean Series and of " "the indexed object do not match)." ) - - if is_extension_array_dtype(key): - _, indexer = key.index.reindex(index) - if any(x == -1 for x in indexer): - raise IndexingError(msg) - - else: - raise IndexingError(msg) - else: - return result.astype(bool)._values + return result.astype(bool)._values if is_object_dtype(key): # key might be object-dtype bool, check_array_indexer needs bool array result = np.asarray(result, dtype=bool) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 3a8e14576a55d..2f4fffe57593f 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -5,7 +5,10 @@ import numpy as np import pytest +from pandas.errors import IndexingError + from pandas import ( + NA, DataFrame, IndexSlice, MultiIndex, @@ -330,6 +333,22 @@ def test_loc_setitem_all_false_indexer(): tm.assert_series_equal(ser, expected) +def test_loc_boolean_indexer_non_matching_index(): + # GH#46551 + ser = Series([1]) + result = ser.loc[Series([NA, False], dtype="boolean")] + expected = Series([], dtype="int64") + tm.assert_series_equal(result, expected) + + +def test_loc_boolean_indexer_miss_matching_index(): + # GH#46551 + ser = Series([1]) + indexer = Series([NA, False], dtype="boolean", index=[1, 2]) + with pytest.raises(IndexingError, match="Unalignable"): + ser.loc[indexer] + + class TestDeprecatedIndexers: @pytest.mark.parametrize("key", [{1}, {1: 1}]) def test_getitem_dict_and_set_deprecated(self, key): From 184681a72a884880c7dfb371f31d34df4d733769 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 8 Jul 2022 22:09:35 +0200 Subject: [PATCH 3/4] Remove space --- doc/source/whatsnew/v1.5.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 8d8d892a3d7e1..62a99ce3cae44 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -1,4 +1,3 @@ - .. _whatsnew_150: What's new in 1.5.0 (??) From 53d437bac97b9631b9804e6053f756669a74efda Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 8 Jul 2022 22:28:03 +0200 Subject: [PATCH 4/4] Use indexer --- pandas/core/indexing.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b24e99c9b04bc..30d6a8a9f019b 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2532,18 +2532,20 @@ def check_bool_indexer(index: Index, key) -> np.ndarray: """ result = key if isinstance(key, ABCSeries) and not key.index.equals(index): - if is_extension_array_dtype(result): - result[result.values._mask] = False - - result = result.reindex(index) - mask = isna(result._values) - if mask.any(): + indexer = result.index.get_indexer_for(index) + if -1 in indexer: raise IndexingError( "Unalignable boolean Series provided as " "indexer (index of the boolean Series and of " "the indexed object do not match)." ) - return result.astype(bool)._values + + result = result.take(indexer) + + # fall through for boolean + if not is_extension_array_dtype(result.dtype): + return result.astype(bool)._values + if is_object_dtype(key): # key might be object-dtype bool, check_array_indexer needs bool array result = np.asarray(result, dtype=bool)