From 0f70266d44308ae691fc4a3e7b647f2e10561c54 Mon Sep 17 00:00:00 2001 From: patrick <61934744+phofl@users.noreply.github.com> Date: Mon, 22 Feb 2021 23:59:23 +0100 Subject: [PATCH 1/2] Backport PR #39944: Fix regression for setitem not aligning rhs with boolean indexer --- doc/source/whatsnew/v1.2.3.rst | 2 +- pandas/core/frame.py | 3 +++ pandas/tests/frame/indexing/test_setitem.py | 9 +++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.3.rst b/doc/source/whatsnew/v1.2.3.rst index 5ed8fd84472c4..d305024909703 100644 --- a/doc/source/whatsnew/v1.2.3.rst +++ b/doc/source/whatsnew/v1.2.3.rst @@ -16,7 +16,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`~DataFrame.to_excel` raising ``KeyError`` when giving duplicate columns with ``columns`` attribute (:issue:`39695`) -- +- Fixed regression in :meth:`DataFrame.__setitem__` not aligning :class:`DataFrame` on right-hand side for boolean indexer (:issue:`39931`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c5324165ef68e..d1855774e5692 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3179,6 +3179,9 @@ def _setitem_array(self, key, value): key = check_bool_indexer(self.index, key) indexer = key.nonzero()[0] self._check_setitem_copy() + if isinstance(value, DataFrame): + # GH#39931 reindex since iloc does not align + value = value.reindex(self.index.take(indexer)) self.iloc[indexer] = value else: if isinstance(value, DataFrame): diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index cedef4784e4a1..90131ca321a15 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -404,3 +404,12 @@ def test_setitem_boolean_mask(self, mask_type, float_frame): expected = df.copy() expected.values[np.array(mask)] = np.nan tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc]) + def test_setitem_boolean_mask_aligning(self, indexer): + # GH#39931 + df = DataFrame({"a": [1, 4, 2, 3], "b": [5, 6, 7, 8]}) + expected = df.copy() + mask = df["a"] >= 3 + indexer(df)[mask] = indexer(df)[mask].sort_values("a") + tm.assert_frame_equal(df, expected) From 393467d27b63c0804582c02b96506559cf6580ef Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 23 Feb 2021 10:03:18 +0000 Subject: [PATCH 2/2] fix failing test --- pandas/tests/frame/indexing/test_setitem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 90131ca321a15..d40fc388fefbd 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -405,7 +405,7 @@ def test_setitem_boolean_mask(self, mask_type, float_frame): expected.values[np.array(mask)] = np.nan tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc]) + @pytest.mark.parametrize("indexer", [lambda x: x, lambda x: x.loc]) def test_setitem_boolean_mask_aligning(self, indexer): # GH#39931 df = DataFrame({"a": [1, 4, 2, 3], "b": [5, 6, 7, 8]})