From cbea56894526b60553b90ba9bd90736e300be740 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 20 Feb 2021 23:20:59 +0100 Subject: [PATCH 1/4] Fix regression for setitem not aligning rhs with boolean indexer --- doc/source/whatsnew/v1.2.3.rst | 2 +- pandas/core/frame.py | 2 ++ pandas/tests/frame/indexing/test_setitem.py | 8 ++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.3.rst b/doc/source/whatsnew/v1.2.3.rst index 4231b6d94b1b9..610f3ed2ab744 100644 --- a/doc/source/whatsnew/v1.2.3.rst +++ b/doc/source/whatsnew/v1.2.3.rst @@ -16,7 +16,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :func:`pandas.to_excel` raising ``KeyError`` when giving duplicate columns with ``columns`` attribute (:issue:`39695`) -- +- Fixed regression in :meth:`DataFrame.__setitem__` not aligning :class:`DataFrame` on right-hand side for boolean indexer (:issue:`39931`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3fe330f659513..ee4ceb995908d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3263,6 +3263,8 @@ def _setitem_array(self, key, value): key = check_bool_indexer(self.index, key) indexer = key.nonzero()[0] self._check_setitem_copy() + if isinstance(value, DataFrame): + value = value.reindex(self.index.take(indexer)) self.iloc[indexer] = value else: if isinstance(value, DataFrame): diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 657faa0f9b505..16d30877835bd 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -566,3 +566,11 @@ def test_setitem_boolean_mask(self, mask_type, float_frame): expected = df.copy() expected.values[np.array(mask)] = np.nan tm.assert_frame_equal(result, expected) + + def test_setitem_boolean_mask_aligning(self): + # GH#39931 + df = DataFrame({"a": [1, 4, 2, 3], "b": [5, 6, 7, 8]}) + expected = df.copy() + mask = df["a"] >= 3 + df[mask] = df[mask].sort_values("a") + tm.assert_frame_equal(df, expected) From 94a02583efcc59c83b5549fc4b6ff25c70562e24 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 21 Feb 2021 18:48:43 +0100 Subject: [PATCH 2/4] Parametrize --- pandas/tests/frame/indexing/test_setitem.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 16d30877835bd..a592abe6a56a5 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -567,10 +567,11 @@ def test_setitem_boolean_mask(self, mask_type, float_frame): expected.values[np.array(mask)] = np.nan tm.assert_frame_equal(result, expected) - def test_setitem_boolean_mask_aligning(self): + @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc]) + def test_setitem_boolean_mask_aligning(self, indexer): # GH#39931 df = DataFrame({"a": [1, 4, 2, 3], "b": [5, 6, 7, 8]}) expected = df.copy() mask = df["a"] >= 3 - df[mask] = df[mask].sort_values("a") + indexer(df)[mask] = indexer(df)[mask].sort_values("a") tm.assert_frame_equal(df, expected) From 3b05b3b8938c354c17cbd544b959c535444d130f Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 22 Feb 2021 20:52:56 +0100 Subject: [PATCH 3/4] Add comment to code --- pandas/core/frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ee4ceb995908d..e1ec0af4ab1bf 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3264,6 +3264,7 @@ def _setitem_array(self, key, value): indexer = key.nonzero()[0] self._check_setitem_copy() if isinstance(value, DataFrame): + # GHä39931 reindex since iloc does not align value = value.reindex(self.index.take(indexer)) self.iloc[indexer] = value else: From 0071348a3cecdd7b1d4d3bdbd05b1732088eb121 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 22 Feb 2021 21:08:29 +0100 Subject: [PATCH 4/4] Fix typo --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1e66846c65c9a..131a96d10a6d0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3265,7 +3265,7 @@ def _setitem_array(self, key, value): indexer = key.nonzero()[0] self._check_setitem_copy() if isinstance(value, DataFrame): - # GHä39931 reindex since iloc does not align + # GH#39931 reindex since iloc does not align value = value.reindex(self.index.take(indexer)) self.iloc[indexer] = value else: