From 671a13c720c43184ae81f1ca9a0c0c14c0ef04fd Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 3 Jul 2022 16:18:45 +0200 Subject: [PATCH 1/5] Move whatsnew Move whatsnew --- doc/source/whatsnew/v1.4.4.rst | 1 + pandas/core/frame.py | 7 ++++-- pandas/tests/frame/indexing/test_indexing.py | 9 ++++++++ pandas/tests/frame/indexing/test_setitem.py | 23 ++++++++++++++++++++ 4 files changed, 38 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.4.rst b/doc/source/whatsnew/v1.4.4.rst index dce8fb60ecdd6..b4224b5b210e0 100644 --- a/doc/source/whatsnew/v1.4.4.rst +++ b/doc/source/whatsnew/v1.4.4.rst @@ -16,6 +16,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :func:`concat` materializing :class:`Index` during sorting even if :class:`Index` was already sorted (:issue:`47501`) - Fixed regression in :meth:`DataFrame.loc` not updating the cache correctly after values were set (:issue:`47867`) +- Fixed regression in :meth:`DataFrame.loc` not aligning index in some cases when setting a :class:`DataFrame` (:issue:`47578`) - Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`) - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5b25f5be01d29..f714dec0ce483 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4527,9 +4527,12 @@ def _sanitize_column(self, value) -> ArrayLike: """ self._ensure_valid_index(value) - # We should never get here with DataFrame value - if isinstance(value, Series): + # We can get there through isetitem with a DataFrame + # or through loc single_block_path + if isinstance(value, DataFrame): return _reindex_for_setitem(value, self.index) + elif is_dict_like(value): + return _reindex_for_setitem(Series(value), self.index) if is_list_like(value): com.require_length_match(value, self.index) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index a7af569e397eb..4dc9cbdb7b34d 100644 
--- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1323,6 +1323,15 @@ def test_loc_internals_not_updated_correctly(self): ) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("val", ["x", 1]) + @pytest.mark.parametrize("idxr", ["a", ["a"]]) + def test_loc_setitem_rhs_frame(self, idxr, val): + # GH#47578 + df = DataFrame({"a": [1, 2]}) + df.loc[:, "a"] = DataFrame({"a": [val, 11]}, index=[1, 2]) + expected = DataFrame({"a": [np.nan, val]}) + tm.assert_frame_equal(df, expected) + class TestDataFrameIndexingUInt64: def test_setitem(self, uint64_frame): diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 673d347917832..e8b0feecce2d2 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -699,6 +699,29 @@ def test_setitem_npmatrix_2d(self): tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize("vals", [{}, {"d": "a"}]) + def test_setitem_aligning_dict_with_index(self, vals): + # GH#47216 + df = DataFrame({"a": [1, 2], "b": [3, 4], **vals}) + df.loc[:, "a"] = {1: 100, 0: 200} + df.loc[:, "c"] = {0: 5, 1: 6} + df.loc[:, "e"] = {1: 5} + expected = DataFrame( + {"a": [200, 100], "b": [3, 4], **vals, "c": [5, 6], "e": [np.nan, 5]} + ) + tm.assert_frame_equal(df, expected) + + def test_setitem_rhs_dataframe(self): + # GH#47578 + df = DataFrame({"a": [1, 2]}) + df["a"] = DataFrame({"a": [10, 11]}, index=[1, 2]) + expected = DataFrame({"a": [np.nan, 10]}) + tm.assert_frame_equal(df, expected) + + df = DataFrame({"a": [1, 2]}) + df.isetitem(0, DataFrame({"a": [10, 11]}, index=[1, 2])) + tm.assert_frame_equal(df, expected) + class TestSetitemTZAwareValues: @pytest.fixture From d32a495980eb01518f991939e06e48198b365967 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 5 Aug 2022 21:14:11 +0200 Subject: [PATCH 2/5] Remove test --- pandas/tests/frame/indexing/test_setitem.py | 12 
------------ 1 file changed, 12 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index e8b0feecce2d2..fb3ef4ad3128c 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -699,18 +699,6 @@ def test_setitem_npmatrix_2d(self): tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize("vals", [{}, {"d": "a"}]) - def test_setitem_aligning_dict_with_index(self, vals): - # GH#47216 - df = DataFrame({"a": [1, 2], "b": [3, 4], **vals}) - df.loc[:, "a"] = {1: 100, 0: 200} - df.loc[:, "c"] = {0: 5, 1: 6} - df.loc[:, "e"] = {1: 5} - expected = DataFrame( - {"a": [200, 100], "b": [3, 4], **vals, "c": [5, 6], "e": [np.nan, 5]} - ) - tm.assert_frame_equal(df, expected) - def test_setitem_rhs_dataframe(self): # GH#47578 df = DataFrame({"a": [1, 2]}) From 2293852878ced4388ad8abe6310dc299e04d0bec Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 5 Aug 2022 22:23:05 +0200 Subject: [PATCH 3/5] Remove other change --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f714dec0ce483..f84cf52a09fc4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4531,8 +4531,8 @@ def _sanitize_column(self, value) -> ArrayLike: # or through loc single_block_path if isinstance(value, DataFrame): return _reindex_for_setitem(value, self.index) - elif is_dict_like(value): - return _reindex_for_setitem(Series(value), self.index) + elif isinstance(value, Series): + return _reindex_for_setitem(value, self.index) if is_list_like(value): com.require_length_match(value, self.index) From 011d9c91f6c161afebab02058e31f4a67caaffd4 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 5 Aug 2022 22:30:02 +0200 Subject: [PATCH 4/5] Remove new part of test --- pandas/tests/frame/indexing/test_setitem.py | 4 ---- 1 file changed, 4 deletions(-) diff --git 
a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index fb3ef4ad3128c..c438dc78ce397 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -706,10 +706,6 @@ def test_setitem_rhs_dataframe(self): expected = DataFrame({"a": [np.nan, 10]}) tm.assert_frame_equal(df, expected) - df = DataFrame({"a": [1, 2]}) - df.isetitem(0, DataFrame({"a": [10, 11]}, index=[1, 2])) - tm.assert_frame_equal(df, expected) - class TestSetitemTZAwareValues: @pytest.fixture From b1531b7e53517571f6f167aef837395456f3b196 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 8 Aug 2022 12:50:24 +0200 Subject: [PATCH 5/5] Address review --- pandas/core/frame.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f84cf52a09fc4..fcd822988de20 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4527,11 +4527,8 @@ def _sanitize_column(self, value) -> ArrayLike: """ self._ensure_valid_index(value) - # We can get there through isetitem with a DataFrame - # or through loc single_block_path - if isinstance(value, DataFrame): - return _reindex_for_setitem(value, self.index) - elif isinstance(value, Series): + # We can get there through loc single_block_path + if isinstance(value, (DataFrame, Series)): return _reindex_for_setitem(value, self.index) if is_list_like(value):