From 88b5591581e534378587b1485a25db57aa3ee086 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 15 Jun 2022 09:58:30 +0200 Subject: [PATCH 1/4] BUG: DataFrame.loc not aligning dict when setting to a column --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/frame.py | 2 ++ pandas/tests/frame/indexing/test_setitem.py | 9 +++++++++ 3 files changed, 12 insertions(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 5891eeea98cbb..9077b2b8b5e62 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -821,6 +821,7 @@ Indexing - Bug in :meth:`loc.__setitem__` treating ``range`` keys as positional instead of label-based (:issue:`45479`) - Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of casting to ``boolean`` dtype (:issue:`45462`) - Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtpye :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`) +- Bug in :meth:`DataFrame.loc` not aligning a dictionary on the right hand side with the index when setting values to a column (:issue:`47216`) - Bug in :meth:`Series.__setitem__` with ``datetime64[ns]`` dtype, an all-``False`` boolean mask, and an incompatible value incorrectly casting to ``object`` instead of retaining ``datetime64[ns]`` dtype (:issue:`45967`) - Bug in :meth:`Index.__getitem__` raising ``ValueError`` when indexer is from boolean dtype with ``NA`` (:issue:`45806`) - Bug in :meth:`Series.mask` with ``inplace=True`` or setting values with a boolean mask with small integer dtypes incorrectly raising (:issue:`45750`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 39a940169e1f3..51ae05cb1cf03 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4699,6 +4699,8 @@ def _sanitize_column(self, value) -> ArrayLike: # We should never get here with DataFrame value if isinstance(value, Series): return 
_reindex_for_setitem(value, self.index) + elif isinstance(value, dict): + return _reindex_for_setitem(Series(value), self.index) if is_list_like(value): com.require_length_match(value, self.index) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index cf6d351aa78a0..72635d6e0820c 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -702,6 +702,15 @@ def test_setitem_npmatrix_2d(self): tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize("vals", [{}, {"d": "a"}]) + def test_setitem_aligning_dict_with_index(self, vals): + # GH#47216 + df = DataFrame({"a": [1, 2], "b": [3, 4], **vals}) + df.loc[:, "a"] = {1: 100, 0: 200} + df.loc[:, "c"] = {0: 5, 1: 6} + expected = DataFrame({"a": [200, 100], "b": [3, 4], **vals, "c": [5, 6]}) + tm.assert_frame_equal(df, expected) + class TestSetitemTZAwareValues: @pytest.fixture From 9dd728d0523ed8dce596bcca52d2135d5ed8808b Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 16 Jun 2022 23:06:02 +0200 Subject: [PATCH 2/4] Add partial case --- pandas/tests/frame/indexing/test_setitem.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 72635d6e0820c..e95b09f6d7048 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -708,7 +708,10 @@ def test_setitem_aligning_dict_with_index(self, vals): df = DataFrame({"a": [1, 2], "b": [3, 4], **vals}) df.loc[:, "a"] = {1: 100, 0: 200} df.loc[:, "c"] = {0: 5, 1: 6} - expected = DataFrame({"a": [200, 100], "b": [3, 4], **vals, "c": [5, 6]}) + df.loc[:, "e"] = {1: 5} + expected = DataFrame( + {"a": [200, 100], "b": [3, 4], **vals, "c": [5, 6], "e": [np.nan, 5]} + ) tm.assert_frame_equal(df, expected) From d2708512b751c6470690fdff0abc9c99404dc002 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 28 Jun 2022 
16:15:50 +0200 Subject: [PATCH 3/4] Use is_dict_like --- pandas/core/frame.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5942dd3c32e6d..f4d02b08aab0d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4707,9 +4707,7 @@ def _sanitize_column(self, value) -> ArrayLike: self._ensure_valid_index(value) # We should never get here with DataFrame value - if isinstance(value, Series): - return _reindex_for_setitem(value, self.index) - elif isinstance(value, dict): + if is_dict_like(value): return _reindex_for_setitem(Series(value), self.index) if is_list_like(value): From 6992ec429d44dffd3b3479a75b634347d96c36e9 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 28 Jun 2022 20:28:40 +0200 Subject: [PATCH 4/4] Revert "Use is_dict_like" This reverts commit d2708512b751c6470690fdff0abc9c99404dc002. --- pandas/core/frame.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f4d02b08aab0d..5942dd3c32e6d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4707,7 +4707,9 @@ def _sanitize_column(self, value) -> ArrayLike: self._ensure_valid_index(value) # We should never get here with DataFrame value - if is_dict_like(value): + if isinstance(value, Series): + return _reindex_for_setitem(value, self.index) + elif isinstance(value, dict): return _reindex_for_setitem(Series(value), self.index) if is_list_like(value):