From 0f7f932be4521d9e335e0f45e17e696f044ff28a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Aug 2022 13:47:23 +0200 Subject: [PATCH 1/5] REGR: properly update DataFrame cache in Series.__setitem__ --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 579177dae827d..d2f66e9bd36e2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1169,7 +1169,7 @@ def __setitem__(self, key, value) -> None: self._set_with(key, value) if cacher_needs_updating: - self._maybe_update_cacher() + self._maybe_update_cacher(inplace=True) def _set_with_engine(self, key, value) -> None: loc = self.index.get_loc(key) From b567d025a555473dc5eba9c74ec2375f846685f4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 24 Aug 2022 00:17:16 +0200 Subject: [PATCH 2/5] whatsnew --- doc/source/whatsnew/v1.4.4.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.4.4.rst b/doc/source/whatsnew/v1.4.4.rst index deff6e194c3bd..0a371b7a6efaf 100644 --- a/doc/source/whatsnew/v1.4.4.rst +++ b/doc/source/whatsnew/v1.4.4.rst @@ -26,6 +26,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.loc` setting a length-1 array like value to a single value in the DataFrame (:issue:`46268`) - Fixed regression when slicing with :meth:`DataFrame.loc` with :class:`DateOffset`-index (:issue:`46671`) - Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`) +- Fixed regression in updating a DataFrame column through :meth:`Series.__setitem__` (:issue:`47172`) - Fixed regression in :meth:`DataFrame.select_dtypes` returning a view on the original DataFrame (:issue:`48090`) - Fixed regression using custom Index subclasses (for example, used in xarray) with :meth:`~DataFrame.reset_index` or :meth:`Index.insert` (:issue:`47071`) - Fixed regression in :meth:`DatetimeIndex.intersection` when the :class:`DatetimeIndex` has dates crossing daylight savings time (:issue:`46702`) From 253a7a5b94230b7d062868b226ab18508dc195ef Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 24 Aug 2022 00:27:21 +0200 Subject: [PATCH 3/5] add test --- pandas/tests/frame/indexing/test_setitem.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 6d2becd7a32d2..829f5a3ebc0cf 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1235,3 +1235,20 @@ def test_setitem_not_operating_inplace(self, value, set_value, indexer): view = df[:] df[indexer] = set_value tm.assert_frame_equal(view, expected) + + def test_setitem_column_update_inplace(self, using_copy_on_write): + # https://github.com/pandas-dev/pandas/issues/47172 + + labels = [f"c{i}" for i in range(10)] + df = DataFrame({col: np.zeros(len(labels)) for col in labels}, index=labels) + values = df._mgr.blocks[0].values + + for label in df.columns: + df[label][label] = 1 + + if not using_copy_on_write: + # diagonal values all updated + assert np.all(values[np.arange(10), np.arange(10)] == 1) + else: + # original dataframe not updated + assert np.all(values[np.arange(10), np.arange(10)] == 0) From f32ef475c42d271ce0eba528a8fb295fbf984a85 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 25 Aug 2022 12:39:01 +0200 Subject: [PATCH 4/5] skip for array managet (accesses blocks) --- pandas/tests/frame/indexing/test_setitem.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 829f5a3ebc0cf..cf0ff4e3603f3 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1236,6 +1236,7 @@ def test_setitem_not_operating_inplace(self, value, set_value, indexer): df[indexer] = set_value tm.assert_frame_equal(view, expected) + @td.skip_array_manager_invalid_test def test_setitem_column_update_inplace(self, using_copy_on_write): # https://github.com/pandas-dev/pandas/issues/47172 From 3dc008a45160b620dec2a4d1cefb75595200d387 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 26 Aug 2022 00:05:26 +0200 Subject: [PATCH 5/5] update whatsnew --- doc/source/whatsnew/v1.4.4.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.4.rst b/doc/source/whatsnew/v1.4.4.rst index 0a371b7a6efaf..e03e6cd41ebd3 100644 --- a/doc/source/whatsnew/v1.4.4.rst +++ b/doc/source/whatsnew/v1.4.4.rst @@ -26,7 +26,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.loc` setting a length-1 array like value to a single value in the DataFrame (:issue:`46268`) - Fixed regression when slicing with :meth:`DataFrame.loc` with :class:`DateOffset`-index (:issue:`46671`) - Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`) -- Fixed regression in updating a DataFrame column through :meth:`Series.__setitem__` (:issue:`47172`) +- Fixed regression in updating a DataFrame column through Series ``__setitem__`` (using chained assignment) not updating column values inplace and using too much memory (:issue:`47172`) - Fixed regression in :meth:`DataFrame.select_dtypes` returning a view on the original DataFrame (:issue:`48090`) - Fixed regression using custom Index subclasses (for example, used in xarray) with :meth:`~DataFrame.reset_index` or :meth:`Index.insert` (:issue:`47071`) - Fixed regression in :meth:`DatetimeIndex.intersection` when the :class:`DatetimeIndex` has dates crossing daylight savings time (:issue:`46702`)