From c60dc0f21a1c00704d7292017e67c6b82fa27d6a Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 15 Feb 2023 12:40:57 +0100 Subject: [PATCH 1/5] BUG: Arithmetic inplace ops not respecting CoW --- doc/source/whatsnew/v2.0.0.rst | 3 +++ pandas/core/generic.py | 1 + pandas/tests/copy_view/test_methods.py | 20 ++++++++++++++++++++ 3 files changed, 24 insertions(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index a37503460901b..725e228a6927e 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -259,6 +259,9 @@ Copy-on-Write improvements - :meth:`DataFrame.replace` will now respect the Copy-on-Write mechanism when ``inplace=True``. +- Arithmetic operations that can be inplace, e.g. ``ser *= 2`` will now respect the + Copy-on-Write mechanism. + Copy-on-Write can be enabled through one of .. code-block:: python diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 78b0a8a7a6ded..ff11872c8dc63 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11735,6 +11735,7 @@ def _inplace_method(self, other, op): self.ndim == 1 and result._indexed_same(self) and is_dtype_equal(result.dtype, self.dtype) + and not (using_copy_on_write() and not self._mgr._has_no_reference(0)) ): # GH#36498 this inplace op can _actually_ be inplace. self._values[:] = result._values diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index eca827bd60e0d..91419ba415fda 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1479,3 +1479,23 @@ def test_xs_multiindex(using_copy_on_write, using_array_manager, key, level, axi result.iloc[0, 0] = 0 tm.assert_frame_equal(df, df_orig) + + +def test_inplace_arithmetic_series(): + ser = Series([1, 2, 3]) + data = get_array(ser) + ser *= 2 + assert np.shares_memory(get_array(ser), data) + tm.assert_numpy_array_equal(data, get_array(ser)) + + +def test_inplace_arithmetic_series_with_reference(using_copy_on_write): + ser = Series([1, 2, 3]) + ser_orig = ser.copy() + view = ser[:] + ser *= 2 + if using_copy_on_write: + assert not np.shares_memory(get_array(ser), get_array(view)) + tm.assert_series_equal(ser_orig, view) + else: + assert np.shares_memory(get_array(ser), get_array(view)) From 93c1c9f6e943c4960aebc69e623ec3015833d5e4 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 15 Feb 2023 14:29:08 +0100 Subject: [PATCH 2/5] Fix tests --- pandas/tests/frame/test_arithmetic.py | 13 +++++++++---- pandas/tests/indexing/test_loc.py | 7 +++++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index d2d2b66df36d7..bcc1ae0183b97 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1988,17 +1988,22 @@ def test_arith_list_of_arraylike_raise(to_add): to_add + df -def test_inplace_arithmetic_series_update(): +def test_inplace_arithmetic_series_update(using_copy_on_write): # https://github.com/pandas-dev/pandas/issues/36373 df = DataFrame({"A": [1, 2, 3]}) + df_orig = df.copy() series = df["A"] vals = series._values series += 1 - assert series._values is vals + if using_copy_on_write: + assert series._values is not vals + tm.assert_frame_equal(df, df_orig) + else: + assert series._values is vals - expected = DataFrame({"A": [2, 3, 4]}) - tm.assert_frame_equal(df, expected) + expected = DataFrame({"A": [2, 3, 4]}) + tm.assert_frame_equal(df, expected) def test_arithemetic_multiindex_align(): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 2243879050c0c..44c0b654db268 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1105,9 +1105,12 @@ def test_identity_slice_returns_new_object(self, using_copy_on_write): else: assert all(sliced_series[:3] == [7, 8, 9]) - @pytest.mark.xfail(reason="accidental fix reverted - GH37497") - def test_loc_copy_vs_view(self): + def test_loc_copy_vs_view(self, request, using_copy_on_write): # GH 15631 + + if not using_copy_on_write: + mark = pytest.mark.xfail(reason="accidental fix reverted - GH37497") + request.node.add_marker(mark) x = DataFrame(zip(range(3), range(3)), columns=["a", "b"]) y = x.copy() From c8dc322e25b26a196da725a4058f9676156c993d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 15 Feb 2023 14:30:53 +0100 Subject: [PATCH 3/5] Fix mypy --- pandas/core/generic.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ff11872c8dc63..7dcd15cb3aca2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11731,11 +11731,16 @@ def _inplace_method(self, other, op): """ result = op(self, other) + # Item "ArrayManager" of "Union[ArrayManager, SingleArrayManager, + # BlockManager, SingleBlockManager]" has no attribute "_has_no_reference" if ( self.ndim == 1 and result._indexed_same(self) and is_dtype_equal(result.dtype, self.dtype) - and not (using_copy_on_write() and not self._mgr._has_no_reference(0)) + and not ( + using_copy_on_write() + and not self._mgr._has_no_reference(0) # type: ignore[union-attr] + ) ): # GH#36498 this inplace op can _actually_ be inplace. self._values[:] = result._values From 0f8ea5174ea4deaa37b19c91a1e703ef1482db61 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 15 Feb 2023 14:50:17 +0100 Subject: [PATCH 4/5] Address review --- pandas/core/generic.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7dcd15cb3aca2..344056f12b0e8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11731,19 +11731,13 @@ def _inplace_method(self, other, op): """ result = op(self, other) - # Item "ArrayManager" of "Union[ArrayManager, SingleArrayManager, - # BlockManager, SingleBlockManager]" has no attribute "_has_no_reference" if ( self.ndim == 1 and result._indexed_same(self) and is_dtype_equal(result.dtype, self.dtype) - and not ( - using_copy_on_write() - and not self._mgr._has_no_reference(0) # type: ignore[union-attr] - ) ): # GH#36498 this inplace op can _actually_ be inplace. - self._values[:] = result._values + self._mgr.setitem_inplace(slice(None), result._values) return self # Delete cacher From 2d4bee74a6534bc14e7e136a8b5ff2e350c43676 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 15 Feb 2023 16:04:33 +0100 Subject: [PATCH 5/5] Fix typing --- pandas/core/generic.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 344056f12b0e8..96abd98bbdd75 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11737,7 +11737,11 @@ def _inplace_method(self, other, op): and is_dtype_equal(result.dtype, self.dtype) ): # GH#36498 this inplace op can _actually_ be inplace. - self._mgr.setitem_inplace(slice(None), result._values) + # Item "ArrayManager" of "Union[ArrayManager, SingleArrayManager, + # BlockManager, SingleBlockManager]" has no attribute "setitem_inplace" + self._mgr.setitem_inplace( # type: ignore[union-attr] + slice(None), result._values + ) return self # Delete cacher