diff --git a/doc/source/whatsnew/v1.5.3.rst b/doc/source/whatsnew/v1.5.3.rst index b2039abe9f715..9263ee42c98c0 100644 --- a/doc/source/whatsnew/v1.5.3.rst +++ b/doc/source/whatsnew/v1.5.3.rst @@ -26,6 +26,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ +- Bug in the Copy-on-Write implementation losing track of views when indexing a :class:`DataFrame` with another :class:`DataFrame` (:issue:`50630`) - Bug in :meth:`.Styler.to_excel` leading to error when unrecognized ``border-style`` (e.g. ``"hair"``) provided to Excel writers (:issue:`48649`) - Bug when chaining several :meth:`.Styler.concat` calls, only the last styler was concatenated (:issue:`49207`) - Fixed bug when instantiating a :class:`DataFrame` subclass inheriting from ``typing.Generic`` that triggered a ``UserWarning`` on python 3.11 (:issue:`49649`) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 82d39fab54106..1f95808437614 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -393,10 +393,8 @@ def setitem(self: T, indexer, value) -> T: return self.apply("setitem", indexer=indexer, value=value) def putmask(self, mask, new, align: bool = True): - if ( - _using_copy_on_write() - and self.refs is not None - and not all(ref is None for ref in self.refs) + if _using_copy_on_write() and any( + not self._has_no_reference_block(i) for i in range(len(self.blocks)) ): # some reference -> copy full dataframe # TODO(CoW) this could be optimized to only copy the blocks that would diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 956e2cf98c9b6..0b366f378fa13 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -214,3 +214,18 @@ def test_chained_methods(request, method, idx, using_copy_on_write): df.iloc[0, 0] = 0 if not df2_is_view: tm.assert_frame_equal(df2.iloc[:, idx:], df_orig) + + +def test_putmask(using_copy_on_write): + df = DataFrame({"a": [1, 2], "b": 1, "c": 2}) + view = df[:] + df_orig = df.copy() + df[df == df] = 5 + + if using_copy_on_write: + assert not np.shares_memory(get_array(view, "a"), get_array(df, "a")) + tm.assert_frame_equal(view, df_orig) + else: + # Without CoW the original will be modified + assert np.shares_memory(get_array(view, "a"), get_array(df, "a")) + assert view.iloc[0, 0] == 5