ENH: Optimize putmask implementation for CoW (#51268)

phofl · phofl · commit 5793ed249abd · 2023-02-10T19:37:46.000+01:00
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -9612,7 +9612,8 @@ def _where(
         # align the cond to same shape as myself
         cond = common.apply_if_callable(cond, self)
         if isinstance(cond, NDFrame):
-            cond, _ = cond.align(self, join="right", broadcast_axis=1, copy=False)
+            # CoW: Make sure reference is not kept alive
+            cond = cond.align(self, join="right", broadcast_axis=1, copy=False)[0]
         else:
             if not hasattr(cond, "shape"):
                 cond = np.asanyarray(cond)
@@ -9648,14 +9649,15 @@ def _where(
             # align with me
             if other.ndim <= self.ndim:
 
-                _, other = self.align(
+                # CoW: Make sure reference is not kept alive
+                other = self.align(
                     other,
                     join="left",
                     axis=axis,
                     level=level,
                     fill_value=None,
                     copy=False,
-                )
+                )[1]
 
                 # if we are NOT aligned, raise as we cannot where index
                 if axis is None and not other._indexed_same(self):
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -946,7 +946,7 @@ def _unstack(
 
     # ---------------------------------------------------------------------
 
-    def setitem(self, indexer, value) -> Block:
+    def setitem(self, indexer, value, using_cow: bool = False) -> Block:
         """
         Attempt self.values[indexer] = value, possibly creating a new array.
 
@@ -956,6 +956,8 @@ def setitem(self, indexer, value) -> Block:
             The subset of self.values to set
         value : object
             The value being set
+        using_cow: bool, default False
+            Signaling if CoW is used.
 
         Returns
         -------
@@ -991,10 +993,17 @@ def setitem(self, indexer, value) -> Block:
                     # checking lib.is_scalar here fails on
                     #  test_iloc_setitem_custom_object
                     casted = setitem_datetimelike_compat(values, len(vi), casted)
+
+            if using_cow and self.refs.has_reference():
+                values = values.copy()
+                self = self.make_block_same_class(
+                    values.T if values.ndim == 2 else values
+                )
+
             values[indexer] = casted
         return self
 
-    def putmask(self, mask, new) -> list[Block]:
+    def putmask(self, mask, new, using_cow: bool = False) -> list[Block]:
         """
         putmask the data to the block; it is possible that we may create a
         new dtype of block
@@ -1022,11 +1031,21 @@ def putmask(self, mask, new) -> list[Block]:
         new = extract_array(new, extract_numpy=True)
 
         if noop:
+            if using_cow:
+                return [self.copy(deep=False)]
             return [self]
 
         try:
             casted = np_can_hold_element(values.dtype, new)
+
+            if using_cow and self.refs.has_reference():
+                # Do this here to avoid copying twice
+                values = values.copy()
+                self = self.make_block_same_class(values)
+
             putmask_without_repeat(values.T, mask, casted)
+            if using_cow:
+                return [self.copy(deep=False)]
             return [self]
         except LossySetitemError:
 
@@ -1038,7 +1057,7 @@ def putmask(self, mask, new) -> list[Block]:
                     return self.coerce_to_target_dtype(new).putmask(mask, new)
                 else:
                     indexer = mask.nonzero()[0]
-                    nb = self.setitem(indexer, new[indexer])
+                    nb = self.setitem(indexer, new[indexer], using_cow=using_cow)
                     return [nb]
 
             else:
@@ -1053,7 +1072,7 @@ def putmask(self, mask, new) -> list[Block]:
                         n = new[:, i : i + 1]
 
                     submask = orig_mask[:, i : i + 1]
-                    rbs = nb.putmask(submask, n)
+                    rbs = nb.putmask(submask, n, using_cow=using_cow)
                     res_blocks.extend(rbs)
                 return res_blocks
 
@@ -1462,7 +1481,7 @@ class EABackedBlock(Block):
 
     values: ExtensionArray
 
-    def setitem(self, indexer, value):
+    def setitem(self, indexer, value, using_cow: bool = False):
         """
         Attempt self.values[indexer] = value, possibly creating a new array.
 
@@ -1475,6 +1494,8 @@ def setitem(self, indexer, value):
             The subset of self.values to set
         value : object
             The value being set
+        using_cow: bool, default False
+            Signaling if CoW is used.
 
         Returns
         -------
@@ -1581,7 +1602,7 @@ def where(self, other, cond, _downcast: str | bool = "infer") -> list[Block]:
         nb = self.make_block_same_class(res_values)
         return [nb]
 
-    def putmask(self, mask, new) -> list[Block]:
+    def putmask(self, mask, new, using_cow: bool = False) -> list[Block]:
         """
         See Block.putmask.__doc__
         """
@@ -1599,8 +1620,16 @@ def putmask(self, mask, new) -> list[Block]:
         mask = self._maybe_squeeze_arg(mask)
 
         if not mask.any():
+            if using_cow:
+                return [self.copy(deep=False)]
             return [self]
 
+        if using_cow and self.refs.has_reference():
+            values = values.copy()
+            self = self.make_block_same_class(  # type: ignore[assignment]
+                values.T if values.ndim == 2 else values
+            )
+
         try:
             # Caller is responsible for ensuring matching lengths
             values._putmask(mask, new)
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -362,14 +362,6 @@ def setitem(self: T, indexer, value) -> T:
         return self.apply("setitem", indexer=indexer, value=value)
 
     def putmask(self, mask, new, align: bool = True):
-        if using_copy_on_write() and any(
-            not self._has_no_reference_block(i) for i in range(len(self.blocks))
-        ):
-            # some reference -> copy full dataframe
-            # TODO(CoW) this could be optimized to only copy the blocks that would
-            # get modified
-            self = self.copy()
-
         if align:
             align_keys = ["new", "mask"]
         else:
@@ -381,6 +373,7 @@ def putmask(self, mask, new, align: bool = True):
             align_keys=align_keys,
             mask=mask,
             new=new,
+            using_cow=using_copy_on_write(),
         )
 
     def diff(self: T, n: int, axis: AxisInt) -> T:
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
@@ -1240,8 +1240,9 @@ def test_replace(using_copy_on_write, replace_kwargs):
     tm.assert_frame_equal(df, df_orig)
 
 
-def test_putmask(using_copy_on_write):
-    df = DataFrame({"a": [1, 2], "b": 1, "c": 2})
+@pytest.mark.parametrize("dtype", ["int64", "Int64"])
+def test_putmask(using_copy_on_write, dtype):
+    df = DataFrame({"a": [1, 2], "b": 1, "c": 2}, dtype=dtype)
     view = df[:]
     df_orig = df.copy()
     df[df == df] = 5
@@ -1255,6 +1256,51 @@ def test_putmask(using_copy_on_write):
         assert view.iloc[0, 0] == 5
 
 
+@pytest.mark.parametrize("dtype", ["int64", "Int64"])
+def test_putmask_no_reference(using_copy_on_write, dtype):
+    df = DataFrame({"a": [1, 2], "b": 1, "c": 2}, dtype=dtype)
+    arr_a = get_array(df, "a")
+    df[df == df] = 5
+
+    if using_copy_on_write:
+        assert np.shares_memory(arr_a, get_array(df, "a"))
+
+
+@pytest.mark.parametrize("dtype", ["float64", "Float64"])
+def test_putmask_aligns_rhs_no_reference(using_copy_on_write, dtype):
+    df = DataFrame({"a": [1.5, 2], "b": 1.5}, dtype=dtype)
+    arr_a = get_array(df, "a")
+    df[df == df] = DataFrame({"a": [5.5, 5]})
+
+    if using_copy_on_write:
+        assert np.shares_memory(arr_a, get_array(df, "a"))
+
+
+@pytest.mark.parametrize("val, exp", [(5.5, True), (5, False)])
+def test_putmask_dont_copy_some_blocks(using_copy_on_write, val, exp):
+    df = DataFrame({"a": [1, 2], "b": 1, "c": 1.5})
+    view = df[:]
+    df_orig = df.copy()
+    indexer = DataFrame(
+        [[True, False, False], [True, False, False]], columns=list("abc")
+    )
+    df[indexer] = val
+
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(view, "a"), get_array(df, "a"))
+        # TODO(CoW): Could split blocks to avoid copying the whole block
+        assert np.shares_memory(get_array(view, "b"), get_array(df, "b")) is exp
+        assert np.shares_memory(get_array(view, "c"), get_array(df, "c"))
+        assert df._mgr._has_no_reference(1) is not exp
+        assert not df._mgr._has_no_reference(2)
+        tm.assert_frame_equal(view, df_orig)
+    elif val == 5:
+        # Without CoW the original will be modified, the other case upcasts, e.g. copy
+        assert np.shares_memory(get_array(view, "a"), get_array(df, "a"))
+        assert np.shares_memory(get_array(view, "c"), get_array(df, "c"))
+        assert view.iloc[0, 0] == 5
+
+
 def test_asfreq_noop(using_copy_on_write):
     df = DataFrame(
         {"a": [0.0, None, 2.0, 3.0]},