From b5d4f8452db50fc9a38056900554e73bfd0ba8f6 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 26 Feb 2023 01:35:03 +0100
Subject: [PATCH 01/14] CoW: PoC for avoiding copy when setting values for rhs

---
 pandas/core/frame.py              | 41 +++++++++++++++++++------------
 pandas/core/internals/managers.py |  4 +--
 2 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 49416cc2d53c0..96ee183e98282 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -47,6 +47,7 @@
     properties,
 )
 from pandas._libs.hashtable import duplicated
+from pandas._libs.internals import BlockValuesRefs
 from pandas._libs.lib import (
     NoDefault,
     is_range_indexer,
@@ -3896,11 +3897,11 @@ def isetitem(self, loc, value) -> None:
                 loc = [loc]
 
             for i, idx in enumerate(loc):
-                arraylike = self._sanitize_column(value.iloc[:, i])
+                arraylike, _ = self._sanitize_column(value.iloc[:, i])
                 self._iset_item_mgr(idx, arraylike, inplace=False)
             return
 
-        arraylike = self._sanitize_column(value)
+        arraylike, _ = self._sanitize_column(value)
         self._iset_item_mgr(loc, arraylike, inplace=False)
 
     def __setitem__(self, key, value):
@@ -4069,7 +4070,7 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None:
                 return
 
             # now align rows
-            arraylike = _reindex_for_setitem(value, self.index)
+            arraylike, _ = _reindex_for_setitem(value, self.index)
             self._set_item_mgr(key, arraylike)
             return
 
@@ -4088,14 +4089,14 @@ def _iset_item_mgr(
         self._mgr.iset(loc, value, inplace=inplace)
         self._clear_item_cache()
 
-    def _set_item_mgr(self, key, value: ArrayLike) -> None:
+    def _set_item_mgr(self, key, value: ArrayLike, refs=None) -> None:
         try:
             loc = self._info_axis.get_loc(key)
         except KeyError:
             # This item wasn't present, just insert at end
-            self._mgr.insert(len(self._info_axis), key, value)
+            self._mgr.insert(len(self._info_axis), key, value, refs)
         else:
-            self._iset_item_mgr(loc, value)
+            self._iset_item_mgr(loc, value, refs)
 
         # check if we are modifying a copy
         # try to set first as we want an invalid
@@ -4104,7 +4105,7 @@ def _set_item_mgr(self, key, value: ArrayLike) -> None:
             self._check_setitem_copy()
 
     def _iset_item(self, loc: int, value) -> None:
-        arraylike = self._sanitize_column(value)
+        arraylike, _ = self._sanitize_column(value)
         self._iset_item_mgr(loc, arraylike, inplace=True)
 
         # check if we are modifying a copy
@@ -4123,7 +4124,7 @@ def _set_item(self, key, value) -> None:
         Series/TimeSeries will be conformed to the DataFrames index to
         ensure homogeneity.
         """
-        value = self._sanitize_column(value)
+        value, refs = self._sanitize_column(value)
 
         if (
             key in self.columns
@@ -4136,7 +4137,7 @@ def _set_item(self, key, value) -> None:
                 if isinstance(existing_piece, DataFrame):
                     value = np.tile(value, (len(existing_piece.columns), 1)).T
 
-        self._set_item_mgr(key, value)
+        self._set_item_mgr(key, value, refs)
 
     def _set_value(
         self, index: IndexLabel, col, value: Scalar, takeable: bool = False
@@ -4755,7 +4756,7 @@ def insert(
         if not isinstance(loc, int):
             raise TypeError("loc must be int")
 
-        value = self._sanitize_column(value)
+        value, _ = self._sanitize_column(value)
         self._mgr.insert(loc, column, value)
 
     def assign(self, **kwargs) -> DataFrame:
@@ -4826,7 +4827,7 @@ def assign(self, **kwargs) -> DataFrame:
             data[k] = com.apply_if_callable(v, data)
         return data
 
-    def _sanitize_column(self, value) -> ArrayLike:
+    def _sanitize_column(self, value) -> tuple[ArrayLike, BlockValuesRefs | None]:
         """
         Ensures new columns (which go into the BlockManager as new blocks) are
         always copied and converted into an array.
@@ -4846,11 +4847,15 @@ def _sanitize_column(self, value) -> ArrayLike:
         if isinstance(value, DataFrame):
             return _reindex_for_setitem(value, self.index)
         elif is_dict_like(value):
-            return _reindex_for_setitem(Series(value), self.index)
+            if not isinstance(value, Series):
+                value = Series(value)
+            return _reindex_for_setitem(
+                value, self.index, using_cow=using_copy_on_write()
+            )
 
         if is_list_like(value):
             com.require_length_match(value, self.index)
-        return sanitize_array(value, self.index, copy=True, allow_2d=True)
+        return sanitize_array(value, self.index, copy=True, allow_2d=True), None
 
     @property
     def _series(self):
@@ -11566,11 +11571,15 @@ def _from_nested_dict(data) -> collections.defaultdict:
     return new_data
 
 
-def _reindex_for_setitem(value: DataFrame | Series, index: Index) -> ArrayLike:
+def _reindex_for_setitem(
+    value: DataFrame | Series, index: Index, using_cow: bool = False
+) -> tuple[ArrayLike, BlockValuesRefs | None]:
     # reindex if necessary
 
     if value.index.equals(index) or not len(index):
-        return value._values.copy()
+        if using_cow and isinstance(value, Series):
+            return value._values, value._mgr.blocks[0].refs
+        return value._values.copy(), None
 
     # GH#4107
     try:
@@ -11584,4 +11593,4 @@ def _reindex_for_setitem(value: DataFrame | Series, index: Index) -> ArrayLike:
         raise TypeError(
             "incompatible index of inserted column with frame index"
         ) from err
-    return reindexed_value
+    return reindexed_value, None
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index cb32b3bbc6cc7..3959639204d97 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1358,7 +1358,7 @@ def column_setitem(
             new_mgr = col_mgr.setitem((idx,), value)
             self.iset(loc, new_mgr._block.values, inplace=True)
 
-    def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
+    def insert(self, loc: int, item: Hashable, value: ArrayLike, refs=None) -> None:
         """
         Insert item at selected position.
 
@@ -1382,7 +1382,7 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
 
         bp = BlockPlacement(slice(loc, loc + 1))
         # TODO(CoW) do we always "own" the passed `value`?
-        block = new_block_2d(values=value, placement=bp)
+        block = new_block_2d(values=value, placement=bp, refs=refs)
 
         if not len(self.blocks):
             # Fastpath

From e3a50ddebdcf4366f99d0cd08fe85aa2eb1cf2bd Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 28 Feb 2023 23:40:55 +0100
Subject: [PATCH 02/14] Update tests

---
 pandas/core/frame.py                        | 16 ++++++------
 pandas/core/internals/blocks.py             | 13 ++++++++--
 pandas/core/internals/managers.py           | 28 ++++++++++++++++-----
 pandas/tests/copy_view/test_indexing.py     |  7 ++++--
 pandas/tests/copy_view/test_setitem.py      |  8 ++----
 pandas/tests/frame/indexing/test_setitem.py |  2 ++
 6 files changed, 50 insertions(+), 24 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index eb18bccfbd99c..34b03d29a9518 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4085,10 +4085,10 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None:
         self[key] = value[value.columns[0]]
 
     def _iset_item_mgr(
-        self, loc: int | slice | np.ndarray, value, inplace: bool = False
+        self, loc: int | slice | np.ndarray, value, inplace: bool = False, refs=None
     ) -> None:
         # when called from _set_item_mgr loc can be anything returned from get_loc
-        self._mgr.iset(loc, value, inplace=inplace)
+        self._mgr.iset(loc, value, inplace=inplace, refs=refs)
         self._clear_item_cache()
 
     def _set_item_mgr(self, key, value: ArrayLike, refs=None) -> None:
@@ -4098,7 +4098,7 @@ def _set_item_mgr(self, key, value: ArrayLike, refs=None) -> None:
             # This item wasn't present, just insert at end
             self._mgr.insert(len(self._info_axis), key, value, refs)
         else:
-            self._iset_item_mgr(loc, value, refs)
+            self._iset_item_mgr(loc, value, refs=refs)
 
         # check if we are modifying a copy
         # try to set first as we want an invalid
@@ -4126,7 +4126,7 @@ def _set_item(self, key, value) -> None:
         Series/TimeSeries will be conformed to the DataFrames index to
         ensure homogeneity.
         """
-        value, refs = self._sanitize_column(value)
+        value, refs = self._sanitize_column(value, using_cow=using_copy_on_write())
 
         if (
             key in self.columns
@@ -4837,7 +4837,9 @@ def assign(self, **kwargs) -> DataFrame:
             data[k] = com.apply_if_callable(v, data)
         return data
 
-    def _sanitize_column(self, value) -> tuple[ArrayLike, BlockValuesRefs | None]:
+    def _sanitize_column(
+        self, value, using_cow: bool = False
+    ) -> tuple[ArrayLike, BlockValuesRefs | None]:
         """
         Ensures new columns (which go into the BlockManager as new blocks) are
         always copied and converted into an array.
@@ -4857,9 +4859,7 @@ def _sanitize_column(self, value) -> tuple[ArrayLike, BlockValuesRefs | None]:
         if is_dict_like(value):
             if not isinstance(value, Series):
                 value = Series(value)
-            return _reindex_for_setitem(
-                value, self.index, using_cow=using_copy_on_write()
-            )
+            return _reindex_for_setitem(value, self.index, using_cow=using_cow)
 
         if is_list_like(value):
             com.require_length_match(value, self.index)
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 32b159983308a..9015f14d15d25 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -889,7 +889,9 @@ def _slice(
 
         return self.values[slicer]
 
-    def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
+    def set_inplace(
+        self, locs, values: ArrayLike, copy: bool = False, refs=None
+    ) -> None:
         """
         Modify block values in-place with new item value.
 
@@ -903,6 +905,8 @@ def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
 
         Caller is responsible for checking values.dtype == self.dtype.
         """
+        if refs is not None:
+            values = values.copy()
         if copy:
             self.values = self.values.copy()
         self.values[locs] = values
@@ -1853,9 +1857,14 @@ def iget(self, i: int | tuple[int, int] | tuple[slice, int]):
                 raise IndexError(f"{self} only contains one item")
             return self.values
 
-    def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
+    def set_inplace(
+        self, locs, values: ArrayLike, copy: bool = False, refs=None
+    ) -> None:
         # When an ndarray, we should have locs.tolist() == [0]
         # When a BlockPlacement we should have list(locs) == [0]
+        if refs is not None:
+            self = self.make_block(values, refs=refs)
+            return
         if copy:
             self.values = self.values.copy()
         self.values[:] = values
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 31908c8f5e61a..93b00ccf426d8 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1114,7 +1114,11 @@ def column_arrays(self) -> list[np.ndarray]:
         return result  # type: ignore[return-value]
 
     def iset(
-        self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False
+        self,
+        loc: int | slice | np.ndarray,
+        value: ArrayLike,
+        inplace: bool = False,
+        refs=None,
     ):
         """
         Set new item in-place. Does not consolidate. Adds new Block if not
@@ -1155,6 +1159,7 @@ def iset(
                     inplace=inplace,
                     blkno=blkno,
                     blk=blk,
+                    refs=refs,
                 )
 
             # error: Incompatible types in assignment (expression has type
@@ -1186,7 +1191,9 @@ def value_getitem(placement):
             if inplace and blk.should_store(value):
                 # Updating inplace -> check if we need to do Copy-on-Write
                 if using_copy_on_write() and not self._has_no_reference_block(blkno_l):
-                    self._iset_split_block(blkno_l, blk_locs, value_getitem(val_locs))
+                    self._iset_split_block(
+                        blkno_l, blk_locs, value_getitem(val_locs), refs=refs
+                    )
                 else:
                     blk.set_inplace(blk_locs, value_getitem(val_locs))
                     continue
@@ -1230,6 +1237,7 @@ def value_getitem(placement):
                     new_block_2d(
                         values=value,
                         placement=BlockPlacement(slice(mgr_loc, mgr_loc + 1)),
+                        refs=refs,
                     )
                     for mgr_loc in unfit_idxr
                 )
@@ -1245,6 +1253,7 @@ def value_getitem(placement):
                     new_block_2d(
                         values=value_getitem(unfit_val_items),
                         placement=BlockPlacement(unfit_idxr),
+                        refs=refs,
                     )
                 )
 
@@ -1261,6 +1270,7 @@ def _iset_split_block(
         blkno_l: int,
         blk_locs: np.ndarray | list[int],
         value: ArrayLike | None = None,
+        refs=None,
     ) -> None:
         """Removes columns from a block by splitting the block.
 
@@ -1282,7 +1292,7 @@ def _iset_split_block(
         nbs_tup = tuple(blk.delete(blk_locs))
         if value is not None:
             locs = blk.mgr_locs.as_array[blk_locs]
-            first_nb = new_block_2d(value, BlockPlacement(locs))
+            first_nb = new_block_2d(value, BlockPlacement(locs), refs=refs)
         else:
             first_nb = nbs_tup[0]
             nbs_tup = tuple(nbs_tup[1:])
@@ -1304,7 +1314,13 @@ def _iset_split_block(
             self._blknos[nb.mgr_locs.indexer] = i + nr_blocks
 
     def _iset_single(
-        self, loc: int, value: ArrayLike, inplace: bool, blkno: int, blk: Block
+        self,
+        loc: int,
+        value: ArrayLike,
+        inplace: bool,
+        blkno: int,
+        blk: Block,
+        refs=None,
     ) -> None:
         """
         Fastpath for iset when we are only setting a single position and
@@ -1321,10 +1337,10 @@ def _iset_single(
                 # perform Copy-on-Write and clear the reference
                 copy = True
             iloc = self.blklocs[loc]
-            blk.set_inplace(slice(iloc, iloc + 1), value, copy=copy)
+            blk.set_inplace(slice(iloc, iloc + 1), value, copy=copy, refs=refs)
             return
 
-        nb = new_block_2d(value, placement=blk._mgr_locs)
+        nb = new_block_2d(value, placement=blk._mgr_locs, refs=refs)
         old_blocks = self.blocks
         new_blocks = old_blocks[:blkno] + (nb,) + old_blocks[blkno + 1 :]
         self.blocks = new_blocks
diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py
index ca2954f0d390e..2579f5c2f6627 100644
--- a/pandas/tests/copy_view/test_indexing.py
+++ b/pandas/tests/copy_view/test_indexing.py
@@ -977,7 +977,7 @@ def test_column_as_series_no_item_cache(
 # TODO add tests for other indexing methods on the Series
 
 
-def test_dataframe_add_column_from_series(backend):
+def test_dataframe_add_column_from_series(backend, using_copy_on_write):
     # Case: adding a new column to a DataFrame from an existing column/series
     # -> always already takes a copy on assignment
     # (no change in behaviour here)
@@ -987,7 +987,10 @@ def test_dataframe_add_column_from_series(backend):
 
     s = Series([10, 11, 12])
     df["new"] = s
-    assert not np.shares_memory(get_array(df, "new"), s.values)
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(df, "new"), get_array(s))
+    else:
+        assert not np.shares_memory(get_array(df, "new"), get_array(s))
 
     # editing series -> doesn't modify column in frame
     s[0] = 0
diff --git a/pandas/tests/copy_view/test_setitem.py b/pandas/tests/copy_view/test_setitem.py
index 9e0d350dde0de..8b894f371c78d 100644
--- a/pandas/tests/copy_view/test_setitem.py
+++ b/pandas/tests/copy_view/test_setitem.py
@@ -34,9 +34,7 @@ def test_set_column_with_series(using_copy_on_write):
     df["c"] = ser
 
     if using_copy_on_write:
-        # TODO(CoW) with CoW we can delay the copy
-        # assert np.shares_memory(df["c"].values, ser.values)
-        assert not np.shares_memory(df["c"].values, ser.values)
+        assert np.shares_memory(df["c"].values, ser.values)
     else:
         # the series data is copied
         assert not np.shares_memory(df["c"].values, ser.values)
@@ -79,9 +77,7 @@ def test_set_columns_with_dataframe(using_copy_on_write):
     df[["c", "d"]] = df2
 
     if using_copy_on_write:
-        # TODO(CoW) with CoW we can delay the copy
-        # assert np.shares_memory(df["c"].values, df2["c"].values)
-        assert not np.shares_memory(df["c"].values, df2["c"].values)
+        assert np.shares_memory(df["c"].values, df2["c"].values)
     else:
         # the data is copied
         assert not np.shares_memory(df["c"].values, df2["c"].values)
diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
index c20db86904d06..8c520310663e4 100644
--- a/pandas/tests/frame/indexing/test_setitem.py
+++ b/pandas/tests/frame/indexing/test_setitem.py
@@ -514,8 +514,10 @@ def test_setitem_intervals(self):
         tm.assert_series_equal(df["C"], df["E"], check_names=False)
 
     def test_setitem_categorical(self):
+        pd.options.mode.copy_on_write = True
         # GH#35369
         df = DataFrame({"h": Series(list("mn")).astype("category")})
+        # view = df[:]
         df.h = df.h.cat.reorder_categories(["n", "m"])
         expected = DataFrame(
             {"h": Categorical(["m", "n"]).reorder_categories(["n", "m"])}

From 290facdda4e46a79251f195732c7515c4a457237 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 1 Mar 2023 00:50:01 +0100
Subject: [PATCH 03/14] Fix array manager

---
 pandas/core/internals/array_manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 87e549ae023ef..b86aa48fc15da 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -872,7 +872,7 @@ def column_setitem(
             # update existing ArrayManager in-place
             self.arrays[loc] = new_mgr.arrays[0]
 
-    def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
+    def insert(self, loc: int, item: Hashable, value: ArrayLike, refs=None) -> None:
         """
         Insert item at selected position.
 

From d75f46adc6d941670016d6716ff81d7b522cda9c Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 15 Mar 2023 15:32:45 +0100
Subject: [PATCH 04/14] Add type hint

---
 pandas/core/internals/managers.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 63e0803da5f44..9d00ef10bb2b0 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -22,7 +22,10 @@
     internals as libinternals,
     lib,
 )
-from pandas._libs.internals import BlockPlacement
+from pandas._libs.internals import (
+    BlockPlacement,
+    BlockValuesRefs,
+)
 from pandas.errors import PerformanceWarning
 from pandas.util._decorators import cache_readonly
 from pandas.util._exceptions import find_stack_level
@@ -1129,7 +1132,7 @@ def iset(
         loc: int | slice | np.ndarray,
         value: ArrayLike,
         inplace: bool = False,
-        refs=None,
+        refs: BlockValuesRefs | None = None,
     ):
         """
         Set new item in-place. Does not consolidate. Adds new Block if not

From c61dffbe55104177108909bf3e03feba0b38e9fa Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 15 Mar 2023 15:38:23 +0100
Subject: [PATCH 05/14] Remove unnecessary

---
 pandas/core/frame.py              | 2 +-
 pandas/core/internals/blocks.py   | 6 +-----
 pandas/core/internals/managers.py | 2 +-
 3 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 976a311be28f0..aa97521bde780 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -12043,7 +12043,7 @@ def _reindex_for_setitem(
 
     if value.index.equals(index) or not len(index):
         if using_cow and isinstance(value, Series):
-            return value._values, value._mgr.blocks[0].refs
+            return value._values, value._references
         return value._values.copy(), None
 
     # GH#4107
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 3276e68657dd6..f139fb78d483a 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -905,9 +905,7 @@ def _slice(
 
         return self.values[slicer]
 
-    def set_inplace(
-        self, locs, values: ArrayLike, copy: bool = False, refs=None
-    ) -> None:
+    def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
         """
         Modify block values in-place with new item value.
 
@@ -921,8 +919,6 @@ def set_inplace(
 
         Caller is responsible for checking values.dtype == self.dtype.
         """
-        if refs is not None:
-            values = values.copy()
         if copy:
             self.values = self.values.copy()
         self.values[locs] = values
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 9d00ef10bb2b0..8d3bf9c0e1c82 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1351,7 +1351,7 @@ def _iset_single(
                 # perform Copy-on-Write and clear the reference
                 copy = True
             iloc = self.blklocs[loc]
-            blk.set_inplace(slice(iloc, iloc + 1), value, copy=copy, refs=refs)
+            blk.set_inplace(slice(iloc, iloc + 1), value, copy=copy)
             return
 
         nb = new_block_2d(value, placement=blk._mgr_locs, refs=refs)

From 7bdff6bb2c785f224a99b81d1e4f83da2492ecab Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 15 Mar 2023 15:42:33 +0100
Subject: [PATCH 06/14] Fix typing

---
 pandas/core/frame.py            | 10 ++++++++--
 pandas/core/internals/blocks.py |  7 +------
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index aa97521bde780..5a32bf02933bc 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4131,13 +4131,19 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None:
         self[key] = value[value.columns[0]]
 
     def _iset_item_mgr(
-        self, loc: int | slice | np.ndarray, value, inplace: bool = False, refs=None
+        self,
+        loc: int | slice | np.ndarray,
+        value,
+        inplace: bool = False,
+        refs: BlockValuesRefs | None = None,
     ) -> None:
         # when called from _set_item_mgr loc can be anything returned from get_loc
         self._mgr.iset(loc, value, inplace=inplace, refs=refs)
         self._clear_item_cache()
 
-    def _set_item_mgr(self, key, value: ArrayLike, refs=None) -> None:
+    def _set_item_mgr(
+        self, key, value: ArrayLike, refs: BlockValuesRefs | None = None
+    ) -> None:
         try:
             loc = self._info_axis.get_loc(key)
         except KeyError:
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index f139fb78d483a..cb336d2f718a6 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -1902,14 +1902,9 @@ def iget(self, i: int | tuple[int, int] | tuple[slice, int]):
                 raise IndexError(f"{self} only contains one item")
             return self.values
 
-    def set_inplace(
-        self, locs, values: ArrayLike, copy: bool = False, refs=None
-    ) -> None:
+    def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
         # When an ndarray, we should have locs.tolist() == [0]
         # When a BlockPlacement we should have list(locs) == [0]
-        if refs is not None:
-            self = self.make_block(values, refs=refs)
-            return
         if copy:
             self.values = self.values.copy()
         self.values[:] = values

From 90777f52313b4ede83ea7e8558b695b50161d872 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 15 Mar 2023 15:55:43 +0100
Subject: [PATCH 07/14] Add tests

---
 pandas/conftest.py                          |  1 +
 pandas/core/frame.py                        |  1 +
 pandas/core/internals/managers.py           |  9 +++---
 pandas/tests/copy_view/test_indexing.py     | 31 +++++++++++++++++++++
 pandas/tests/frame/indexing/test_setitem.py |  2 --
 5 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index 95bb2078d151c..fcf74bbf796dd 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1921,6 +1921,7 @@ def using_copy_on_write() -> bool:
     """
     Fixture to check if Copy-on-Write is enabled.
     """
+    pd.options.mode.copy_on_write = True
     return pd.options.mode.copy_on_write and pd.options.mode.data_manager == "block"
 
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5a32bf02933bc..668ff2bb97fc6 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4192,6 +4192,7 @@ def _set_item(self, key, value) -> None:
                 existing_piece = self[key]
                 if isinstance(existing_piece, DataFrame):
                     value = np.tile(value, (len(existing_piece.columns), 1)).T
+                    refs = None
 
         self._set_item_mgr(key, value, refs)
 
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 8d3bf9c0e1c82..ba875c8f27f0e 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1205,9 +1205,7 @@ def value_getitem(placement):
             if inplace and blk.should_store(value):
                 # Updating inplace -> check if we need to do Copy-on-Write
                 if using_copy_on_write() and not self._has_no_reference_block(blkno_l):
-                    self._iset_split_block(
-                        blkno_l, blk_locs, value_getitem(val_locs), refs=refs
-                    )
+                    self._iset_split_block(blkno_l, blk_locs, value_getitem(val_locs))
                 else:
                     blk.set_inplace(blk_locs, value_getitem(val_locs))
                     continue
@@ -1221,7 +1219,7 @@ def value_getitem(placement):
                     continue
                 else:
                     # Defer setting the new values to enable consolidation
-                    self._iset_split_block(blkno_l, blk_locs)
+                    self._iset_split_block(blkno_l, blk_locs, refs=refs)
 
         if len(removed_blknos):
             # Remove blocks & update blknos accordingly
@@ -1284,7 +1282,7 @@ def _iset_split_block(
         blkno_l: int,
         blk_locs: np.ndarray | list[int],
         value: ArrayLike | None = None,
-        refs=None,
+        refs: BlockValuesRefs | None = None,
     ) -> None:
         """Removes columns from a block by splitting the block.
 
@@ -1297,6 +1295,7 @@ def _iset_split_block(
         blkno_l: The block number to operate on, relevant for updating the manager
         blk_locs: The locations of our block that should be deleted.
         value: The value to set as a replacement.
+        refs: The reference tracking object of the value to set.
         """
         blk = self.blocks[blkno_l]
 
diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py
index 365490c957e49..2beb51cbe1359 100644
--- a/pandas/tests/copy_view/test_indexing.py
+++ b/pandas/tests/copy_view/test_indexing.py
@@ -1052,3 +1052,34 @@ def test_getitem_midx_slice(using_copy_on_write, using_array_manager):
     if using_copy_on_write:
         new_df.iloc[0, 0] = 100
         tm.assert_frame_equal(df_orig, df)
+
+
+def test_setitem_series_no_copy(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3]})
+    rhs = Series([4, 5, 6])
+    rhs_orig = rhs.copy()
+    df["b"] = rhs
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(rhs), get_array(df, "b"))
+
+    df.iloc[0, 1] = 100
+    tm.assert_series_equal(rhs, rhs_orig)
+
+    df["a"] = rhs
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(rhs), get_array(df, "a"))
+
+    df.iloc[0, 0] = 100
+    tm.assert_series_equal(rhs, rhs_orig)
+
+
+def test_setitem_series_no_copy_split_block(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3], "b": 1})
+    rhs = Series([4, 5, 6])
+    rhs_orig = rhs.copy()
+    df["b"] = rhs
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(rhs), get_array(df, "b"))
+
+    df.iloc[0, 1] = 100
+    tm.assert_series_equal(rhs, rhs_orig)
diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
index 612a4a0a0c1b2..998ac9c8395ce 100644
--- a/pandas/tests/frame/indexing/test_setitem.py
+++ b/pandas/tests/frame/indexing/test_setitem.py
@@ -514,10 +514,8 @@ def test_setitem_intervals(self):
         tm.assert_series_equal(df["C"], df["E"], check_names=False)
 
     def test_setitem_categorical(self):
-        pd.options.mode.copy_on_write = True
         # GH#35369
         df = DataFrame({"h": Series(list("mn")).astype("category")})
-        # view = df[:]
         df.h = df.h.cat.reorder_categories(["n", "m"])
         expected = DataFrame(
             {"h": Categorical(["m", "n"]).reorder_categories(["n", "m"])}

From 46256fe331f6fa86cabf2874cc215ebf6a4dc625 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 15 Mar 2023 15:56:29 +0100
Subject: [PATCH 08/14] Add tests

---
 pandas/conftest.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index fcf74bbf796dd..95bb2078d151c 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1921,7 +1921,6 @@ def using_copy_on_write() -> bool:
     """
     Fixture to check if Copy-on-Write is enabled.
     """
-    pd.options.mode.copy_on_write = True
     return pd.options.mode.copy_on_write and pd.options.mode.data_manager == "block"
 
 

From 2bc836b0589cfc91fc3246572f5e6ab17c076fc0 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 15 Mar 2023 15:58:41 +0100
Subject: [PATCH 09/14] Move tests

---
 pandas/core/internals/managers.py       |  2 +-
 pandas/tests/copy_view/test_indexing.py | 31 ------------------------
 pandas/tests/copy_view/test_setitem.py  | 32 +++++++++++++++++++++++++
 3 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index ba875c8f27f0e..a77720cfdcb40 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1333,7 +1333,7 @@ def _iset_single(
         inplace: bool,
         blkno: int,
         blk: Block,
-        refs=None,
+        refs: BlockValuesRefs | None = None,
     ) -> None:
         """
         Fastpath for iset when we are only setting a single position and
diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py
index 2beb51cbe1359..365490c957e49 100644
--- a/pandas/tests/copy_view/test_indexing.py
+++ b/pandas/tests/copy_view/test_indexing.py
@@ -1052,34 +1052,3 @@ def test_getitem_midx_slice(using_copy_on_write, using_array_manager):
     if using_copy_on_write:
         new_df.iloc[0, 0] = 100
         tm.assert_frame_equal(df_orig, df)
-
-
-def test_setitem_series_no_copy(using_copy_on_write):
-    df = DataFrame({"a": [1, 2, 3]})
-    rhs = Series([4, 5, 6])
-    rhs_orig = rhs.copy()
-    df["b"] = rhs
-    if using_copy_on_write:
-        assert np.shares_memory(get_array(rhs), get_array(df, "b"))
-
-    df.iloc[0, 1] = 100
-    tm.assert_series_equal(rhs, rhs_orig)
-
-    df["a"] = rhs
-    if using_copy_on_write:
-        assert np.shares_memory(get_array(rhs), get_array(df, "a"))
-
-    df.iloc[0, 0] = 100
-    tm.assert_series_equal(rhs, rhs_orig)
-
-
-def test_setitem_series_no_copy_split_block(using_copy_on_write):
-    df = DataFrame({"a": [1, 2, 3], "b": 1})
-    rhs = Series([4, 5, 6])
-    rhs_orig = rhs.copy()
-    df["b"] = rhs
-    if using_copy_on_write:
-        assert np.shares_memory(get_array(rhs), get_array(df, "b"))
-
-    df.iloc[0, 1] = 100
-    tm.assert_series_equal(rhs, rhs_orig)
diff --git a/pandas/tests/copy_view/test_setitem.py b/pandas/tests/copy_view/test_setitem.py
index 8b894f371c78d..5f0d23d92a02b 100644
--- a/pandas/tests/copy_view/test_setitem.py
+++ b/pandas/tests/copy_view/test_setitem.py
@@ -7,6 +7,7 @@
     Series,
 )
 import pandas._testing as tm
+from pandas.tests.copy_view.util import get_array
 
 # -----------------------------------------------------------------------------
 # Copy/view behaviour for the values that are set in a DataFrame
@@ -85,3 +86,34 @@ def test_set_columns_with_dataframe(using_copy_on_write):
     # and modifying the set DataFrame does not modify the original DataFrame
     df2.iloc[0, 0] = 0
     tm.assert_series_equal(df["c"], Series([7, 8, 9], name="c"))
+
+
+def test_setitem_series_no_copy(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3]})
+    rhs = Series([4, 5, 6])
+    rhs_orig = rhs.copy()
+    df["b"] = rhs
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(rhs), get_array(df, "b"))
+
+    df.iloc[0, 1] = 100
+    tm.assert_series_equal(rhs, rhs_orig)
+
+    df["a"] = rhs
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(rhs), get_array(df, "a"))
+
+    df.iloc[0, 0] = 100
+    tm.assert_series_equal(rhs, rhs_orig)
+
+
+def test_setitem_series_no_copy_split_block(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3], "b": 1})
+    rhs = Series([4, 5, 6])
+    rhs_orig = rhs.copy()
+    df["b"] = rhs
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(rhs), get_array(df, "b"))
+
+    df.iloc[0, 1] = 100
+    tm.assert_series_equal(rhs, rhs_orig)

From 14d77da22380b525f5137f0bab8a3259984f4759 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 15 Mar 2023 16:17:20 +0100
Subject: [PATCH 10/14] Add midx test

---
 pandas/tests/copy_view/test_setitem.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/pandas/tests/copy_view/test_setitem.py b/pandas/tests/copy_view/test_setitem.py
index 5f0d23d92a02b..0991850580b55 100644
--- a/pandas/tests/copy_view/test_setitem.py
+++ b/pandas/tests/copy_view/test_setitem.py
@@ -3,6 +3,7 @@
 from pandas import (
     DataFrame,
     Index,
+    MultiIndex,
     RangeIndex,
     Series,
 )
@@ -117,3 +118,15 @@ def test_setitem_series_no_copy_split_block(using_copy_on_write):
 
     df.iloc[0, 1] = 100
     tm.assert_series_equal(rhs, rhs_orig)
+
+
+def test_setitem_series_column_midx_broadcasting(using_copy_on_write):
+    df = DataFrame(
+        [[1, 2, 3], [3, 4, 5]],
+        columns=MultiIndex.from_arrays([["a", "a", "b"], [1, 2, 3]]),
+    )
+    rhs = Series([10, 11])
+    df["a"] = rhs
+    assert not np.shares_memory(get_array(rhs), df._get_column_array(0))
+    if using_copy_on_write:
+        assert df._mgr._has_no_reference(0)

From fcb0bb44029969f912b9221d376df22add854c94 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 15 Mar 2023 16:55:01 +0100
Subject: [PATCH 11/14] Fix missing arg

---
 pandas/core/internals/array_manager.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 5242fa6913592..b3a6b9bb23a78 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -798,7 +798,11 @@ def column_arrays(self) -> list[ArrayLike]:
         return [np.asarray(arr) for arr in self.arrays]
 
     def iset(
-        self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False
+        self,
+        loc: int | slice | np.ndarray,
+        value: ArrayLike,
+        inplace: bool = False,
+        refs=None,
     ) -> None:
         """
         Set new column(s).

From d6ed1cc62e5c874a7ede2d33b998cc2ab7e051d5 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 15 Mar 2023 22:44:21 +0100
Subject: [PATCH 12/14] Remove

---
 pandas/tests/copy_view/test_indexing.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py
index 365490c957e49..63f0ffe8542e2 100644
--- a/pandas/tests/copy_view/test_indexing.py
+++ b/pandas/tests/copy_view/test_indexing.py
@@ -979,9 +979,7 @@ def test_column_as_series_no_item_cache(
 
 def test_dataframe_add_column_from_series(backend, using_copy_on_write):
     # Case: adding a new column to a DataFrame from an existing column/series
-    # -> always already takes a copy on assignment
-    # (no change in behaviour here)
-    # TODO can we achieve the same behaviour with Copy-on-Write?
+    # -> delays copy under CoW
     _, DataFrame, Series = backend
     df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
 
@@ -997,11 +995,6 @@ def test_dataframe_add_column_from_series(backend, using_copy_on_write):
     expected = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "new": [10, 11, 12]})
     tm.assert_frame_equal(df, expected)
 
-    # editing column in frame -> doesn't modify series
-    df.loc[2, "new"] = 100
-    expected_s = Series([0, 11, 12])
-    tm.assert_series_equal(s, expected_s)
-
 
 @pytest.mark.parametrize("val", [100, "a"])
 @pytest.mark.parametrize(

From 1997e3fb67f3eef9f10cf1b8279f2ea24f3f2fec Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 5 May 2023 12:52:39 +0200
Subject: [PATCH 13/14] add some more comments to the tests

---
 pandas/tests/copy_view/test_setitem.py | 30 ++++++++++++++++++++------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/pandas/tests/copy_view/test_setitem.py b/pandas/tests/copy_view/test_setitem.py
index 0991850580b55..2a99a00e249fa 100644
--- a/pandas/tests/copy_view/test_setitem.py
+++ b/pandas/tests/copy_view/test_setitem.py
@@ -22,7 +22,7 @@ def test_set_column_with_array():
     df["c"] = arr
 
     # the array data is copied
-    assert not np.shares_memory(df["c"].values, arr)
+    assert not np.shares_memory(get_array(df, "c"), arr)
     # and thus modifying the array does not modify the DataFrame
     arr[0] = 0
     tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))
@@ -30,16 +30,17 @@ def test_set_column_with_array():
 
 def test_set_column_with_series(using_copy_on_write):
     # Case: setting a series as a new column (df[col] = s) copies that data
+    # (with delayed copy with CoW)
     df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
     ser = Series([1, 2, 3])
 
     df["c"] = ser
 
     if using_copy_on_write:
-        assert np.shares_memory(df["c"].values, ser.values)
+        assert np.shares_memory(get_array(df, "c"), get_array(ser))
     else:
         # the series data is copied
-        assert not np.shares_memory(df["c"].values, ser.values)
+        assert not np.shares_memory(get_array(df, "c"), get_array(ser))
 
     # and modifying the series does not modify the DataFrame
     ser.iloc[0] = 0
@@ -55,7 +56,7 @@ def test_set_column_with_index(using_copy_on_write):
     df["c"] = idx
 
     # the index data is copied
-    assert not np.shares_memory(df["c"].values, idx.values)
+    assert not np.shares_memory(get_array(df, "c"), idx.values)
 
     # and thus modifying the index does not modify the DataFrame
     idx.values[0] = 0
@@ -66,23 +67,24 @@ def test_set_column_with_index(using_copy_on_write):
 
     df["d"] = idx
 
-    assert not np.shares_memory(df["d"].values, arr)
+    assert not np.shares_memory(get_array(df, "d"), arr)
     arr[0] = 0
     tm.assert_series_equal(df["d"], Series([1, 2, 3], name="d"))
 
 
 def test_set_columns_with_dataframe(using_copy_on_write):
     # Case: setting a DataFrame as new columns copies that data
+    # (with delayed copy with CoW)
     df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
     df2 = DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]})
 
     df[["c", "d"]] = df2
 
     if using_copy_on_write:
-        assert np.shares_memory(df["c"].values, df2["c"].values)
+        assert np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
     else:
         # the data is copied
-        assert not np.shares_memory(df["c"].values, df2["c"].values)
+        assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
 
     # and modifying the set DataFrame does not modify the original DataFrame
     df2.iloc[0, 0] = 0
@@ -90,9 +92,12 @@ def test_set_columns_with_dataframe(using_copy_on_write):
 
 
 def test_setitem_series_no_copy(using_copy_on_write):
+    # Case: setting a Series as column into a DataFrame can delay copying that data
     df = DataFrame({"a": [1, 2, 3]})
     rhs = Series([4, 5, 6])
     rhs_orig = rhs.copy()
+
+    # adding a new column
     df["b"] = rhs
     if using_copy_on_write:
         assert np.shares_memory(get_array(rhs), get_array(df, "b"))
@@ -100,6 +105,13 @@ def test_setitem_series_no_copy(using_copy_on_write):
     df.iloc[0, 1] = 100
     tm.assert_series_equal(rhs, rhs_orig)
 
+
+def test_setitem_series_no_copy_single_block(using_copy_on_write):
+    # Overwriting an existing column that is a single block
+    df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
+    rhs = Series([4, 5, 6])
+    rhs_orig = rhs.copy()
+
     df["a"] = rhs
     if using_copy_on_write:
         assert np.shares_memory(get_array(rhs), get_array(df, "a"))
@@ -109,9 +121,11 @@ def test_setitem_series_no_copy(using_copy_on_write):
 
 
 def test_setitem_series_no_copy_split_block(using_copy_on_write):
+    # Overwriting an existing column that is part of a larger block
     df = DataFrame({"a": [1, 2, 3], "b": 1})
     rhs = Series([4, 5, 6])
     rhs_orig = rhs.copy()
+
     df["b"] = rhs
     if using_copy_on_write:
         assert np.shares_memory(get_array(rhs), get_array(df, "b"))
@@ -121,6 +135,8 @@ def test_setitem_series_no_copy_split_block(using_copy_on_write):
 
 
 def test_setitem_series_column_midx_broadcasting(using_copy_on_write):
+    # Setting a Series to multiple columns will repeat the data
+    # (currently copying the data eagerly)
     df = DataFrame(
         [[1, 2, 3], [3, 4, 5]],
         columns=MultiIndex.from_arrays([["a", "a", "b"], [1, 2, 3]]),

From 0e3d3f7d1537dbae9e608fdb3771cd6a4ea875d1 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 5 May 2023 13:14:39 +0200
Subject: [PATCH 14/14] update some docstrings

---
 pandas/core/frame.py              | 5 +++--
 pandas/core/internals/managers.py | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e934bdbfeb4df..b6c80a51cec16 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4879,7 +4879,8 @@ def _sanitize_column(
     ) -> tuple[ArrayLike, BlockValuesRefs | None]:
         """
         Ensures new columns (which go into the BlockManager as new blocks) are
-        always copied and converted into an array.
+        always copied (or a reference is being tracked to them under CoW)
+        and converted into an array.
 
         Parameters
         ----------
@@ -4887,7 +4888,7 @@ def _sanitize_column(
 
         Returns
         -------
-        numpy.ndarray or ExtensionArray
+        tuple of numpy.ndarray or ExtensionArray and optional BlockValuesRefs
         """
         self._ensure_valid_index(value)
 
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 57378fda92ff2..397f9d5b1bbe6 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1401,6 +1401,7 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike, refs=None) -> None:
         loc : int
         item : hashable
         value : np.ndarray or ExtensionArray
+        refs : The reference tracking object of the value to set.
         """
         # insert to the axis; this could possibly raise a TypeError
         new_axis = self.items.insert(loc, item)