From d968cb3cd952751a5f2f29c28fe03ec39fb8852e Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 3 Feb 2023 11:10:26 +0100
Subject: [PATCH 01/22] Add ref tracking to Block

---
 pandas/_libs/internals.pyx | 60 +++++++++++++++++++++++++++++++++-----
 1 file changed, 53 insertions(+), 7 deletions(-)

diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index 3333ac1115177..3ad2c80994dbd 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -580,9 +580,16 @@ cdef class SharedBlock:
     """
     cdef:
         public BlockPlacement _mgr_locs
+        public BlockValuesRefs refs
         readonly int ndim
 
-    def __cinit__(self, values, placement: BlockPlacement, ndim: int):
+    def __cinit__(
+        self,
+        values,
+        placement: BlockPlacement,
+        ndim: int,
+        refs: BlockValuesRefs | None = None,
+    ):
         """
         Parameters
         ----------
@@ -594,6 +601,10 @@ cdef class SharedBlock:
         """
         self._mgr_locs = placement
         self.ndim = ndim
+        if refs is None:
+            self.refs = BlockValuesRefs(self)
+        else:
+            self.refs = refs
 
     cpdef __reduce__(self):
         args = (self.values, self.mgr_locs.indexer, self.ndim)
@@ -619,9 +630,15 @@ cdef class NumpyBlock(SharedBlock):
     cdef:
         public ndarray values
 
-    def __cinit__(self, ndarray values, BlockPlacement placement, int ndim):
+    def __cinit__(
+        self,
+        ndarray values,
+        BlockPlacement placement,
+        int ndim,
+        refs: BlockValuesRefs | None = None,
+    ):
         # set values here; the (implicit) call to SharedBlock.__cinit__ will
-        #  set placement and ndim
+        #  set placement, ndim and refs
         self.values = values
 
     cpdef NumpyBlock getitem_block_index(self, slice slicer):
@@ -641,9 +658,15 @@ cdef class NDArrayBackedBlock(SharedBlock):
     cdef public:
         NDArrayBacked values
 
-    def __cinit__(self, NDArrayBacked values, BlockPlacement placement, int ndim):
+    def __cinit__(
+        self,
+        NDArrayBacked values,
+        BlockPlacement placement,
+        int ndim,
+        refs: BlockValuesRefs | None = None,
+    ):
         # set values here; the (implicit) call to SharedBlock.__cinit__ will
-        #  set placement and ndim
+        #  set placement, ndim and refs
         self.values = values
 
     cpdef NDArrayBackedBlock getitem_block_index(self, slice slicer):
@@ -660,9 +683,15 @@ cdef class Block(SharedBlock):
     cdef:
         public object values
 
-    def __cinit__(self, object values, BlockPlacement placement, int ndim):
+    def __cinit__(
+        self,
+        object values,
+        BlockPlacement placement,
+        int ndim,
+        refs: BlockValuesRefs | None = None,
+    ):
         # set values here; the (implicit) call to SharedBlock.__cinit__ will
-        #  set placement and ndim
+        #  set placement, ndim and refs
         self.values = values
 
 
@@ -839,3 +868,20 @@ cdef class BlockManager:
         return type(self)(
             tuple(new_blocks), new_axes, new_refs, parent=self, verify_integrity=False
         )
+
+
+cdef class BlockValuesRefs:
+    cdef:
+        public list referenced_blocks
+
+    def __cinit__(self, blk: SharedBlock) -> None:
+        self.referenced_blocks = [weakref.ref(blk)]
+
+    cdef void add_reference(self, blk: SharedBlock):
+        self.referenced_blocks.append(weakref.ref(blk))
+
+    cdef bint has_reference(self):
+        self.referenced_blocks = [
+            obj for obj in self.referenced_blocks if obj() is not None
+        ]
+        return len(self.referenced_blocks) > 0

From 6a41cdb9dbe4837a18fcb6b63cc480962a28f238 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 3 Feb 2023 14:04:18 +0100
Subject: [PATCH 02/22] Add typing and poc in copy

---
 pandas/_libs/internals.pyi             | 14 +++++++++++++-
 pandas/_libs/internals.pyx             |  8 +++++---
 pandas/core/internals/blocks.py        | 12 ++++++++++--
 pandas/core/internals/managers.py      | 15 +++------------
 pandas/tests/copy_view/test_methods.py |  3 ++-
 5 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/pandas/_libs/internals.pyi b/pandas/_libs/internals.pyi
index 79bdbea71e4d8..5dfcc3726c84f 100644
--- a/pandas/_libs/internals.pyi
+++ b/pandas/_libs/internals.pyi
@@ -4,6 +4,7 @@ from typing import (
     final,
     overload,
 )
+import weakref
 
 import numpy as np
 
@@ -59,8 +60,13 @@ class SharedBlock:
     _mgr_locs: BlockPlacement
     ndim: int
     values: ArrayLike
+    refs: BlockValuesRefs
     def __init__(
-        self, values: ArrayLike, placement: BlockPlacement, ndim: int
+        self,
+        values: ArrayLike,
+        placement: BlockPlacement,
+        ndim: int,
+        refs: BlockValuesRefs | None = ...,
     ) -> None: ...
 
 class NumpyBlock(SharedBlock):
@@ -87,3 +93,9 @@ class BlockManager:
     ) -> None: ...
     def get_slice(self: T, slobj: slice, axis: int = ...) -> T: ...
     def _rebuild_blknos_and_blklocs(self) -> None: ...
+
+class BlockValuesRefs:
+    referenced_blocks: list[weakref.ref]
+    def __init__(self, blk: SharedBlock) -> None: ...
+    def add_reference(self, blk: SharedBlock) -> None: ...
+    def has_reference(self) -> bool: ...
diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index 3ad2c80994dbd..8ffddc8e6394a 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -604,6 +604,7 @@ cdef class SharedBlock:
         if refs is None:
             self.refs = BlockValuesRefs(self)
         else:
+            refs.add_reference(self)
             self.refs = refs
 
     cpdef __reduce__(self):
@@ -877,11 +878,12 @@ cdef class BlockValuesRefs:
     def __cinit__(self, blk: SharedBlock) -> None:
         self.referenced_blocks = [weakref.ref(blk)]
 
-    cdef void add_reference(self, blk: SharedBlock):
+    def add_reference(self, blk: SharedBlock) -> None:
         self.referenced_blocks.append(weakref.ref(blk))
 
-    cdef bint has_reference(self):
+    def has_reference(self) -> bool:
         self.referenced_blocks = [
             obj for obj in self.referenced_blocks if obj() is not None
         ]
-        return len(self.referenced_blocks) > 0
+        # Checking for more references than block pointing to itself
+        return len(self.referenced_blocks) > 1
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index bb555690b867a..7b520f06a3b8b 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -20,7 +20,10 @@
     lib,
     writers,
 )
-from pandas._libs.internals import BlockPlacement
+from pandas._libs.internals import (
+    BlockPlacement,
+    BlockValuesRefs,
+)
 from pandas._libs.missing import NA
 from pandas._libs.tslibs import IncompatibleFrequency
 from pandas._typing import (
@@ -144,6 +147,7 @@ class Block(PandasObject):
 
     values: np.ndarray | ExtensionArray
     ndim: int
+    refs: BlockValuesRefs
     __init__: Callable
 
     __slots__ = ()
@@ -506,9 +510,13 @@ def to_native_types(self, na_rep: str = "nan", quoting=None, **kwargs) -> Block:
     def copy(self, deep: bool = True) -> Block:
         """copy constructor"""
         values = self.values
+        refs: BlockValuesRefs | None
         if deep:
             values = values.copy()
-        return type(self)(values, placement=self._mgr_locs, ndim=self.ndim)
+            refs = None
+        else:
+            refs = self.refs
+        return type(self)(values, placement=self._mgr_locs, ndim=self.ndim, refs=refs)
 
     # ---------------------------------------------------------------------
     # Replace
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 81c5810d29456..2b04a079f5ffb 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -256,8 +256,9 @@ def _has_no_reference_block(self, blkno: int) -> bool:
         """
         # TODO(CoW) include `or self.refs[blkno]() is None` ?
         return (
-            self.refs is None or self.refs[blkno] is None
-        ) and weakref.getweakrefcount(self.blocks[blkno]) == 0
+            (self.refs is None or self.refs[blkno] is None)
+            and weakref.getweakrefcount(self.blocks[blkno]) == 0
+        ) and not self.blocks[blkno].refs.has_reference()
 
     def _clear_reference_block(self, blkno: int) -> None:
         """
@@ -629,17 +630,7 @@ def copy_func(ax):
             new_axes = list(self.axes)
 
         res = self.apply("copy", deep=deep)
-        new_refs: list[weakref.ref | None] | None
-        if deep:
-            new_refs = None
-            parent = None
-        else:
-            new_refs = [weakref.ref(blk) for blk in self.blocks]
-            parent = self
-
         res.axes = new_axes
-        res.refs = new_refs
-        res.parent = parent
 
         if self.ndim > 1:
             # Avoid needing to re-compute these
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index fbd4bbfb38c27..0e7f361257a61 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -36,7 +36,8 @@ def test_copy_shallow(using_copy_on_write):
     # the shallow copy still shares memory
     assert np.shares_memory(get_array(df_copy, "a"), get_array(df, "a"))
     if using_copy_on_write:
-        assert df_copy._mgr.refs is not None
+        assert df_copy._mgr.blocks[0].refs.has_reference()
+        assert df_copy._mgr.blocks[1].refs.has_reference()
 
     if using_copy_on_write:
         # mutating shallow copy doesn't mutate original

From b4779d240aba645172f765b525f1df888ca40e87 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 3 Feb 2023 16:18:16 +0100
Subject: [PATCH 03/22] Change reference tracking

---
 pandas/_libs/internals.pyx                  |  22 +-
 pandas/core/generic.py                      |   4 +-
 pandas/core/internals/blocks.py             |  21 +-
 pandas/core/internals/concat.py             |  18 +-
 pandas/core/internals/managers.py           | 213 +++++---------------
 pandas/core/internals/ops.py                |   2 +-
 pandas/core/reshape/concat.py               |  16 +-
 pandas/tests/copy_view/test_constructors.py |   4 +-
 pandas/tests/copy_view/test_internals.py    |  34 +---
 pandas/tests/copy_view/test_methods.py      |   3 +-
 10 files changed, 87 insertions(+), 250 deletions(-)

diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index 8ffddc8e6394a..3ce0e2bb94eef 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -649,7 +649,7 @@ cdef class NumpyBlock(SharedBlock):
         Assumes self.ndim == 2
         """
         new_values = self.values[..., slicer]
-        return type(self)(new_values, self._mgr_locs, ndim=self.ndim)
+        return type(self)(new_values, self._mgr_locs, ndim=self.ndim, refs=self.refs)
 
 
 cdef class NDArrayBackedBlock(SharedBlock):
@@ -677,7 +677,7 @@ cdef class NDArrayBackedBlock(SharedBlock):
         Assumes self.ndim == 2
         """
         new_values = self.values[..., slicer]
-        return type(self)(new_values, self._mgr_locs, ndim=self.ndim)
+        return type(self)(new_values, self._mgr_locs, ndim=self.ndim, refs=self.refs)
 
 
 cdef class Block(SharedBlock):
@@ -703,15 +703,11 @@ cdef class BlockManager:
         public list axes
         public bint _known_consolidated, _is_consolidated
         public ndarray _blknos, _blklocs
-        public list refs
-        public object parent
 
     def __cinit__(
         self,
         blocks=None,
         axes=None,
-        refs=None,
-        parent=None,
         verify_integrity=True,
     ):
         # None as defaults for unpickling GH#42345
@@ -725,8 +721,6 @@ cdef class BlockManager:
 
         self.blocks = blocks
         self.axes = axes.copy()  # copy to make sure we are not remotely-mutable
-        self.refs = refs
-        self.parent = parent
 
         # Populate known_consolidate, blknos, and blklocs lazily
         self._known_consolidated = False
@@ -835,16 +829,12 @@ cdef class BlockManager:
             ndarray blknos, blklocs
 
         nbs = []
-        nrefs = []
         for blk in self.blocks:
             nb = blk.getitem_block_index(slobj)
             nbs.append(nb)
-            nrefs.append(weakref.ref(blk))
 
         new_axes = [self.axes[0], self.axes[1]._getitem_slice(slobj)]
-        mgr = type(self)(
-            tuple(nbs), new_axes, nrefs, parent=self, verify_integrity=False
-        )
+        mgr = type(self)(tuple(nbs), new_axes, verify_integrity=False)
 
         # We can avoid having to rebuild blklocs/blknos
         blklocs = self._blklocs
@@ -857,7 +847,7 @@ cdef class BlockManager:
     def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager:
 
         if axis == 0:
-            new_blocks, new_refs = self._slice_take_blocks_ax0(slobj)
+            new_blocks = self._slice_take_blocks_ax0(slobj)
         elif axis == 1:
             return self._get_index_slice(slobj)
         else:
@@ -866,9 +856,7 @@ cdef class BlockManager:
         new_axes = list(self.axes)
         new_axes[axis] = new_axes[axis]._getitem_slice(slobj)
 
-        return type(self)(
-            tuple(new_blocks), new_axes, new_refs, parent=self, verify_integrity=False
-        )
+        return type(self)(tuple(new_blocks), new_axes, verify_integrity=False)
 
 
 cdef class BlockValuesRefs:
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 8fa86e80e1a44..2720d064d6f1c 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -768,8 +768,8 @@ def swapaxes(
             )
             assert isinstance(new_mgr, BlockManager)
             assert isinstance(self._mgr, BlockManager)
-            new_mgr.parent = self._mgr
-            new_mgr.refs = [weakref.ref(self._mgr.blocks[0])]
+            new_mgr.blocks[0].refs = self._mgr.blocks[0].refs
+            new_mgr.blocks[0].refs.add_reference(new_mgr.blocks[0])
             return self._constructor(new_mgr).__finalize__(self, method="swapaxes")
 
         elif (copy or copy is None) and self._mgr.is_single_block:
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 7b520f06a3b8b..e941a40357228 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -269,7 +269,8 @@ def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block:
         if new_values.ndim != self.values.ndim:
             raise ValueError("Only same dim slicing is allowed")
 
-        return type(self)(new_values, new_mgr_locs, self.ndim)
+        refs = self.refs if isinstance(slicer, slice) else None
+        return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)
 
     @final
     def getitem_block_columns(
@@ -285,7 +286,7 @@ def getitem_block_columns(
         if new_values.ndim != self.values.ndim:
             raise ValueError("Only same dim slicing is allowed")
 
-        return type(self)(new_values, new_mgr_locs, self.ndim)
+        return type(self)(new_values, new_mgr_locs, self.ndim, refs=self.refs)
 
     @final
     def _can_hold_element(self, element: Any) -> bool:
@@ -1352,6 +1353,10 @@ def delete(self, loc) -> list[Block]:
         new_blocks: list[Block] = []
 
         previous_loc = -1
+        # TODO(CoW): This is tricky, if parent block goes out of scope
+        # all split blocks are referencing each other even though they
+        # don't share data
+        refs = self.refs if self.refs.has_reference() else None
         for idx in loc:
 
             if idx == previous_loc + 1:
@@ -1362,7 +1367,9 @@ def delete(self, loc) -> list[Block]:
                 # argument type "Tuple[slice, slice]"
                 values = self.values[previous_loc + 1 : idx, :]  # type: ignore[call-overload]  # noqa
                 locs = mgr_locs_arr[previous_loc + 1 : idx]
-                nb = type(self)(values, placement=BlockPlacement(locs), ndim=self.ndim)
+                nb = type(self)(
+                    values, placement=BlockPlacement(locs), ndim=self.ndim, refs=refs
+                )
                 new_blocks.append(nb)
 
             previous_loc = idx
@@ -1818,7 +1825,7 @@ def getitem_block_index(self, slicer: slice) -> ExtensionBlock:
         # GH#42787 in principle this is equivalent to values[..., slicer], but we don't
         # require subclasses of ExtensionArray to support that form (for now).
         new_values = self.values[slicer]
-        return type(self)(new_values, self._mgr_locs, ndim=self.ndim)
+        return type(self)(new_values, self._mgr_locs, ndim=self.ndim, refs=self.refs)
 
     def diff(self, n: int, axis: AxisInt = 1) -> list[Block]:
         # only reached with ndim == 2 and axis == 1
@@ -2119,7 +2126,9 @@ def new_block_2d(values: ArrayLike, placement: BlockPlacement):
     return klass(values, ndim=2, placement=placement)
 
 
-def new_block(values, placement, *, ndim: int) -> Block:
+def new_block(
+    values, placement, *, ndim: int, refs: BlockValuesRefs | None = None
+) -> Block:
     # caller is responsible for ensuring values is NOT a PandasArray
 
     if not isinstance(placement, BlockPlacement):
@@ -2130,7 +2139,7 @@ def new_block(values, placement, *, ndim: int) -> Block:
     klass = get_block_type(values.dtype)
 
     values = maybe_coerce_values(values)
-    return klass(values, ndim=ndim, placement=placement)
+    return klass(values, ndim=ndim, placement=placement, refs=refs)
 
 
 def check_ndim(values, placement: BlockPlacement, ndim: int) -> None:
diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index d46b51a2ee954..bedd4d92a1ea3 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -7,7 +7,6 @@
     Sequence,
     cast,
 )
-import weakref
 
 import numpy as np
 
@@ -62,10 +61,7 @@
     ensure_block_shape,
     new_block_2d,
 )
-from pandas.core.internals.managers import (
-    BlockManager,
-    using_copy_on_write,
-)
+from pandas.core.internals.managers import BlockManager
 
 if TYPE_CHECKING:
     from pandas import Index
@@ -271,8 +267,6 @@ def _concat_managers_axis0(
 
     offset = 0
     blocks = []
-    refs: list[weakref.ref | None] = []
-    parents: list = []
     for i, mgr in enumerate(mgrs):
         # If we already reindexed, then we definitely don't need another copy
         made_copy = had_reindexers[i]
@@ -289,17 +283,9 @@ def _concat_managers_axis0(
             nb._mgr_locs = nb._mgr_locs.add(offset)
             blocks.append(nb)
 
-        if not made_copy and not copy and using_copy_on_write():
-            refs.extend([weakref.ref(blk) for blk in mgr.blocks])
-            parents.append(mgr)
-        elif using_copy_on_write():
-            refs.extend([None] * len(mgr.blocks))
-
         offset += len(mgr.items)
 
-    result_parents = parents if parents else None
-    result_ref = refs if refs else None
-    result = BlockManager(tuple(blocks), axes, parent=result_parents, refs=result_ref)
+    result = BlockManager(tuple(blocks), axes)
     return result
 
 
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 2b04a079f5ffb..66b77b13905d2 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -11,7 +11,6 @@
     cast,
 )
 import warnings
-import weakref
 
 import numpy as np
 
@@ -146,8 +145,6 @@ class BaseBlockManager(DataManager):
     _blklocs: npt.NDArray[np.intp]
     blocks: tuple[Block, ...]
     axes: list[Index]
-    refs: list[weakref.ref | None] | None
-    parent: object
 
     @property
     def ndim(self) -> int:
@@ -156,17 +153,11 @@ def ndim(self) -> int:
     _known_consolidated: bool
     _is_consolidated: bool
 
-    def __init__(self, blocks, axes, refs=None, verify_integrity: bool = True) -> None:
+    def __init__(self, blocks, axes, verify_integrity: bool = True) -> None:
         raise NotImplementedError
 
     @classmethod
-    def from_blocks(
-        cls: type_t[T],
-        blocks: list[Block],
-        axes: list[Index],
-        refs: list[weakref.ref | None] | None = None,
-        parent: object = None,
-    ) -> T:
+    def from_blocks(cls: type_t[T], blocks: list[Block], axes: list[Index]) -> T:
         raise NotImplementedError
 
     @property
@@ -254,20 +245,7 @@ def _has_no_reference_block(self, blkno: int) -> bool:
         (whether it references another array or is itself being referenced)
         Returns True if the block has no references.
         """
-        # TODO(CoW) include `or self.refs[blkno]() is None` ?
-        return (
-            (self.refs is None or self.refs[blkno] is None)
-            and weakref.getweakrefcount(self.blocks[blkno]) == 0
-        ) and not self.blocks[blkno].refs.has_reference()
-
-    def _clear_reference_block(self, blkno: int) -> None:
-        """
-        Clear any reference for column `i`.
-        """
-        if self.refs is not None:
-            self.refs[blkno] = None
-            if com.all_none(*self.refs):
-                self.parent = None
+        return not self.blocks[blkno].refs.has_reference()
 
     def get_dtypes(self):
         dtypes = np.array([blk.dtype for blk in self.blocks])
@@ -574,23 +552,17 @@ def _combine(
         # TODO(CoW) we could optimize here if we know that the passed blocks
         # are fully "owned" (eg created from an operation, not coming from
         # an existing manager)
-        new_refs: list[weakref.ref | None] | None = None if copy else []
         for b in blocks:
             nb = b.copy(deep=copy)
             nb.mgr_locs = BlockPlacement(inv_indexer[nb.mgr_locs.indexer])
             new_blocks.append(nb)
-            if not copy:
-                # None has no attribute "append"
-                new_refs.append(weakref.ref(b))  # type: ignore[union-attr]
 
         axes = list(self.axes)
         if index is not None:
             axes[-1] = index
         axes[0] = self.items.take(indexer)
 
-        return type(self).from_blocks(
-            new_blocks, axes, new_refs, parent=None if copy else self
-        )
+        return type(self).from_blocks(new_blocks, axes)
 
     @property
     def nblocks(self) -> int:
@@ -654,7 +626,7 @@ def consolidate(self: T) -> T:
         if self.is_consolidated():
             return self
 
-        bm = type(self)(self.blocks, self.axes, self.refs, verify_integrity=False)
+        bm = type(self)(self.blocks, self.axes, verify_integrity=False)
         bm._is_consolidated = False
         bm._consolidate_inplace()
         return bm
@@ -716,14 +688,14 @@ def reindex_indexer(
             raise IndexError("Requested axis not found in manager")
 
         if axis == 0:
-            new_blocks, new_refs = self._slice_take_blocks_ax0(
+            new_blocks = self._slice_take_blocks_ax0(
                 indexer,
                 fill_value=fill_value,
                 only_slice=only_slice,
                 use_na_proxy=use_na_proxy,
             )
-            parent = None if com.all_none(*new_refs) else self
         else:
+            print(self.blocks)
             new_blocks = [
                 blk.take_nd(
                     indexer,
@@ -734,13 +706,11 @@ def reindex_indexer(
                 )
                 for blk in self.blocks
             ]
-            new_refs = None
-            parent = None
 
         new_axes = list(self.axes)
         new_axes[axis] = new_axis
 
-        new_mgr = type(self).from_blocks(new_blocks, new_axes, new_refs, parent=parent)
+        new_mgr = type(self).from_blocks(new_blocks, new_axes)
         if axis == 1:
             # We can avoid the need to rebuild these
             new_mgr._blknos = self.blknos.copy()
@@ -754,7 +724,7 @@ def _slice_take_blocks_ax0(
         only_slice: bool = False,
         *,
         use_na_proxy: bool = False,
-    ) -> tuple[list[Block], list[weakref.ref | None]]:
+    ) -> list[Block]:
         """
         Slice/take blocks along axis=0.
 
@@ -789,9 +759,7 @@ def _slice_take_blocks_ax0(
                 if sllen == 0:
                     return [], []
                 bp = BlockPlacement(slice(0, sllen))
-                return [blk.getitem_block_columns(slobj, new_mgr_locs=bp)], [
-                    weakref.ref(blk)
-                ]
+                return [blk.getitem_block_columns(slobj, new_mgr_locs=bp)]
             elif not allow_fill or self.ndim == 1:
                 if allow_fill and fill_value is None:
                     fill_value = blk.fill_value
@@ -805,9 +773,7 @@ def _slice_take_blocks_ax0(
                         )
                         for i, ml in enumerate(slobj)
                     ]
-                    # We have
-                    #  all(np.shares_memory(nb.values, blk.values) for nb in blocks)
-                    return blocks, [weakref.ref(blk)] * len(blocks)
+                    return blocks
                 else:
                     bp = BlockPlacement(slice(0, sllen))
                     return [
@@ -817,7 +783,7 @@ def _slice_take_blocks_ax0(
                             new_mgr_locs=bp,
                             fill_value=fill_value,
                         )
-                    ], [None]
+                    ]
 
         if sl_type == "slice":
             blknos = self.blknos[slobj]
@@ -833,7 +799,6 @@ def _slice_take_blocks_ax0(
         # When filling blknos, make sure blknos is updated before appending to
         # blocks list, that way new blkno is exactly len(blocks).
         blocks = []
-        refs: list[weakref.ref | None] = []
         group = not only_slice
         for blkno, mgr_locs in libinternals.get_blkno_placements(blknos, group=group):
             if blkno == -1:
@@ -846,7 +811,6 @@ def _slice_take_blocks_ax0(
                         use_na_proxy=use_na_proxy,
                     )
                 )
-                refs.append(None)
             else:
                 blk = self.blocks[blkno]
 
@@ -860,7 +824,6 @@ def _slice_take_blocks_ax0(
                         newblk = blk.copy(deep=False)
                         newblk.mgr_locs = BlockPlacement(slice(mgr_loc, mgr_loc + 1))
                         blocks.append(newblk)
-                        refs.append(weakref.ref(blk))
 
                 else:
                     # GH#32779 to avoid the performance penalty of copying,
@@ -873,7 +836,6 @@ def _slice_take_blocks_ax0(
                     if isinstance(taker, slice):
                         nb = blk.getitem_block_columns(taker, new_mgr_locs=mgr_locs)
                         blocks.append(nb)
-                        refs.append(weakref.ref(blk))
                     elif only_slice:
                         # GH#33597 slice instead of take, so we get
                         #  views instead of copies
@@ -883,13 +845,11 @@ def _slice_take_blocks_ax0(
                             nb = blk.getitem_block_columns(slc, new_mgr_locs=bp)
                             # We have np.shares_memory(nb.values, blk.values)
                             blocks.append(nb)
-                            refs.append(weakref.ref(blk))
                     else:
                         nb = blk.take_nd(taker, axis=0, new_mgr_locs=mgr_locs)
                         blocks.append(nb)
-                        refs.append(None)
 
-        return blocks, refs
+        return blocks
 
     def _make_na_block(
         self, placement: BlockPlacement, fill_value=None, use_na_proxy: bool = False
@@ -974,8 +934,6 @@ def __init__(
         self,
         blocks: Sequence[Block],
         axes: Sequence[Index],
-        refs: list[weakref.ref | None] | None = None,
-        parent: object = None,
         verify_integrity: bool = True,
     ) -> None:
 
@@ -1007,28 +965,13 @@ def _verify_integrity(self) -> None:
                 f"block items\n# manager items: {len(self.items)}, # "
                 f"tot_items: {tot_items}"
             )
-        if self.refs is not None:
-            if len(self.refs) != len(self.blocks):
-                raise AssertionError(
-                    "Number of passed refs must equal the number of blocks: "
-                    f"{len(self.refs)} refs vs {len(self.blocks)} blocks."
-                    "\nIf you see this error, please report a bug at "
-                    "https://github.com/pandas-dev/pandas/issues"
-                )
 
     @classmethod
-    def from_blocks(
-        cls,
-        blocks: list[Block],
-        axes: list[Index],
-        refs: list[weakref.ref | None] | None = None,
-        parent: object = None,
-    ) -> BlockManager:
+    def from_blocks(cls, blocks: list[Block], axes: list[Index]) -> BlockManager:
         """
         Constructor for BlockManager and SingleBlockManager with same signature.
         """
-        parent = parent if using_copy_on_write() else None
-        return cls(blocks, axes, refs, parent, verify_integrity=False)
+        return cls(blocks, axes, verify_integrity=False)
 
     # ----------------------------------------------------------------
     # Indexing
@@ -1047,10 +990,14 @@ def fast_xs(self, loc: int) -> SingleBlockManager:
         """
         if len(self.blocks) == 1:
             result = self.blocks[0].iget((slice(None), loc))
-            block = new_block(result, placement=slice(0, len(result)), ndim=1)
             # in the case of a single block, the new block is a view
-            ref = weakref.ref(self.blocks[0])
-            return SingleBlockManager(block, self.axes[0], [ref], parent=self)
+            block = new_block(
+                result,
+                placement=slice(0, len(result)),
+                ndim=1,
+                refs=self.blocks[0].refs,
+            )
+            return SingleBlockManager(block, self.axes[0])
 
         dtype = interleaved_dtype([blk.dtype for blk in self.blocks])
 
@@ -1093,10 +1040,10 @@ def iget(self, i: int, track_ref: bool = True) -> SingleBlockManager:
 
         # shortcut for select a single-dim from a 2-dim BM
         bp = BlockPlacement(slice(0, len(values)))
-        nb = type(block)(values, placement=bp, ndim=1)
-        ref = weakref.ref(block) if track_ref else None
-        parent = self if track_ref else None
-        return SingleBlockManager(nb, self.axes[1], [ref], parent)
+        nb = type(block)(
+            values, placement=bp, ndim=1, refs=block.refs if track_ref else None
+        )
+        return SingleBlockManager(nb, self.axes[1])
 
     def iget_values(self, i: int) -> ArrayLike:
         """
@@ -1228,7 +1175,7 @@ def value_getitem(placement):
                     self._iset_split_block(blkno_l, blk_locs)
 
         if len(removed_blknos):
-            # Remove blocks & update blknos and refs accordingly
+            # Remove blocks & update blknos accordingly
             is_deleted = np.zeros(self.nblocks, dtype=np.bool_)
             is_deleted[removed_blknos] = True
 
@@ -1239,18 +1186,14 @@ def value_getitem(placement):
             self.blocks = tuple(
                 blk for i, blk in enumerate(self.blocks) if i not in set(removed_blknos)
             )
-            if self.refs is not None:
-                self.refs = [
-                    ref
-                    for i, ref in enumerate(self.refs)
-                    if i not in set(removed_blknos)
-                ]
 
         if unfit_val_locs:
             unfit_idxr = np.concatenate(unfit_mgr_locs)
             unfit_count = len(unfit_idxr)
 
             new_blocks: list[Block] = []
+            # TODO(CoW) is this always correct to assume that the new_blocks
+            # are not referencing anything else?
             if value_is_extension_type:
                 # This code (ab-)uses the fact that EA blocks contain only
                 # one item.
@@ -1281,10 +1224,6 @@ def value_getitem(placement):
                 self._blklocs[unfit_idxr] = np.arange(unfit_count)
 
             self.blocks += tuple(new_blocks)
-            # TODO(CoW) is this always correct to assume that the new_blocks
-            # are not referencing anything else?
-            if self.refs is not None:
-                self.refs = list(self.refs) + [None] * len(new_blocks)
 
             # Newly created block's dtype may already be present.
             self._known_consolidated = False
@@ -1321,13 +1260,6 @@ def _iset_split_block(
             first_nb = nbs_tup[0]
             nbs_tup = tuple(nbs_tup[1:])
 
-        if self.refs is not None:
-            self.refs.extend([self.refs[blkno_l]] * len(nbs_tup))
-
-        if value is not None:
-            # Only clear if we set new values
-            self._clear_reference_block(blkno_l)
-
         nr_blocks = len(self.blocks)
         blocks_tup = (
             self.blocks[:blkno_l] + (first_nb,) + self.blocks[blkno_l + 1 :] + nbs_tup
@@ -1357,7 +1289,6 @@ def _iset_single(
             if using_copy_on_write() and not self._has_no_reference_block(blkno):
                 # perform Copy-on-Write and clear the reference
                 copy = True
-                self._clear_reference_block(blkno)
             iloc = self.blklocs[loc]
             blk.set_inplace(slice(iloc, iloc + 1), value, copy=copy)
             return
@@ -1366,7 +1297,6 @@ def _iset_single(
         old_blocks = self.blocks
         new_blocks = old_blocks[:blkno] + (nb,) + old_blocks[blkno + 1 :]
         self.blocks = new_blocks
-        self._clear_reference_block(blkno)
         return
 
     def column_setitem(
@@ -1384,7 +1314,6 @@ def column_setitem(
             blocks = list(self.blocks)
             blocks[blkno] = blocks[blkno].copy()
             self.blocks = tuple(blocks)
-            self._clear_reference_block(blkno)
 
         # this manager is only created temporarily to mutate the values in place
         # so don't track references, otherwise the `setitem` would perform CoW again
@@ -1418,6 +1347,7 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
             value = ensure_block_shape(value, ndim=self.ndim)
 
         bp = BlockPlacement(slice(loc, loc + 1))
+        # TODO(CoW) do we always "own" the passed `value`?
         block = new_block_2d(values=value, placement=bp)
 
         if not len(self.blocks):
@@ -1430,9 +1360,6 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
 
         self.axes[0] = new_axis
         self.blocks += (block,)
-        # TODO(CoW) do we always "own" the passed `value`?
-        if self.refs is not None:
-            self.refs += [None]
 
         self._known_consolidated = False
 
@@ -1486,12 +1413,10 @@ def idelete(self, indexer) -> BlockManager:
         is_deleted[indexer] = True
         taker = (~is_deleted).nonzero()[0]
 
-        nbs, new_refs = self._slice_take_blocks_ax0(taker, only_slice=True)
+        nbs = self._slice_take_blocks_ax0(taker, only_slice=True)
         new_columns = self.items[~is_deleted]
         axes = [new_columns, self.axes[1]]
-        # TODO this might not be needed (can a delete ever be done in chained manner?)
-        parent = None if com.all_none(*new_refs) else self
-        return type(self)(tuple(nbs), axes, new_refs, parent, verify_integrity=False)
+        return type(self)(tuple(nbs), axes, verify_integrity=False)
 
     # ----------------------------------------------------------------
     # Block-wise Operation
@@ -1837,10 +1762,7 @@ def _consolidate_inplace(self) -> None:
         #  the DataFrame's _item_cache. The exception is for newly-created
         #  BlockManager objects not yet attached to a DataFrame.
         if not self.is_consolidated():
-            if self.refs is None:
-                self.blocks = _consolidate(self.blocks)
-            else:
-                self.blocks, self.refs = _consolidate_with_refs(self.blocks, self.refs)
+            self.blocks = _consolidate(self.blocks)
             self._is_consolidated = True
             self._known_consolidated = True
             self._rebuild_blknos_and_blklocs()
@@ -1862,8 +1784,6 @@ def __init__(
         self,
         block: Block,
         axis: Index,
-        refs: list[weakref.ref | None] | None = None,
-        parent: object = None,
         verify_integrity: bool = False,
     ) -> None:
         # Assertions disabled for performance
@@ -1872,25 +1792,19 @@ def __init__(
 
         self.axes = [axis]
         self.blocks = (block,)
-        self.refs = refs
-        self.parent = parent if using_copy_on_write() else None
 
     @classmethod
     def from_blocks(
         cls,
         blocks: list[Block],
         axes: list[Index],
-        refs: list[weakref.ref | None] | None = None,
-        parent: object = None,
     ) -> SingleBlockManager:
         """
         Constructor for BlockManager and SingleBlockManager with same signature.
         """
         assert len(blocks) == 1
         assert len(axes) == 1
-        if refs is not None:
-            assert len(refs) == 1
-        return cls(blocks[0], axes[0], refs, parent, verify_integrity=False)
+        return cls(blocks[0], axes[0], verify_integrity=False)
 
     @classmethod
     def from_array(cls, array: ArrayLike, index: Index) -> SingleBlockManager:
@@ -1907,13 +1821,9 @@ def to_2d_mgr(self, columns: Index) -> BlockManager:
         blk = self.blocks[0]
         arr = ensure_block_shape(blk.values, ndim=2)
         bp = BlockPlacement(0)
-        new_blk = type(blk)(arr, placement=bp, ndim=2)
+        new_blk = type(blk)(arr, placement=bp, ndim=2, refs=blk.refs)
         axes = [columns, self.axes[0]]
-        refs: list[weakref.ref | None] = [weakref.ref(blk)]
-        parent = self if using_copy_on_write() else None
-        return BlockManager(
-            [new_blk], axes=axes, refs=refs, parent=parent, verify_integrity=False
-        )
+        return BlockManager([new_blk], axes=axes, verify_integrity=False)
 
     def _has_no_reference(self, i: int = 0) -> bool:
         """
@@ -1921,9 +1831,7 @@ def _has_no_reference(self, i: int = 0) -> bool:
         (whether it references another array or is itself being referenced)
         Returns True if the column has no references.
         """
-        return (self.refs is None or self.refs[0] is None) and weakref.getweakrefcount(
-            self.blocks[0]
-        ) == 0
+        return not self.blocks[0].refs.has_reference()
 
     def __getstate__(self):
         block_values = [b.values for b in self.blocks]
@@ -1991,19 +1899,18 @@ def getitem_mgr(self, indexer: slice | np.ndarray) -> SingleBlockManager:
             and com.is_bool_indexer(indexer)
             and indexer.all()
         ):
-            return type(self)(blk, self.index, [weakref.ref(blk)], parent=self)
+            return type(self)(blk.copy(deep=False), self.index)
         array = blk._slice(indexer)
         if array.ndim > 1:
             # This will be caught by Series._get_values
             raise ValueError("dimension-expanding indexing not allowed")
 
         bp = BlockPlacement(slice(0, len(array)))
-        block = type(blk)(array, placement=bp, ndim=1)
+        # TODO(CoW) in theory only need to track reference if new_array is a view
+        block = type(blk)(array, placement=bp, ndim=1, refs=blk.refs)
 
         new_idx = self.index[indexer]
-        # TODO(CoW) in theory only need to track reference if new_array is a view
-        ref = weakref.ref(blk)
-        return type(self)(block, new_idx, [ref], parent=self)
+        return type(self)(block, new_idx)
 
     def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleBlockManager:
         # Assertion disabled for performance
@@ -2014,11 +1921,11 @@ def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleBlockManager:
         blk = self._block
         array = blk._slice(slobj)
         bp = BlockPlacement(slice(0, len(array)))
-        block = type(blk)(array, placement=bp, ndim=1)
-        new_index = self.index._getitem_slice(slobj)
         # TODO this method is only used in groupby SeriesSplitter at the moment,
-        # so passing refs / parent is not yet covered by the tests
-        return type(self)(block, new_index, [weakref.ref(blk)], parent=self)
+        # so passing refs is not yet covered by the tests
+        block = type(blk)(array, placement=bp, ndim=1, refs=blk.refs)
+        new_index = self.index._getitem_slice(slobj)
+        return type(self)(block, new_index)
 
     @property
     def index(self) -> Index:
@@ -2064,8 +1971,6 @@ def setitem_inplace(self, indexer, value) -> None:
         """
         if using_copy_on_write() and not self._has_no_reference(0):
             self.blocks = (self._block.copy(),)
-            self.refs = None
-            self.parent = None
             self._cache.clear()
 
         super().setitem_inplace(indexer, value)
@@ -2080,9 +1985,6 @@ def idelete(self, indexer) -> SingleBlockManager:
         self.blocks = (nb,)
         self.axes[0] = self.axes[0].delete(indexer)
         self._cache.clear()
-        # clear reference since delete always results in a new array
-        self.refs = None
-        self.parent = None
         return self
 
     def fast_xs(self, loc):
@@ -2302,31 +2204,6 @@ def _consolidate(blocks: tuple[Block, ...]) -> tuple[Block, ...]:
     return tuple(new_blocks)
 
 
-def _consolidate_with_refs(
-    blocks: tuple[Block, ...], refs
-) -> tuple[tuple[Block, ...], list[weakref.ref | None]]:
-    """
-    Merge blocks having same dtype, exclude non-consolidating blocks, handling
-    refs
-    """
-    gkey = lambda x: x[0]._consolidate_key
-    grouper = itertools.groupby(sorted(zip(blocks, refs), key=gkey), gkey)
-
-    new_blocks: list[Block] = []
-    new_refs: list[weakref.ref | None] = []
-    for (_can_consolidate, dtype), group_blocks_refs in grouper:
-        group_blocks, group_refs = list(zip(*list(group_blocks_refs)))
-        merged_blocks, consolidated = _merge_blocks(
-            list(group_blocks), dtype=dtype, can_consolidate=_can_consolidate
-        )
-        new_blocks = extend_blocks(merged_blocks, new_blocks)
-        if consolidated:
-            new_refs.extend([None])
-        else:
-            new_refs.extend(group_refs)
-    return tuple(new_blocks), new_refs
-
-
 def _merge_blocks(
     blocks: list[Block], dtype: DtypeObj, can_consolidate: bool
 ) -> tuple[list[Block], bool]:
diff --git a/pandas/core/internals/ops.py b/pandas/core/internals/ops.py
index 477dc98aa2b2b..24fc51a96d9df 100644
--- a/pandas/core/internals/ops.py
+++ b/pandas/core/internals/ops.py
@@ -36,7 +36,7 @@ def _iter_block_pairs(
 
         left_ea = blk_vals.ndim == 1
 
-        rblks, _ = right._slice_take_blocks_ax0(locs.indexer, only_slice=True)
+        rblks = right._slice_take_blocks_ax0(locs.indexer, only_slice=True)
 
         # Assertions are disabled for performance, but should hold:
         # if left_ea:
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index f8220649bf890..f37b0604628c8 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -14,7 +14,6 @@
     cast,
     overload,
 )
-import weakref
 
 import numpy as np
 
@@ -551,8 +550,9 @@ def __init__(
                     obj = sample._constructor(obj, columns=[name], copy=False)
                     if using_copy_on_write():
                         # TODO(CoW): Remove when ref tracking in constructors works
-                        obj._mgr.parent = original_obj  # type: ignore[union-attr]
-                        obj._mgr.refs = [weakref.ref(original_obj._mgr.blocks[0])]  # type: ignore[union-attr]  # noqa: E501
+                        for i in range(len(original_obj._mgr.blocks)):
+                            obj._mgr.blocks[i].refs = original_obj._mgr.blocks[i].refs
+                            obj._mgr.blocks[i].refs.add_reference(obj._mgr.blocks[i])
 
                     obj.columns = [new_name]
 
@@ -612,13 +612,9 @@ def get_result(self):
                     typ=get_option("mode.data_manager"),
                 )
                 if using_copy_on_write() and not self.copy:
-                    parents = [obj._mgr for obj in self.objs]
-                    mgr.parent = parents  # type: ignore[union-attr]
-                    refs = [
-                        weakref.ref(obj._mgr.blocks[0])  # type: ignore[union-attr]
-                        for obj in self.objs
-                    ]
-                    mgr.refs = refs  # type: ignore[union-attr]
+                    for i in range(len(self.objs)):
+                        mgr.blocks[i].refs = self.objs[i]._mgr.blocks[0].refs
+                        mgr.blocks[i].refs.add_reference(mgr.blocks[i])
                 df = cons(mgr, copy=False)
                 df.columns = columns
                 return df.__finalize__(self, method="concat")
diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
index c04c733e5ee1d..f5805455e326f 100644
--- a/pandas/tests/copy_view/test_constructors.py
+++ b/pandas/tests/copy_view/test_constructors.py
@@ -18,7 +18,7 @@ def test_series_from_series(using_copy_on_write):
     assert np.shares_memory(ser.values, result.values)
 
     if using_copy_on_write:
-        assert result._mgr.refs is not None
+        assert result._mgr.blocks[0].refs.has_reference()
 
     if using_copy_on_write:
         # mutating new series copy doesn't mutate original
@@ -72,4 +72,4 @@ def test_series_from_series_with_reindex(using_copy_on_write):
     result = Series(ser, index=[0, 1, 2, 3])
     assert not np.shares_memory(ser.values, result.values)
     if using_copy_on_write:
-        assert result._mgr.refs is None or result._mgr.refs[0] is None
+        assert not result._mgr.blocks[0].refs.has_reference()
diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py
index 506ae7d3465c5..67022e533dbc4 100644
--- a/pandas/tests/copy_view/test_internals.py
+++ b/pandas/tests/copy_view/test_internals.py
@@ -20,52 +20,32 @@ def test_consolidate(using_copy_on_write):
     subset = df[:]
 
     # each block of subset references a block of df
-    assert subset._mgr.refs is not None and all(
-        ref is not None for ref in subset._mgr.refs
-    )
+    assert all(blk.refs.has_reference() for blk in subset._mgr.blocks)
 
     # consolidate the two int64 blocks
     subset._consolidate_inplace()
 
     # the float64 block still references the parent one because it still a view
-    assert subset._mgr.refs[0] is not None
+    assert subset._mgr.blocks[0].refs.has_reference()
     # equivalent of assert np.shares_memory(df["b"].values, subset["b"].values)
     # but avoids caching df["b"]
     assert np.shares_memory(get_array(df, "b"), get_array(subset, "b"))
 
     # the new consolidated int64 block does not reference another
-    assert subset._mgr.refs[1] is None
+    assert not subset._mgr.blocks[1].refs.has_reference()
 
     # the parent dataframe now also only is linked for the float column
-    assert df._mgr._has_no_reference(0)
-    assert not df._mgr._has_no_reference(1)
-    assert df._mgr._has_no_reference(2)
+    assert not df._mgr.blocks[0].refs.has_reference()
+    assert df._mgr.blocks[1].refs.has_reference()
+    assert not df._mgr.blocks[2].refs.has_reference()
 
     # and modifying subset still doesn't modify parent
     if using_copy_on_write:
         subset.iloc[0, 1] = 0.0
-        assert df._mgr._has_no_reference(1)
+        assert not df._mgr.blocks[1].refs.has_reference()
         assert df.loc[0, "b"] == 0.1
 
 
-@td.skip_array_manager_invalid_test
-def test_clear_parent(using_copy_on_write):
-    # ensure to clear parent reference if we are no longer viewing data from parent
-    if not using_copy_on_write:
-        pytest.skip("test only relevant when using copy-on-write")
-
-    df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
-    subset = df[:]
-    assert subset._mgr.parent is not None
-
-    # replacing existing columns loses the references to the parent df
-    subset["a"] = 0
-    assert subset._mgr.parent is not None
-    # when losing the last reference, also the parent should be reset
-    subset["b"] = 0
-    assert subset._mgr.parent is None
-
-
 @pytest.mark.single_cpu
 @td.skip_array_manager_invalid_test
 def test_switch_options():
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index 0e7f361257a61..dc5711f1b2ddd 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -22,7 +22,8 @@ def test_copy(using_copy_on_write):
     # the deep copy doesn't share memory
     assert not np.shares_memory(get_array(df_copy, "a"), get_array(df, "a"))
     if using_copy_on_write:
-        assert df_copy._mgr.refs is None
+        assert not df_copy._mgr.blocks[0].refs.has_reference()
+        assert not df_copy._mgr.blocks[1].refs.has_reference()
 
     # mutating copy doesn't mutate original
     df_copy.iloc[0, 0] = 0

From c7fe560459f1dc7412a3a55279e1c6d2af7b7450 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 3 Feb 2023 16:50:03 +0100
Subject: [PATCH 04/22] Fix mypy

---
 pandas/compat/pickle_compat.py    |  2 +-
 pandas/core/generic.py            |  4 +++-
 pandas/core/internals/managers.py |  3 +--
 pandas/core/reshape/concat.py     | 10 +++++-----
 4 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py
index 2fbd3a6cdb046..bfee616d52aac 100644
--- a/pandas/compat/pickle_compat.py
+++ b/pandas/compat/pickle_compat.py
@@ -168,7 +168,7 @@ def load_newobj(self) -> None:
         arr = np.array([], dtype="m8[ns]")
         obj = cls.__new__(cls, arr, arr.dtype)
     elif cls is BlockManager and not args:
-        obj = cls.__new__(cls, (), [], None, False)
+        obj = cls.__new__(cls, (), [], False)
     else:
         obj = cls.__new__(cls, *args)
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 2720d064d6f1c..2fc5c25dac039 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -769,7 +769,9 @@ def swapaxes(
             assert isinstance(new_mgr, BlockManager)
             assert isinstance(self._mgr, BlockManager)
             new_mgr.blocks[0].refs = self._mgr.blocks[0].refs
-            new_mgr.blocks[0].refs.add_reference(new_mgr.blocks[0])
+            new_mgr.blocks[0].refs.add_reference(
+                new_mgr.blocks[0]  # type: ignore[arg-type]
+            )
             return self._constructor(new_mgr).__finalize__(self, method="swapaxes")
 
         elif (copy or copy is None) and self._mgr.is_single_block:
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 66b77b13905d2..88d00e47ce694 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -695,7 +695,6 @@ def reindex_indexer(
                 use_na_proxy=use_na_proxy,
             )
         else:
-            print(self.blocks)
             new_blocks = [
                 blk.take_nd(
                     indexer,
@@ -757,7 +756,7 @@ def _slice_take_blocks_ax0(
                 # GH#32959 EABlock would fail since we can't make 0-width
                 # TODO(EA2D): special casing unnecessary with 2D EAs
                 if sllen == 0:
-                    return [], []
+                    return []
                 bp = BlockPlacement(slice(0, sllen))
                 return [blk.getitem_block_columns(slobj, new_mgr_locs=bp)]
             elif not allow_fill or self.ndim == 1:
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index f37b0604628c8..042ea42407dff 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -550,9 +550,9 @@ def __init__(
                     obj = sample._constructor(obj, columns=[name], copy=False)
                     if using_copy_on_write():
                         # TODO(CoW): Remove when ref tracking in constructors works
-                        for i in range(len(original_obj._mgr.blocks)):
-                            obj._mgr.blocks[i].refs = original_obj._mgr.blocks[i].refs
-                            obj._mgr.blocks[i].refs.add_reference(obj._mgr.blocks[i])
+                        for i in range(len(original_obj._mgr.blocks)):  # type: ignore[union-attr]  # noqa
+                            obj._mgr.blocks[i].refs = original_obj._mgr.blocks[i].refs  # type: ignore[union-attr]  # noqa
+                            obj._mgr.blocks[i].refs.add_reference(obj._mgr.blocks[i])  # type: ignore[arg-type, union-attr]  # noqa
 
                     obj.columns = [new_name]
 
@@ -613,8 +613,8 @@ def get_result(self):
                 )
                 if using_copy_on_write() and not self.copy:
                     for i in range(len(self.objs)):
-                        mgr.blocks[i].refs = self.objs[i]._mgr.blocks[0].refs
-                        mgr.blocks[i].refs.add_reference(mgr.blocks[i])
+                        mgr.blocks[i].refs = self.objs[i]._mgr.blocks[0].refs  # type: ignore[union-attr]  # noqa
+                        mgr.blocks[i].refs.add_reference(mgr.blocks[i])  # type: ignore[arg-type, union-attr]  # noqa
                 df = cons(mgr, copy=False)
                 df.columns = columns
                 return df.__finalize__(self, method="concat")

From 47269ce34906f53b363472902ffd4c6b8902140a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 3 Feb 2023 16:59:20 +0100
Subject: [PATCH 05/22] Add docs

---
 pandas/_libs/internals.pyx | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index 3ce0e2bb94eef..021c229fe64c9 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -860,6 +860,11 @@ cdef class BlockManager:
 
 
 cdef class BlockValuesRefs:
+    """Tracks all references to a given array.
+
+    Keeps track of all blocks (through weak references) that reference the same
+    data.
+    """
     cdef:
         public list referenced_blocks
 
@@ -867,9 +872,25 @@ cdef class BlockValuesRefs:
         self.referenced_blocks = [weakref.ref(blk)]
 
     def add_reference(self, blk: SharedBlock) -> None:
+        """Adds a new reference to our reference collection.
+
+        Parameters
+        ----------
+        blk: SharedBlock
+            The block that the new references should point to.
+        """
         self.referenced_blocks.append(weakref.ref(blk))
 
     def has_reference(self) -> bool:
+        """Checks if block has foreign references.
+
+        A reference is only relevant if it is still alive. The reference to
+        ourselves does not count.
+
+        Returns
+        -------
+            bool
+        """
         self.referenced_blocks = [
             obj for obj in self.referenced_blocks if obj() is not None
         ]

From 60340539a7b5823d7720a766352bce8a348db566 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 3 Feb 2023 17:12:26 +0100
Subject: [PATCH 06/22] Add tests

---
 pandas/tests/copy_view/test_methods.py | 31 ++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index dc5711f1b2ddd..cf316f678fe71 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -1216,3 +1216,34 @@ def test_isetitem(using_copy_on_write):
         assert np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
     else:
         assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
+
+
+def test_assigning_to_same_variable_removes_references(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3]})
+    df = df.reset_index()
+    arr = get_array(df, "a")
+    df.iloc[0, 1] = 100  # Write into a
+
+    assert np.shares_memory(arr, get_array(df, "a"))
+
+
+def test_setitem_dont_track_unnecessary_references(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
+
+    df["b"] = 100
+    arr = get_array(df, "a")
+    df.iloc[0, 0] = 100  # Check that we correctly track reference
+    assert np.shares_memory(arr, get_array(df, "a"))
+
+
+def test_setitem_with_view_copies(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
+    view = df[:]
+    expected = df.copy()
+
+    df["b"] = 100
+    arr = get_array(df, "a")
+    df.iloc[0, 0] = 100  # Check that we correctly track reference
+    if using_copy_on_write:
+        assert not np.shares_memory(arr, get_array(df, "a"))
+        tm.assert_frame_equal(view, expected)

From 4d4f85622dcce8cdd929e346fc15275335978cee Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 3 Feb 2023 19:14:49 +0100
Subject: [PATCH 07/22] Fix pylint

---
 pandas/core/reshape/concat.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 042ea42407dff..43e0b77a90a85 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -550,8 +550,8 @@ def __init__(
                     obj = sample._constructor(obj, columns=[name], copy=False)
                     if using_copy_on_write():
                         # TODO(CoW): Remove when ref tracking in constructors works
-                        for i in range(len(original_obj._mgr.blocks)):  # type: ignore[union-attr]  # noqa
-                            obj._mgr.blocks[i].refs = original_obj._mgr.blocks[i].refs  # type: ignore[union-attr]  # noqa
+                        for i, block in enumerate(original_obj._mgr.blocks):  # type: ignore[union-attr]  # noqa
+                            obj._mgr.blocks[i].refs = block.refs  # type: ignore[union-attr]  # noqa
                             obj._mgr.blocks[i].refs.add_reference(obj._mgr.blocks[i])  # type: ignore[arg-type, union-attr]  # noqa
 
                     obj.columns = [new_name]
@@ -612,8 +612,8 @@ def get_result(self):
                     typ=get_option("mode.data_manager"),
                 )
                 if using_copy_on_write() and not self.copy:
-                    for i in range(len(self.objs)):
-                        mgr.blocks[i].refs = self.objs[i]._mgr.blocks[0].refs  # type: ignore[union-attr]  # noqa
+                    for i, obj in enumerate(self.objs):
+                        mgr.blocks[i].refs = obj._mgr.blocks[0].refs  # type: ignore[union-attr]  # noqa
                         mgr.blocks[i].refs.add_reference(mgr.blocks[i])  # type: ignore[arg-type, union-attr]  # noqa
                 df = cons(mgr, copy=False)
                 df.columns = columns

From 71013221f0181f7f9ca154da59509a55d0c90804 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 3 Feb 2023 19:16:02 +0100
Subject: [PATCH 08/22] Add dev docs for CoW

---
 doc/source/development/copy_on_write.rst | 32 ++++++++++++++++++++++++
 doc/source/development/index.rst         |  1 +
 2 files changed, 33 insertions(+)
 create mode 100644 doc/source/development/copy_on_write.rst

diff --git a/doc/source/development/copy_on_write.rst b/doc/source/development/copy_on_write.rst
new file mode 100644
index 0000000000000..133f27391dfbf
--- /dev/null
+++ b/doc/source/development/copy_on_write.rst
@@ -0,0 +1,32 @@
+.. _copy_on_write::
+
+{{ header }}
+
+*************
+Copy on write
+*************
+
+Copy on Write is a mechanism to simplify the indexing API and improve
+performance through avoiding copies if possible.
+CoW means that any DataFrame or Series derived from another in any way always
+behaves as a copy.
+
+Reference tracking
+------------------
+
+To be able to determine, if we have to make a copy when writing into a DataFrame,
+we have to be aware, if the values are shared with another DataFrame. pandas
+keeps track of all ``Blocks`` that share values with another block internally to
+be able to tell when a copy needs to be triggered. The reference tracking
+mechanism is implemented on the Block level.
+
+We use a custom reference tracker object, e.g. ``BlockValuesRefs`` that keeps
+track of every block, whose values share memory with each other. The reference
+is held through a weak-reference. Every two blocks that share some memory should
+point to the same ``BlockValuesRefs`` object. If one block goes out of
+scope, the reference to this block dies. As a consequence, the reference tracker
+object always knows how many blocks are alive and share memory.
+
+We can ask the reference tracking object if there is another block alive that shares
+data with us before writing into the values. We can trigger a copy before
+writing if there is in fact another block alive.
diff --git a/doc/source/development/index.rst b/doc/source/development/index.rst
index c741441cf67a1..69f04494a271c 100644
--- a/doc/source/development/index.rst
+++ b/doc/source/development/index.rst
@@ -18,6 +18,7 @@ Development
     contributing_codebase
     maintaining
     internals
+    copy_on_write
     debugging_extensions
     extending
     developer

From 189c7eac0553d47667559260937a2bd32cec0072 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 3 Feb 2023 20:41:31 +0100
Subject: [PATCH 09/22] Update copy_on_write.rst

---
 doc/source/development/copy_on_write.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/development/copy_on_write.rst b/doc/source/development/copy_on_write.rst
index 133f27391dfbf..2c7a54c60cbc7 100644
--- a/doc/source/development/copy_on_write.rst
+++ b/doc/source/development/copy_on_write.rst
@@ -1,4 +1,4 @@
-.. _copy_on_write::
+.. _copy_on_write:
 
 {{ header }}
 

From b79686926eac07c9c3b9d8e9c7e24be2b8d9e801 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 3 Feb 2023 22:12:26 +0100
Subject: [PATCH 10/22] Add tests

---
 .../copy_view/test_core_functionalities.py    | 77 +++++++++++++++++++
 pandas/tests/copy_view/test_methods.py        | 31 --------
 2 files changed, 77 insertions(+), 31 deletions(-)
 create mode 100644 pandas/tests/copy_view/test_core_functionalities.py

diff --git a/pandas/tests/copy_view/test_core_functionalities.py b/pandas/tests/copy_view/test_core_functionalities.py
new file mode 100644
index 0000000000000..4461ff3758e89
--- /dev/null
+++ b/pandas/tests/copy_view/test_core_functionalities.py
@@ -0,0 +1,77 @@
+import numpy as np
+import pytest
+
+from pandas import DataFrame
+import pandas._testing as tm
+from pandas.tests.copy_view.util import get_array
+
+
+def test_assigning_to_same_variable_removes_references(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3]})
+    df = df.reset_index()
+    arr = get_array(df, "a")
+    df.iloc[0, 1] = 100  # Write into a
+
+    assert np.shares_memory(arr, get_array(df, "a"))
+
+
+def test_setitem_dont_track_unnecessary_references(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
+
+    df["b"] = 100
+    arr = get_array(df, "a")
+    df.iloc[0, 0] = 100  # Check that we correctly removed the reference
+    assert np.shares_memory(arr, get_array(df, "a"))
+
+
+def test_setitem_with_view_copies(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
+    view = df[:]
+    expected = df.copy()
+
+    df["b"] = 100
+    arr = get_array(df, "a")
+    df.iloc[0, 0] = 100  # Check that we correctly track reference
+    if using_copy_on_write:
+        assert not np.shares_memory(arr, get_array(df, "a"))
+        tm.assert_frame_equal(view, expected)
+
+
+def test_setitem_with_view_invalidated_does_not_copy(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
+    view = df[:]
+
+    df["b"] = 100
+    arr = get_array(df, "a")
+    view = None  # noqa
+    df.iloc[0, 0] = 100  # Check that we correctly release the reference
+    if using_copy_on_write:
+        pytest.mark.xfail("blk.delete does not track references correctly")
+        assert np.shares_memory(arr, get_array(df, "a"))
+
+
+def test_out_of_scope(using_copy_on_write):
+    def func():
+        df = DataFrame({"a": [1, 2], "b": 1.5, "c": 1})
+        # create some subset
+        result = df[["a", "b"]]
+        return result
+
+    result = func()
+    if using_copy_on_write:
+        assert not result._mgr.blocks[0].refs.has_reference()
+        assert not result._mgr.blocks[1].refs.has_reference()
+
+
+def test_delete(using_copy_on_write):
+    df = DataFrame(np.random.randn(4, 3), columns=["a", "b", "c"])
+    del df["b"]
+    if using_copy_on_write:
+        # TODO: This should not have references, delete makes a shallow copy
+        # but keeps the blocks alive
+        assert df._mgr.blocks[0].refs.has_reference()
+        assert df._mgr.blocks[1].refs.has_reference()
+
+    df = df[["a"]]
+    if using_copy_on_write:
+        assert not df._mgr.blocks[0].refs.has_reference()
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index cf316f678fe71..dc5711f1b2ddd 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -1216,34 +1216,3 @@ def test_isetitem(using_copy_on_write):
         assert np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
     else:
         assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
-
-
-def test_assigning_to_same_variable_removes_references(using_copy_on_write):
-    df = DataFrame({"a": [1, 2, 3]})
-    df = df.reset_index()
-    arr = get_array(df, "a")
-    df.iloc[0, 1] = 100  # Write into a
-
-    assert np.shares_memory(arr, get_array(df, "a"))
-
-
-def test_setitem_dont_track_unnecessary_references(using_copy_on_write):
-    df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
-
-    df["b"] = 100
-    arr = get_array(df, "a")
-    df.iloc[0, 0] = 100  # Check that we correctly track reference
-    assert np.shares_memory(arr, get_array(df, "a"))
-
-
-def test_setitem_with_view_copies(using_copy_on_write):
-    df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
-    view = df[:]
-    expected = df.copy()
-
-    df["b"] = 100
-    arr = get_array(df, "a")
-    df.iloc[0, 0] = 100  # Check that we correctly track reference
-    if using_copy_on_write:
-        assert not np.shares_memory(arr, get_array(df, "a"))
-        tm.assert_frame_equal(view, expected)

From 4712c0b5a9d129ae92e9164045c63011e53fae7b Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 4 Feb 2023 00:57:45 +0100
Subject: [PATCH 11/22] Fix xfail

---
 pandas/conftest.py                                  | 1 +
 pandas/tests/copy_view/test_core_functionalities.py | 7 +++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index 64a8f0f9efc1d..08c6df08131c0 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1933,6 +1933,7 @@ def using_copy_on_write() -> bool:
     """
     Fixture to check if Copy-on-Write is enabled.
     """
+    pd.options.mode.copy_on_write = True
     return pd.options.mode.copy_on_write and pd.options.mode.data_manager == "block"
 
 
diff --git a/pandas/tests/copy_view/test_core_functionalities.py b/pandas/tests/copy_view/test_core_functionalities.py
index 4461ff3758e89..f92bf26a31029 100644
--- a/pandas/tests/copy_view/test_core_functionalities.py
+++ b/pandas/tests/copy_view/test_core_functionalities.py
@@ -37,7 +37,7 @@ def test_setitem_with_view_copies(using_copy_on_write):
         tm.assert_frame_equal(view, expected)
 
 
-def test_setitem_with_view_invalidated_does_not_copy(using_copy_on_write):
+def test_setitem_with_view_invalidated_does_not_copy(using_copy_on_write, request):
     df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
     view = df[:]
 
@@ -46,7 +46,10 @@ def test_setitem_with_view_invalidated_does_not_copy(using_copy_on_write):
     view = None  # noqa
     df.iloc[0, 0] = 100  # Check that we correctly release the reference
     if using_copy_on_write:
-        pytest.mark.xfail("blk.delete does not track references correctly")
+        mark = pytest.mark.xfail(
+            reason="blk.delete does not track references correctly"
+        )
+        request.node.add_marker(mark)
         assert np.shares_memory(arr, get_array(df, "a"))
 
 

From f3f425c5911574a2e24c977f7bf21467ef1dd994 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 4 Feb 2023 00:57:50 +0100
Subject: [PATCH 12/22] Fix xfail

---
 pandas/conftest.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index 08c6df08131c0..64a8f0f9efc1d 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1933,7 +1933,6 @@ def using_copy_on_write() -> bool:
     """
     Fixture to check if Copy-on-Write is enabled.
     """
-    pd.options.mode.copy_on_write = True
     return pd.options.mode.copy_on_write and pd.options.mode.data_manager == "block"
 
 

From 1c65bd8401d1944970791a0b34eb9fe3d8368134 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 6 Feb 2023 22:15:12 +0100
Subject: [PATCH 13/22] Update pandas/_libs/internals.pyx

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 pandas/_libs/internals.pyx | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index 021c229fe64c9..d821b9308c500 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -602,6 +602,9 @@ cdef class SharedBlock:
         self._mgr_locs = placement
         self.ndim = ndim
         if refs is None:
+            # if no refs are passed, that means we are creating a Block from
+            # new values that it uniquely owns -> start a new BlockValuesRefs
+            # object that only references this block
             self.refs = BlockValuesRefs(self)
         else:
             refs.add_reference(self)

From 896b6a87485f5f3e4db4d53b00d78df436dde7b7 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 6 Feb 2023 22:15:35 +0100
Subject: [PATCH 14/22] Update pandas/_libs/internals.pyx

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 pandas/_libs/internals.pyx | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index d821b9308c500..d86ebb3f32465 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -607,6 +607,9 @@ cdef class SharedBlock:
             # object that only references this block
             self.refs = BlockValuesRefs(self)
         else:
+            # if refs are passed, this is the BlockValuesRefs object that is shared
+            # with the parent blocks which share the values, and a reference to this
+            # new block is added
             refs.add_reference(self)
             self.refs = refs
 

From 1122d0cc191dda406499be93b9ea60cc0f708692 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 6 Feb 2023 22:15:44 +0100
Subject: [PATCH 15/22] Update doc/source/development/copy_on_write.rst

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 doc/source/development/copy_on_write.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/development/copy_on_write.rst b/doc/source/development/copy_on_write.rst
index 2c7a54c60cbc7..9d39c1a7ed42b 100644
--- a/doc/source/development/copy_on_write.rst
+++ b/doc/source/development/copy_on_write.rst
@@ -20,7 +20,7 @@ keeps track of all ``Blocks`` that share values with another block internally to
 be able to tell when a copy needs to be triggered. The reference tracking
 mechanism is implemented on the Block level.
 
-We use a custom reference tracker object, e.g. ``BlockValuesRefs`` that keeps
+We use a custom reference tracker object, ``BlockValuesRefs``, that keeps
 track of every block, whose values share memory with each other. The reference
 is held through a weak-reference. Every two blocks that share some memory should
 point to the same ``BlockValuesRefs`` object. If one block goes out of

From cc1ad65d208430fde47d2a6ba6c3793c1faa9c45 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 6 Feb 2023 22:16:18 +0100
Subject: [PATCH 16/22] Update pandas/_libs/internals.pyx

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 pandas/_libs/internals.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index d86ebb3f32465..a1bdc748f9c03 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -895,7 +895,7 @@ cdef class BlockValuesRefs:
 
         Returns
         -------
-            bool
+        bool
         """
         self.referenced_blocks = [
             obj for obj in self.referenced_blocks if obj() is not None

From 8c656a85b5b4fd0fc8cf9801a6d5ca338d8ea5ea Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 6 Feb 2023 22:28:36 +0100
Subject: [PATCH 17/22] Add comment

---
 pandas/tests/copy_view/test_core_functionalities.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/copy_view/test_core_functionalities.py b/pandas/tests/copy_view/test_core_functionalities.py
index f92bf26a31029..8d22433e3a193 100644
--- a/pandas/tests/copy_view/test_core_functionalities.py
+++ b/pandas/tests/copy_view/test_core_functionalities.py
@@ -9,6 +9,7 @@
 def test_assigning_to_same_variable_removes_references(using_copy_on_write):
     df = DataFrame({"a": [1, 2, 3]})
     df = df.reset_index()
+    assert df._mgr._has_no_reference(1)
     arr = get_array(df, "a")
     df.iloc[0, 1] = 100  # Write into a
 
@@ -20,7 +21,9 @@ def test_setitem_dont_track_unnecessary_references(using_copy_on_write):
 
     df["b"] = 100
     arr = get_array(df, "a")
-    df.iloc[0, 0] = 100  # Check that we correctly removed the reference
+    # We split the block in setitem, if we are not careful the new blocks will
+    # reference each other triggering a copy
+    df.iloc[0, 0] = 100
     assert np.shares_memory(arr, get_array(df, "a"))
 
 
@@ -44,8 +47,12 @@ def test_setitem_with_view_invalidated_does_not_copy(using_copy_on_write, reques
     df["b"] = 100
     arr = get_array(df, "a")
     view = None  # noqa
-    df.iloc[0, 0] = 100  # Check that we correctly release the reference
+    df.iloc[0, 0] = 100
     if using_copy_on_write:
+        # Setitem split the block. Since the old block shared data with view
+        # all the new blocks are referencing view and each other. When view
+        # goes out of scope, they don't share data with any other block,
+        # so we should not trigger a copy
         mark = pytest.mark.xfail(
             reason="blk.delete does not track references correctly"
         )

From f44d9f262e9ad18133ef7046ef8d8881ecbaef6c Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 6 Feb 2023 22:43:25 +0100
Subject: [PATCH 18/22] Adjust docstring

---
 pandas/_libs/internals.pyx | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index a1bdc748f9c03..c08e21d02aa6d 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -598,6 +598,8 @@ cdef class SharedBlock:
         placement : BlockPlacement
         ndim : int
             1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame
+        refs: BlockValuesRefs, optional
+            Ref tracking object or None if block does not have any refs.
         """
         self._mgr_locs = placement
         self.ndim = ndim

From a6fbcb9e6f8765f54fa9ce8768cf348377b9d449 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 7 Feb 2023 20:28:31 +0100
Subject: [PATCH 19/22] Fix array manager test

---
 pandas/tests/copy_view/test_core_functionalities.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/copy_view/test_core_functionalities.py b/pandas/tests/copy_view/test_core_functionalities.py
index 8d22433e3a193..204e26b35d680 100644
--- a/pandas/tests/copy_view/test_core_functionalities.py
+++ b/pandas/tests/copy_view/test_core_functionalities.py
@@ -9,7 +9,8 @@
 def test_assigning_to_same_variable_removes_references(using_copy_on_write):
     df = DataFrame({"a": [1, 2, 3]})
     df = df.reset_index()
-    assert df._mgr._has_no_reference(1)
+    if using_copy_on_write:
+        assert df._mgr._has_no_reference(1)
     arr = get_array(df, "a")
     df.iloc[0, 1] = 100  # Write into a
 

From fc71adda341a78afc4b3c5a545160a6a17611b3a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 7 Feb 2023 22:36:31 +0100
Subject: [PATCH 20/22] Switch to WeakSet

---
 pandas/_libs/internals.pyx | 9 +++------
 pandas/conftest.py         | 1 +
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index c08e21d02aa6d..b5ff69d92492f 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -874,10 +874,10 @@ cdef class BlockValuesRefs:
     data.
     """
     cdef:
-        public list referenced_blocks
+        public object referenced_blocks
 
     def __cinit__(self, blk: SharedBlock) -> None:
-        self.referenced_blocks = [weakref.ref(blk)]
+        self.referenced_blocks = weakref.WeakSet([blk])
 
     def add_reference(self, blk: SharedBlock) -> None:
         """Adds a new reference to our reference collection.
@@ -887,7 +887,7 @@ cdef class BlockValuesRefs:
         blk: SharedBlock
             The block that the new references should point to.
         """
-        self.referenced_blocks.append(weakref.ref(blk))
+        self.referenced_blocks.add(blk)
 
     def has_reference(self) -> bool:
         """Checks if block has foreign references.
@@ -899,8 +899,5 @@ cdef class BlockValuesRefs:
         -------
         bool
         """
-        self.referenced_blocks = [
-            obj for obj in self.referenced_blocks if obj() is not None
-        ]
         # Checking for more references than block pointing to itself
         return len(self.referenced_blocks) > 1
diff --git a/pandas/conftest.py b/pandas/conftest.py
index 888205366f9e6..6e6b00e91dfbb 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1949,6 +1949,7 @@ def using_copy_on_write() -> bool:
     """
     Fixture to check if Copy-on-Write is enabled.
     """
+    pd.options.mode.copy_on_write = True
     return pd.options.mode.copy_on_write and pd.options.mode.data_manager == "block"
 
 

From a45a70a582536f5fe6a2792ea32e83d79427b148 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 7 Feb 2023 22:37:22 +0100
Subject: [PATCH 21/22] Switch to WeakSet

---
 pandas/conftest.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index 6e6b00e91dfbb..888205366f9e6 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1949,7 +1949,6 @@ def using_copy_on_write() -> bool:
     """
     Fixture to check if Copy-on-Write is enabled.
     """
-    pd.options.mode.copy_on_write = True
     return pd.options.mode.copy_on_write and pd.options.mode.data_manager == "block"
 
 

From b05d3aaff1c5047156a7945fa8d55c7875edad77 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 7 Feb 2023 23:05:31 +0100
Subject: [PATCH 22/22] Add doc explanation about weakset

---
 doc/source/development/copy_on_write.rst | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/doc/source/development/copy_on_write.rst b/doc/source/development/copy_on_write.rst
index 9d39c1a7ed42b..34625ed645615 100644
--- a/doc/source/development/copy_on_write.rst
+++ b/doc/source/development/copy_on_write.rst
@@ -27,6 +27,15 @@ point to the same ``BlockValuesRefs`` object. If one block goes out of
 scope, the reference to this block dies. As a consequence, the reference tracker
 object always knows how many blocks are alive and share memory.
 
+Whenever a :class:`DataFrame` or :class:`Series` object is sharing data with another
+object, it is required that each of those objects have its own BlockManager and Block
+objects. Thus, in other words, one Block instance (that is held by a DataFrame, not
+necessarily for intermediate objects) should always be uniquely used for only
+a single DataFrame/Series object. For example, when you want to use the same
+Block for another object, you can create a shallow copy of the Block instance
+with ``block.copy(deep=False)`` (which will create a new Block instance with
+the same underlying values and which will correctly set up the references).
+
 We can ask the reference tracking object if there is another block alive that shares
 data with us before writing into the values. We can trigger a copy before
 writing if there is in fact another block alive.