From ed2e322a23421702ad295d57268159c1bef8b43a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 17 Jan 2023 22:03:11 +0100
Subject: [PATCH 01/23] ENH: Add lazy copy to astype

---
 pandas/core/generic.py                 | 10 ++++--
 pandas/core/internals/blocks.py        |  7 ++++
 pandas/core/internals/managers.py      | 16 +++++++--
 pandas/tests/copy_view/test_methods.py | 46 ++++++++++++++++++++++++++
 4 files changed, 74 insertions(+), 5 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 700601cb4f024..57067eae577c2 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6011,7 +6011,7 @@ def dtypes(self):
         return self._constructor_sliced(data, index=self._info_axis, dtype=np.object_)
 
     def astype(
-        self: NDFrameT, dtype, copy: bool_t = True, errors: IgnoreRaise = "raise"
+        self: NDFrameT, dtype, copy: bool_t | None = None, errors: IgnoreRaise = "raise"
     ) -> NDFrameT:
         """
         Cast a pandas object to a specified dtype ``dtype``.
@@ -6159,7 +6159,11 @@ def astype(
             for i, (col_name, col) in enumerate(self.items()):
                 cdt = dtype_ser.iat[i]
                 if isna(cdt):
-                    res_col = col.copy() if copy else col
+                    if using_copy_on_write():
+                        # Make a shallow copy even if copy=False for CoW
+                        res_col = col.copy(deep=copy)
+                    else:
+                        res_col = col if copy is False else col.copy()
                 else:
                     try:
                         res_col = col.astype(dtype=cdt, copy=copy, errors=errors)
@@ -6186,7 +6190,7 @@ def astype(
 
         # GH 33113: handle empty frame or series
         if not results:
-            return self.copy()
+            return self.copy(deep=None)
 
         # GH 19920: retain column metadata after concat
         result = concat(results, axis=1, copy=False)
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 4bb4882574228..2af52ddf82f62 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -11,9 +11,12 @@
     cast,
     final,
 )
+import weakref
 
 import numpy as np
 
+from pandas._config import using_copy_on_write
+
 from pandas._libs import (
     Timestamp,
     internals as libinternals,
@@ -152,6 +155,7 @@ class Block(PandasObject):
     is_extension = False
     _can_consolidate = True
     _validate_ndim = True
+    _ref = None
 
     @final
     @cache_readonly
@@ -496,6 +500,9 @@ def astype(
                 f"({self.dtype.name} [{self.shape}]) to different shape "
                 f"({newb.dtype.name} [{newb.shape}])"
             )
+        if using_copy_on_write():
+            if not copy and values is new_values:
+                newb._ref = weakref.ref(self)
         return newb
 
     @final
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 1a0aba0778da5..943028e0c27b9 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -435,8 +435,20 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
             "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
         )
 
-    def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
-        return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
+    def astype(self: T, dtype, copy: bool | None = False, errors: str = "raise") -> T:
+        if copy is None:
+            if using_copy_on_write():
+                copy = False
+            else:
+                copy = True
+
+        result = self.apply("astype", dtype=dtype, copy=copy, errors=errors)
+        if using_copy_on_write() and not copy:
+            refs = [blk._ref for blk in result.blocks]
+            if any(ref is not None for ref in refs):
+                result.refs = refs
+                result.parent = self
+        return result
 
     def convert(self: T, copy: bool) -> T:
         return self.apply(
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index 8fd9d5c5126c1..5f4ed77b08d2e 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -417,6 +417,52 @@ def test_to_frame(using_copy_on_write):
         tm.assert_frame_equal(df, expected)
 
 
+def test_astype_single_dtype(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": 1.5})
+    df_orig = df.copy()
+    df2 = df.astype("float64")
+
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    else:
+        assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+
+    # mutating df2 triggers a copy-on-write for that column/block
+    df2.iloc[0, 2] = 5.5
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
+    tm.assert_frame_equal(df, df_orig)
+
+
+def test_astype_dict_dtypes(using_copy_on_write):
+    df = DataFrame(
+        {"a": [1, 2, 3], "b": [4, 5, 6], "c": Series([1.5, 1.5, 1.5], dtype="float64")}
+    )
+    df_orig = df.copy()
+    df2 = df.astype({"a": "float64", "c": "float64"})
+
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
+        assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    else:
+        assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
+        assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+
+    # mutating df2 triggers a copy-on-write for that column/block
+    df2.iloc[0, 2] = 5.5
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
+
+    df2.iloc[0, 1] = 10
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
+    tm.assert_frame_equal(df, df_orig)
+
+
 @pytest.mark.parametrize("ax", ["index", "columns"])
 def test_swapaxes_noop(using_copy_on_write, ax):
     df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

From 1967a2a47486ab51d950fbb79b9c9a6dedacdefc Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 17 Jan 2023 23:09:01 +0100
Subject: [PATCH 02/23] Fix

---
 pandas/core/internals/blocks.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 2af52ddf82f62..aaa745a380d06 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -501,7 +501,8 @@ def astype(
                 f"({newb.dtype.name} [{newb.shape}])"
             )
         if using_copy_on_write():
-            if not copy and values is new_values:
+            if not copy:
+                # This tracks more references than necessary.
                 newb._ref = weakref.ref(self)
         return newb
 

From 45ea9892a56fb1c5ce7f41a0a0cc50944a07be5a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 17 Jan 2023 23:21:50 +0100
Subject: [PATCH 03/23] Fix test

---
 pandas/tests/series/test_constructors.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 05e40e20f1226..8bf35621a1632 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -874,13 +874,16 @@ def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtyp
         with pytest.raises(IntCastingNaNError, match=msg):
             Series(np.array(vals), dtype=any_int_numpy_dtype)
 
-    def test_constructor_dtype_no_cast(self):
+    def test_constructor_dtype_no_cast(self, using_copy_on_write):
         # see gh-1572
         s = Series([1, 2, 3])
         s2 = Series(s, dtype=np.int64)
 
         s2[1] = 5
-        assert s[1] == 5
+        if using_copy_on_write:
+            assert s[1] == 2
+        else:
+            assert s[1] == 5
 
     def test_constructor_datelike_coercion(self):
 

From a2e1e537d37030dfb93534e0936107ab7019823e Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 18 Jan 2023 12:18:15 +0100
Subject: [PATCH 04/23] Fix array manager

---
 pandas/core/internals/array_manager.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index b8ef925362e7b..f30b7c4c28859 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -371,7 +371,10 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
             "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
         )
 
-    def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
+    def astype(self: T, dtype, copy: bool | None = False, errors: str = "raise") -> T:
+        if copy is None:
+            copy = True
+
         return self.apply(astype_array_safe, dtype=dtype, copy=copy, errors=errors)
 
     def convert(self: T, copy: bool) -> T:

From 8f29fdaa364a71bb04503609f30372b2cfe910d6 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Thu, 19 Jan 2023 14:24:20 +0100
Subject: [PATCH 05/23] Narrow down

---
 pandas/core/internals/blocks.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index aaa745a380d06..fc43d35001e9e 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -501,7 +501,15 @@ def astype(
                 f"({newb.dtype.name} [{newb.shape}])"
             )
         if using_copy_on_write():
-            if not copy:
+            if (
+                not copy
+                and isinstance(values.dtype, np.dtype)
+                and isinstance(new_values.dtype, np.dtype)
+                and values is not new_values
+            ):
+                # We made a copy
+                pass
+            elif not copy:
                 # This tracks more references than necessary.
                 newb._ref = weakref.ref(self)
         return newb

From a1847e8f0a29011102e28ba7813da6076247adb7 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 25 Jan 2023 20:29:53 -0500
Subject: [PATCH 06/23] Update astype

---
 pandas/core/dtypes/astype.py           | 33 +++++++++++
 pandas/core/internals/blocks.py        | 16 ++----
 pandas/tests/copy_view/test_methods.py | 78 ++++++++++++++++++++++++++
 3 files changed, 116 insertions(+), 11 deletions(-)

diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
index e5b0b5658534f..6dde94748efc6 100644
--- a/pandas/core/dtypes/astype.py
+++ b/pandas/core/dtypes/astype.py
@@ -26,6 +26,7 @@
     is_dtype_equal,
     is_integer_dtype,
     is_object_dtype,
+    is_string_dtype,
     is_timedelta64_dtype,
     pandas_dtype,
 )
@@ -246,3 +247,35 @@ def astype_array_safe(
             raise
 
     return new_values
+
+
+def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool:
+    """Checks if astype avoided copying the data.
+
+    Parameters
+    ----------
+    dtype : Original dtype
+    new_dtype : target dtype
+
+    Returns
+    -------
+    True if new data is a view, False otherwise
+    """
+    if dtype == new_dtype:
+        return True
+
+    if isinstance(dtype, np.dtype) and isinstance(new_dtype, np.dtype):
+        # Only equal numpy dtypes avoid a copy
+        return False
+
+    if is_string_dtype(dtype) and is_string_dtype(new_dtype):
+        return True
+    elif is_string_dtype(dtype) or is_string_dtype(new_dtype):
+        return False
+
+    if getattr(dtype, "numpy_dtype", dtype) == getattr(
+        new_dtype, "numpy_dtype", new_dtype
+    ):
+        return True
+
+    return False
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 437631bc232e6..1663a491f41e7 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -41,7 +41,10 @@
 from pandas.util._decorators import cache_readonly
 from pandas.util._validators import validate_bool_kwarg
 
-from pandas.core.dtypes.astype import astype_array_safe
+from pandas.core.dtypes.astype import (
+    astype_array_safe,
+    astype_is_view,
+)
 from pandas.core.dtypes.cast import (
     LossySetitemError,
     can_hold_element,
@@ -501,16 +504,7 @@ def astype(
                 f"({newb.dtype.name} [{newb.shape}])"
             )
         if using_copy_on_write():
-            if (
-                not copy
-                and isinstance(values.dtype, np.dtype)
-                and isinstance(new_values.dtype, np.dtype)
-                and values is not new_values
-            ):
-                # We made a copy
-                pass
-            elif not copy:
-                # This tracks more references than necessary.
+            if astype_is_view(values.dtype, new_values.dtype):
                 newb._ref = weakref.ref(self)
         return newb
 
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index 6a64dde97bfc5..b45907bac16c4 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+from pandas.compat import pa_version_under6p0
+
 from pandas import (
     DataFrame,
     Index,
@@ -509,6 +511,82 @@ def test_astype_single_dtype(using_copy_on_write):
     tm.assert_frame_equal(df, df_orig)
 
 
+@pytest.mark.parametrize("dtype", ["int64", "Int64"])
+@pytest.mark.parametrize("new_dtype", ["int64", "Int64", "int64[pyarrow]"])
+def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype):
+    if dtype == "int32[pyarrow]" and pa_version_under6p0:
+        pytest.skip("pyarrow not installed")
+    df = DataFrame({"a": [1, 2, 3]}, dtype=dtype)
+    df_orig = df.copy()
+    df2 = df.astype(new_dtype)
+
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    else:
+        if new_dtype == "int64[pyarrow]":
+            pytest.skip("Does not make a copy")
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+
+    # mutating df2 triggers a copy-on-write for that column/block
+    df2.iloc[0, 0] = 10
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    tm.assert_frame_equal(df, df_orig)
+
+
+@pytest.mark.parametrize("dtype", ["float64", "int32", "Int32", "int32[pyarrow]"])
+def test_astype_different_target_dtype(using_copy_on_write, dtype):
+    if dtype == "int32[pyarrow]" and pa_version_under6p0:
+        pytest.skip("pyarrow not installed")
+    df = DataFrame({"a": [1, 2, 3]})
+    df_orig = df.copy()
+    df2 = df.astype(dtype)
+
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    else:
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+
+    df2.iloc[0, 0] = 5
+    tm.assert_frame_equal(df, df_orig)
+
+
+@pytest.mark.parametrize(
+    "dtype, new_dtype", [("object", "string"), ("string", "object")]
+)
+def test_astype_string_and_object(using_copy_on_write, dtype, new_dtype):
+    df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)
+    df_orig = df.copy()
+    df2 = df.astype(new_dtype)
+
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    else:
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+
+    df2.iloc[0, 0] = "x"
+    tm.assert_frame_equal(df, df_orig)
+
+
+@pytest.mark.parametrize(
+    "dtype, new_dtype", [("object", "string"), ("string", "object")]
+)
+def test_astype_string_and_object_update_original(
+    using_copy_on_write, dtype, new_dtype
+):
+    df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)
+    df2 = df.astype(new_dtype)
+    df_orig = df2.copy()
+
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    else:
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+
+    df.iloc[0, 0] = "x"
+    tm.assert_frame_equal(df2, df_orig)
+
+
 def test_astype_dict_dtypes(using_copy_on_write):
     df = DataFrame(
         {"a": [1, 2, 3], "b": [4, 5, 6], "c": Series([1.5, 1.5, 1.5], dtype="float64")}

From 75bdf1d2c3b2b141297213a72d84b5903d00ac26 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 25 Jan 2023 20:39:22 -0500
Subject: [PATCH 07/23] Fix datetime case

---
 pandas/core/dtypes/astype.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
index 6dde94748efc6..88d0bee46c8db 100644
--- a/pandas/core/dtypes/astype.py
+++ b/pandas/core/dtypes/astype.py
@@ -264,16 +264,20 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool:
     if dtype == new_dtype:
         return True
 
-    if isinstance(dtype, np.dtype) and isinstance(new_dtype, np.dtype):
+    elif isinstance(dtype, np.dtype) and isinstance(new_dtype, np.dtype):
         # Only equal numpy dtypes avoid a copy
         return False
 
     if is_string_dtype(dtype) and is_string_dtype(new_dtype):
         return True
+
     elif is_string_dtype(dtype) or is_string_dtype(new_dtype):
         return False
 
-    if getattr(dtype, "numpy_dtype", dtype) == getattr(
+    elif dtype.kind in "mM" and new_dtype.kind in "mM":
+        return True
+
+    elif getattr(dtype, "numpy_dtype", dtype) == getattr(
         new_dtype, "numpy_dtype", new_dtype
     ):
         return True

From 528b0fb24597a280291c72583163638ee098406c Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 25 Jan 2023 21:41:17 -0500
Subject: [PATCH 08/23] Skip

---
 pandas/tests/copy_view/test_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index b45907bac16c4..d8ccc34087b6f 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -514,7 +514,7 @@ def test_astype_single_dtype(using_copy_on_write):
 @pytest.mark.parametrize("dtype", ["int64", "Int64"])
 @pytest.mark.parametrize("new_dtype", ["int64", "Int64", "int64[pyarrow]"])
 def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype):
-    if dtype == "int32[pyarrow]" and pa_version_under6p0:
+    if dtype == "int64[pyarrow]" and pa_version_under6p0:
         pytest.skip("pyarrow not installed")
     df = DataFrame({"a": [1, 2, 3]}, dtype=dtype)
     df_orig = df.copy()

From f29a0be285520fbe6b86299660486bf96d08279f Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Thu, 26 Jan 2023 19:21:34 -0500
Subject: [PATCH 09/23] Fix test

---
 pandas/tests/copy_view/test_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index d8ccc34087b6f..ce58364fbca76 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -514,7 +514,7 @@ def test_astype_single_dtype(using_copy_on_write):
 @pytest.mark.parametrize("dtype", ["int64", "Int64"])
 @pytest.mark.parametrize("new_dtype", ["int64", "Int64", "int64[pyarrow]"])
 def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype):
-    if dtype == "int64[pyarrow]" and pa_version_under6p0:
+    if new_dtype == "int64[pyarrow]" and pa_version_under6p0:
         pytest.skip("pyarrow not installed")
     df = DataFrame({"a": [1, 2, 3]}, dtype=dtype)
     df_orig = df.copy()

From 35b9fa4941645c9d25fa352d71074586ca79d492 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Thu, 26 Jan 2023 19:36:18 -0500
Subject: [PATCH 10/23] Refactor

---
 pandas/core/internals/managers.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 943028e0c27b9..c8ebb7c46e1f6 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -436,13 +436,19 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
         )
 
     def astype(self: T, dtype, copy: bool | None = False, errors: str = "raise") -> T:
+        return self._apply_and_handle_refs(
+            "astype", copy=copy, dtype=dtype, errors=errors
+        )
+
+    def _apply_and_handle_refs(self, func: str, copy: bool | None, **kwargs):
         if copy is None:
             if using_copy_on_write():
                 copy = False
             else:
                 copy = True
 
-        result = self.apply("astype", dtype=dtype, copy=copy, errors=errors)
+        result = self.apply(func, copy=copy, **kwargs)
+
         if using_copy_on_write() and not copy:
             refs = [blk._ref for blk in result.blocks]
             if any(ref is not None for ref in refs):

From 664c31ba660746622bdc88b58a3b154350f7efdf Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 30 Jan 2023 22:13:40 +0100
Subject: [PATCH 11/23] Address review

---
 pandas/core/dtypes/astype.py                |  2 ++
 pandas/core/generic.py                      |  6 +-----
 pandas/tests/copy_view/test_constructors.py |  8 +++++---
 pandas/tests/copy_view/test_methods.py      | 22 +++++++++++++++------
 4 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
index 88d0bee46c8db..ebb0c4f189b7a 100644
--- a/pandas/core/dtypes/astype.py
+++ b/pandas/core/dtypes/astype.py
@@ -280,6 +280,8 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool:
     elif getattr(dtype, "numpy_dtype", dtype) == getattr(
         new_dtype, "numpy_dtype", new_dtype
     ):
+        # If underlying numpy dtype is the same, no copy is made, e.g.
+        # int64 -> Int64 or int64[pyarrow]
         return True
 
     return False
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 3e100779f0111..2848bee655716 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6290,11 +6290,7 @@ def astype(
             for i, (col_name, col) in enumerate(self.items()):
                 cdt = dtype_ser.iat[i]
                 if isna(cdt):
-                    if using_copy_on_write():
-                        # Make a shallow copy even if copy=False for CoW
-                        res_col = col.copy(deep=copy)
-                    else:
-                        res_col = col if copy is False else col.copy()
+                    res_col = col.copy(deep=copy)
                 else:
                     try:
                         res_col = col.astype(dtype=cdt, copy=copy, errors=errors)
diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
index c04c733e5ee1d..f8a57b67f29fb 100644
--- a/pandas/tests/copy_view/test_constructors.py
+++ b/pandas/tests/copy_view/test_constructors.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest
 
 from pandas import Series
 
@@ -6,13 +7,14 @@
 # Copy/view behaviour for Series / DataFrame constructors
 
 
-def test_series_from_series(using_copy_on_write):
+@pytest.mark.parametrize("dtype", [None, "int64"])
+def test_series_from_series(dtype, using_copy_on_write):
     # Case: constructing a Series from another Series object follows CoW rules:
     # a new object is returned and thus mutations are not propagated
     ser = Series([1, 2, 3], name="name")
 
     # default is copy=False -> new Series is a shallow copy / view of original
-    result = Series(ser)
+    result = Series(ser, dtype=dtype)
 
     # the shallow copy still shares memory
     assert np.shares_memory(ser.values, result.values)
@@ -34,7 +36,7 @@ def test_series_from_series(using_copy_on_write):
         assert np.shares_memory(ser.values, result.values)
 
     # the same when modifying the parent
-    result = Series(ser)
+    result = Series(ser, dtype=dtype)
 
     if using_copy_on_write:
         # mutating original doesn't mutate new series
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index ce58364fbca76..cb36ded7f80e9 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -510,6 +510,11 @@ def test_astype_single_dtype(using_copy_on_write):
         assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
     tm.assert_frame_equal(df, df_orig)
 
+    # mutating parent also doesn't update result
+    df2 = df.astype("float64")
+    df.iloc[0, 2] = 5.5
+    tm.assert_frame_equal(df2, df_orig.astype("float64"))
+
 
 @pytest.mark.parametrize("dtype", ["int64", "Int64"])
 @pytest.mark.parametrize("new_dtype", ["int64", "Int64", "int64[pyarrow]"])
@@ -523,8 +528,6 @@ def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype):
     if using_copy_on_write:
         assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
     else:
-        if new_dtype == "int64[pyarrow]":
-            pytest.skip("Does not make a copy")
         assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
 
     # mutating df2 triggers a copy-on-write for that column/block
@@ -533,6 +536,11 @@ def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype):
         assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
     tm.assert_frame_equal(df, df_orig)
 
+    # mutating parent also doesn't update result
+    df2 = df.astype(new_dtype)
+    df.iloc[0, 0] = 100
+    tm.assert_frame_equal(df2, df_orig.astype(new_dtype))
+
 
 @pytest.mark.parametrize("dtype", ["float64", "int32", "Int32", "int32[pyarrow]"])
 def test_astype_different_target_dtype(using_copy_on_write, dtype):
@@ -542,14 +550,16 @@ def test_astype_different_target_dtype(using_copy_on_write, dtype):
     df_orig = df.copy()
     df2 = df.astype(dtype)
 
-    if using_copy_on_write:
-        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
-    else:
-        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
 
     df2.iloc[0, 0] = 5
     tm.assert_frame_equal(df, df_orig)
 
+    # mutating parent also doesn't update result
+    df2 = df.astype(dtype)
+    df.iloc[0, 0] = 100
+    tm.assert_frame_equal(df2, df_orig.astype(dtype))
+
 
 @pytest.mark.parametrize(
     "dtype, new_dtype", [("object", "string"), ("string", "object")]

From e1dc971997b767e5846dd76e1f7a3532b57f6342 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 30 Jan 2023 22:57:25 +0100
Subject: [PATCH 12/23] Fix decimal case

---
 pandas/core/dtypes/astype.py      |  4 ++++
 pandas/tests/arrays/test_array.py | 11 +++++++++++
 2 files changed, 15 insertions(+)

diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
index ebb0c4f189b7a..0bac9b463939b 100644
--- a/pandas/core/dtypes/astype.py
+++ b/pandas/core/dtypes/astype.py
@@ -271,6 +271,10 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool:
     if is_string_dtype(dtype) and is_string_dtype(new_dtype):
         return True
 
+    elif is_object_dtype(dtype) and new_dtype.kind == "O":
+        # When the underlying array has dtype object, we don't have to make a copy
+        return True
+
     elif is_string_dtype(dtype) or is_string_dtype(new_dtype):
         return False
 
diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py
index 2288ac408d99e..2d25f5a650343 100644
--- a/pandas/tests/arrays/test_array.py
+++ b/pandas/tests/arrays/test_array.py
@@ -421,3 +421,14 @@ def test_array_to_numpy_na():
     result = arr.to_numpy(na_value=True, dtype=bool)
     expected = np.array([True, True])
     tm.assert_numpy_array_equal(result, expected)
+
+
+def test_array_copy_on_write(using_copy_on_write):
+    df = pd.DataFrame({"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype="object")
+    df2 = df.astype(DecimalDtype())
+    df.iloc[0, 0] = 0
+    if using_copy_on_write:
+        expected = pd.DataFrame(
+            {"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype=DecimalDtype()
+        )
+        tm.assert_equal(df2.values, expected.values)

From 273b5aa5243cac54f4f29ffcbc68987634be4a46 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 7 Feb 2023 21:32:12 +0100
Subject: [PATCH 13/23] Merge CoW ref tracking

---
 doc/source/development/copy_on_write.rst      |  32 +++
 doc/source/development/index.rst              |   1 +
 pandas/_libs/internals.pyi                    |  14 +-
 pandas/_libs/internals.pyx                    | 113 +++++++--
 pandas/compat/pickle_compat.py                |   2 +-
 pandas/core/generic.py                        |   6 +-
 pandas/core/internals/blocks.py               |  33 ++-
 pandas/core/internals/concat.py               |  18 +-
 pandas/core/internals/managers.py             | 223 ++++--------------
 pandas/core/internals/ops.py                  |   2 +-
 pandas/core/reshape/concat.py                 |  16 +-
 pandas/tests/copy_view/test_constructors.py   |   4 +-
 .../copy_view/test_core_functionalities.py    |  88 +++++++
 pandas/tests/copy_view/test_internals.py      |  34 +--
 pandas/tests/copy_view/test_methods.py        |   6 +-
 15 files changed, 320 insertions(+), 272 deletions(-)
 create mode 100644 doc/source/development/copy_on_write.rst
 create mode 100644 pandas/tests/copy_view/test_core_functionalities.py

diff --git a/doc/source/development/copy_on_write.rst b/doc/source/development/copy_on_write.rst
new file mode 100644
index 0000000000000..9d39c1a7ed42b
--- /dev/null
+++ b/doc/source/development/copy_on_write.rst
@@ -0,0 +1,32 @@
+.. _copy_on_write:
+
+{{ header }}
+
+*************
+Copy on write
+*************
+
+Copy on Write is a mechanism to simplify the indexing API and improve
+performance through avoiding copies if possible.
+CoW means that any DataFrame or Series derived from another in any way always
+behaves as a copy.
+
+Reference tracking
+------------------
+
+To be able to determine, if we have to make a copy when writing into a DataFrame,
+we have to be aware, if the values are shared with another DataFrame. pandas
+keeps track of all ``Blocks`` that share values with another block internally to
+be able to tell when a copy needs to be triggered. The reference tracking
+mechanism is implemented on the Block level.
+
+We use a custom reference tracker object, ``BlockValuesRefs``, that keeps
+track of every block, whose values share memory with each other. The reference
+is held through a weak-reference. Every two blocks that share some memory should
+point to the same ``BlockValuesRefs`` object. If one block goes out of
+scope, the reference to this block dies. As a consequence, the reference tracker
+object always knows how many blocks are alive and share memory.
+
+We can ask the reference tracking object if there is another block alive that shares
+data with us before writing into the values. We can trigger a copy before
+writing if there is in fact another block alive.
diff --git a/doc/source/development/index.rst b/doc/source/development/index.rst
index c741441cf67a1..69f04494a271c 100644
--- a/doc/source/development/index.rst
+++ b/doc/source/development/index.rst
@@ -18,6 +18,7 @@ Development
     contributing_codebase
     maintaining
     internals
+    copy_on_write
     debugging_extensions
     extending
     developer
diff --git a/pandas/_libs/internals.pyi b/pandas/_libs/internals.pyi
index 79bdbea71e4d8..5dfcc3726c84f 100644
--- a/pandas/_libs/internals.pyi
+++ b/pandas/_libs/internals.pyi
@@ -4,6 +4,7 @@ from typing import (
     final,
     overload,
 )
+import weakref
 
 import numpy as np
 
@@ -59,8 +60,13 @@ class SharedBlock:
     _mgr_locs: BlockPlacement
     ndim: int
     values: ArrayLike
+    refs: BlockValuesRefs
     def __init__(
-        self, values: ArrayLike, placement: BlockPlacement, ndim: int
+        self,
+        values: ArrayLike,
+        placement: BlockPlacement,
+        ndim: int,
+        refs: BlockValuesRefs | None = ...,
     ) -> None: ...
 
 class NumpyBlock(SharedBlock):
@@ -87,3 +93,9 @@ class BlockManager:
     ) -> None: ...
     def get_slice(self: T, slobj: slice, axis: int = ...) -> T: ...
     def _rebuild_blknos_and_blklocs(self) -> None: ...
+
+class BlockValuesRefs:
+    referenced_blocks: list[weakref.ref]
+    def __init__(self, blk: SharedBlock) -> None: ...
+    def add_reference(self, blk: SharedBlock) -> None: ...
+    def has_reference(self) -> bool: ...
diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index 3333ac1115177..c08e21d02aa6d 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -580,9 +580,16 @@ cdef class SharedBlock:
     """
     cdef:
         public BlockPlacement _mgr_locs
+        public BlockValuesRefs refs
         readonly int ndim
 
-    def __cinit__(self, values, placement: BlockPlacement, ndim: int):
+    def __cinit__(
+        self,
+        values,
+        placement: BlockPlacement,
+        ndim: int,
+        refs: BlockValuesRefs | None = None,
+    ):
         """
         Parameters
         ----------
@@ -591,9 +598,22 @@ cdef class SharedBlock:
         placement : BlockPlacement
         ndim : int
             1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame
+        refs: BlockValuesRefs, optional
+            Ref tracking object or None if block does not have any refs.
         """
         self._mgr_locs = placement
         self.ndim = ndim
+        if refs is None:
+            # if no refs are passed, that means we are creating a Block from
+            # new values that it uniquely owns -> start a new BlockValuesRefs
+            # object that only references this block
+            self.refs = BlockValuesRefs(self)
+        else:
+            # if refs are passed, this is the BlockValuesRefs object that is shared
+            # with the parent blocks which share the values, and a reference to this
+            # new block is added
+            refs.add_reference(self)
+            self.refs = refs
 
     cpdef __reduce__(self):
         args = (self.values, self.mgr_locs.indexer, self.ndim)
@@ -619,9 +639,15 @@ cdef class NumpyBlock(SharedBlock):
     cdef:
         public ndarray values
 
-    def __cinit__(self, ndarray values, BlockPlacement placement, int ndim):
+    def __cinit__(
+        self,
+        ndarray values,
+        BlockPlacement placement,
+        int ndim,
+        refs: BlockValuesRefs | None = None,
+    ):
         # set values here; the (implicit) call to SharedBlock.__cinit__ will
-        #  set placement and ndim
+        #  set placement, ndim and refs
         self.values = values
 
     cpdef NumpyBlock getitem_block_index(self, slice slicer):
@@ -631,7 +657,7 @@ cdef class NumpyBlock(SharedBlock):
         Assumes self.ndim == 2
         """
         new_values = self.values[..., slicer]
-        return type(self)(new_values, self._mgr_locs, ndim=self.ndim)
+        return type(self)(new_values, self._mgr_locs, ndim=self.ndim, refs=self.refs)
 
 
 cdef class NDArrayBackedBlock(SharedBlock):
@@ -641,9 +667,15 @@ cdef class NDArrayBackedBlock(SharedBlock):
     cdef public:
         NDArrayBacked values
 
-    def __cinit__(self, NDArrayBacked values, BlockPlacement placement, int ndim):
+    def __cinit__(
+        self,
+        NDArrayBacked values,
+        BlockPlacement placement,
+        int ndim,
+        refs: BlockValuesRefs | None = None,
+    ):
         # set values here; the (implicit) call to SharedBlock.__cinit__ will
-        #  set placement and ndim
+        #  set placement, ndim and refs
         self.values = values
 
     cpdef NDArrayBackedBlock getitem_block_index(self, slice slicer):
@@ -653,16 +685,22 @@ cdef class NDArrayBackedBlock(SharedBlock):
         Assumes self.ndim == 2
         """
         new_values = self.values[..., slicer]
-        return type(self)(new_values, self._mgr_locs, ndim=self.ndim)
+        return type(self)(new_values, self._mgr_locs, ndim=self.ndim, refs=self.refs)
 
 
 cdef class Block(SharedBlock):
     cdef:
         public object values
 
-    def __cinit__(self, object values, BlockPlacement placement, int ndim):
+    def __cinit__(
+        self,
+        object values,
+        BlockPlacement placement,
+        int ndim,
+        refs: BlockValuesRefs | None = None,
+    ):
         # set values here; the (implicit) call to SharedBlock.__cinit__ will
-        #  set placement and ndim
+        #  set placement, ndim and refs
         self.values = values
 
 
@@ -673,15 +711,11 @@ cdef class BlockManager:
         public list axes
         public bint _known_consolidated, _is_consolidated
         public ndarray _blknos, _blklocs
-        public list refs
-        public object parent
 
     def __cinit__(
         self,
         blocks=None,
         axes=None,
-        refs=None,
-        parent=None,
         verify_integrity=True,
     ):
         # None as defaults for unpickling GH#42345
@@ -695,8 +729,6 @@ cdef class BlockManager:
 
         self.blocks = blocks
         self.axes = axes.copy()  # copy to make sure we are not remotely-mutable
-        self.refs = refs
-        self.parent = parent
 
         # Populate known_consolidate, blknos, and blklocs lazily
         self._known_consolidated = False
@@ -805,16 +837,12 @@ cdef class BlockManager:
             ndarray blknos, blklocs
 
         nbs = []
-        nrefs = []
         for blk in self.blocks:
             nb = blk.getitem_block_index(slobj)
             nbs.append(nb)
-            nrefs.append(weakref.ref(blk))
 
         new_axes = [self.axes[0], self.axes[1]._getitem_slice(slobj)]
-        mgr = type(self)(
-            tuple(nbs), new_axes, nrefs, parent=self, verify_integrity=False
-        )
+        mgr = type(self)(tuple(nbs), new_axes, verify_integrity=False)
 
         # We can avoid having to rebuild blklocs/blknos
         blklocs = self._blklocs
@@ -827,7 +855,7 @@ cdef class BlockManager:
     def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager:
 
         if axis == 0:
-            new_blocks, new_refs = self._slice_take_blocks_ax0(slobj)
+            new_blocks = self._slice_take_blocks_ax0(slobj)
         elif axis == 1:
             return self._get_index_slice(slobj)
         else:
@@ -836,6 +864,43 @@ cdef class BlockManager:
         new_axes = list(self.axes)
         new_axes[axis] = new_axes[axis]._getitem_slice(slobj)
 
-        return type(self)(
-            tuple(new_blocks), new_axes, new_refs, parent=self, verify_integrity=False
-        )
+        return type(self)(tuple(new_blocks), new_axes, verify_integrity=False)
+
+
+cdef class BlockValuesRefs:
+    """Tracks all references to a given array.
+
+    Keeps track of all blocks (through weak references) that reference the same
+    data.
+    """
+    cdef:
+        public list referenced_blocks
+
+    def __cinit__(self, blk: SharedBlock) -> None:
+        self.referenced_blocks = [weakref.ref(blk)]
+
+    def add_reference(self, blk: SharedBlock) -> None:
+        """Adds a new reference to our reference collection.
+
+        Parameters
+        ----------
+        blk: SharedBlock
+            The block that the new references should point to.
+        """
+        self.referenced_blocks.append(weakref.ref(blk))
+
+    def has_reference(self) -> bool:
+        """Checks if block has foreign references.
+
+        A reference is only relevant if it is still alive. The reference to
+        ourselves does not count.
+
+        Returns
+        -------
+        bool
+        """
+        self.referenced_blocks = [
+            obj for obj in self.referenced_blocks if obj() is not None
+        ]
+        # Checking for more references than block pointing to itself
+        return len(self.referenced_blocks) > 1
diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py
index 2fbd3a6cdb046..bfee616d52aac 100644
--- a/pandas/compat/pickle_compat.py
+++ b/pandas/compat/pickle_compat.py
@@ -168,7 +168,7 @@ def load_newobj(self) -> None:
         arr = np.array([], dtype="m8[ns]")
         obj = cls.__new__(cls, arr, arr.dtype)
     elif cls is BlockManager and not args:
-        obj = cls.__new__(cls, (), [], None, False)
+        obj = cls.__new__(cls, (), [], False)
     else:
         obj = cls.__new__(cls, *args)
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index caecfac9d25fe..72f9a2f10015d 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -768,8 +768,10 @@ def swapaxes(
             )
             assert isinstance(new_mgr, BlockManager)
             assert isinstance(self._mgr, BlockManager)
-            new_mgr.parent = self._mgr
-            new_mgr.refs = [weakref.ref(self._mgr.blocks[0])]
+            new_mgr.blocks[0].refs = self._mgr.blocks[0].refs
+            new_mgr.blocks[0].refs.add_reference(
+                new_mgr.blocks[0]  # type: ignore[arg-type]
+            )
             return self._constructor(new_mgr).__finalize__(self, method="swapaxes")
 
         elif (copy or copy is None) and self._mgr.is_single_block:
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 1455b3ce86a0d..41b8e63cc7a64 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -23,7 +23,10 @@
     lib,
     writers,
 )
-from pandas._libs.internals import BlockPlacement
+from pandas._libs.internals import (
+    BlockPlacement,
+    BlockValuesRefs,
+)
 from pandas._libs.missing import NA
 from pandas._libs.tslibs import IncompatibleFrequency
 from pandas._typing import (
@@ -150,6 +153,7 @@ class Block(PandasObject):
 
     values: np.ndarray | ExtensionArray
     ndim: int
+    refs: BlockValuesRefs
     __init__: Callable
 
     __slots__ = ()
@@ -272,7 +276,8 @@ def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block:
         if new_values.ndim != self.values.ndim:
             raise ValueError("Only same dim slicing is allowed")
 
-        return type(self)(new_values, new_mgr_locs, self.ndim)
+        refs = self.refs if isinstance(slicer, slice) else None
+        return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)
 
     @final
     def getitem_block_columns(
@@ -288,7 +293,7 @@ def getitem_block_columns(
         if new_values.ndim != self.values.ndim:
             raise ValueError("Only same dim slicing is allowed")
 
-        return type(self)(new_values, new_mgr_locs, self.ndim)
+        return type(self)(new_values, new_mgr_locs, self.ndim, refs=self.refs)
 
     @final
     def _can_hold_element(self, element: Any) -> bool:
@@ -516,9 +521,13 @@ def to_native_types(self, na_rep: str = "nan", quoting=None, **kwargs) -> Block:
     def copy(self, deep: bool = True) -> Block:
         """copy constructor"""
         values = self.values
+        refs: BlockValuesRefs | None
         if deep:
             values = values.copy()
-        return type(self)(values, placement=self._mgr_locs, ndim=self.ndim)
+            refs = None
+        else:
+            refs = self.refs
+        return type(self)(values, placement=self._mgr_locs, ndim=self.ndim, refs=refs)
 
     # ---------------------------------------------------------------------
     # Replace
@@ -1351,6 +1360,10 @@ def delete(self, loc) -> list[Block]:
         new_blocks: list[Block] = []
 
         previous_loc = -1
+        # TODO(CoW): This is tricky, if parent block goes out of scope
+        # all split blocks are referencing each other even though they
+        # don't share data
+        refs = self.refs if self.refs.has_reference() else None
         for idx in loc:
 
             if idx == previous_loc + 1:
@@ -1361,7 +1374,9 @@ def delete(self, loc) -> list[Block]:
                 # argument type "Tuple[slice, slice]"
                 values = self.values[previous_loc + 1 : idx, :]  # type: ignore[call-overload]  # noqa
                 locs = mgr_locs_arr[previous_loc + 1 : idx]
-                nb = type(self)(values, placement=BlockPlacement(locs), ndim=self.ndim)
+                nb = type(self)(
+                    values, placement=BlockPlacement(locs), ndim=self.ndim, refs=refs
+                )
                 new_blocks.append(nb)
 
             previous_loc = idx
@@ -1818,7 +1833,7 @@ def getitem_block_index(self, slicer: slice) -> ExtensionBlock:
         # GH#42787 in principle this is equivalent to values[..., slicer], but we don't
         # require subclasses of ExtensionArray to support that form (for now).
         new_values = self.values[slicer]
-        return type(self)(new_values, self._mgr_locs, ndim=self.ndim)
+        return type(self)(new_values, self._mgr_locs, ndim=self.ndim, refs=self.refs)
 
     def diff(self, n: int, axis: AxisInt = 1) -> list[Block]:
         # only reached with ndim == 2 and axis == 1
@@ -2151,7 +2166,9 @@ def new_block_2d(values: ArrayLike, placement: BlockPlacement):
     return klass(values, ndim=2, placement=placement)
 
 
-def new_block(values, placement, *, ndim: int) -> Block:
+def new_block(
+    values, placement, *, ndim: int, refs: BlockValuesRefs | None = None
+) -> Block:
     # caller is responsible for ensuring values is NOT a PandasArray
 
     if not isinstance(placement, BlockPlacement):
@@ -2162,7 +2179,7 @@ def new_block(values, placement, *, ndim: int) -> Block:
     klass = get_block_type(values.dtype)
 
     values = maybe_coerce_values(values)
-    return klass(values, ndim=ndim, placement=placement)
+    return klass(values, ndim=ndim, placement=placement, refs=refs)
 
 
 def check_ndim(values, placement: BlockPlacement, ndim: int) -> None:
diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index d46b51a2ee954..bedd4d92a1ea3 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -7,7 +7,6 @@
     Sequence,
     cast,
 )
-import weakref
 
 import numpy as np
 
@@ -62,10 +61,7 @@
     ensure_block_shape,
     new_block_2d,
 )
-from pandas.core.internals.managers import (
-    BlockManager,
-    using_copy_on_write,
-)
+from pandas.core.internals.managers import BlockManager
 
 if TYPE_CHECKING:
     from pandas import Index
@@ -271,8 +267,6 @@ def _concat_managers_axis0(
 
     offset = 0
     blocks = []
-    refs: list[weakref.ref | None] = []
-    parents: list = []
     for i, mgr in enumerate(mgrs):
         # If we already reindexed, then we definitely don't need another copy
         made_copy = had_reindexers[i]
@@ -289,17 +283,9 @@ def _concat_managers_axis0(
             nb._mgr_locs = nb._mgr_locs.add(offset)
             blocks.append(nb)
 
-        if not made_copy and not copy and using_copy_on_write():
-            refs.extend([weakref.ref(blk) for blk in mgr.blocks])
-            parents.append(mgr)
-        elif using_copy_on_write():
-            refs.extend([None] * len(mgr.blocks))
-
         offset += len(mgr.items)
 
-    result_parents = parents if parents else None
-    result_ref = refs if refs else None
-    result = BlockManager(tuple(blocks), axes, parent=result_parents, refs=result_ref)
+    result = BlockManager(tuple(blocks), axes)
     return result
 
 
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index dd1394ede32be..d3e406c25e455 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -11,7 +11,6 @@
     cast,
 )
 import warnings
-import weakref
 
 import numpy as np
 
@@ -146,8 +145,6 @@ class BaseBlockManager(DataManager):
     _blklocs: npt.NDArray[np.intp]
     blocks: tuple[Block, ...]
     axes: list[Index]
-    refs: list[weakref.ref | None] | None
-    parent: object
 
     @property
     def ndim(self) -> int:
@@ -156,17 +153,11 @@ def ndim(self) -> int:
     _known_consolidated: bool
     _is_consolidated: bool
 
-    def __init__(self, blocks, axes, refs=None, verify_integrity: bool = True) -> None:
+    def __init__(self, blocks, axes, verify_integrity: bool = True) -> None:
         raise NotImplementedError
 
     @classmethod
-    def from_blocks(
-        cls: type_t[T],
-        blocks: list[Block],
-        axes: list[Index],
-        refs: list[weakref.ref | None] | None = None,
-        parent: object = None,
-    ) -> T:
+    def from_blocks(cls: type_t[T], blocks: list[Block], axes: list[Index]) -> T:
         raise NotImplementedError
 
     @property
@@ -254,19 +245,7 @@ def _has_no_reference_block(self, blkno: int) -> bool:
         (whether it references another array or is itself being referenced)
         Returns True if the block has no references.
         """
-        # TODO(CoW) include `or self.refs[blkno]() is None` ?
-        return (
-            self.refs is None or self.refs[blkno] is None
-        ) and weakref.getweakrefcount(self.blocks[blkno]) == 0
-
-    def _clear_reference_block(self, blkno: int) -> None:
-        """
-        Clear any reference for column `i`.
-        """
-        if self.refs is not None:
-            self.refs[blkno] = None
-            if com.all_none(*self.refs):
-                self.parent = None
+        return not self.blocks[blkno].refs.has_reference()
 
     def get_dtypes(self):
         dtypes = np.array([blk.dtype for blk in self.blocks])
@@ -598,23 +577,17 @@ def _combine(
         # TODO(CoW) we could optimize here if we know that the passed blocks
         # are fully "owned" (eg created from an operation, not coming from
         # an existing manager)
-        new_refs: list[weakref.ref | None] | None = None if copy else []
         for b in blocks:
             nb = b.copy(deep=copy)
             nb.mgr_locs = BlockPlacement(inv_indexer[nb.mgr_locs.indexer])
             new_blocks.append(nb)
-            if not copy:
-                # None has no attribute "append"
-                new_refs.append(weakref.ref(b))  # type: ignore[union-attr]
 
         axes = list(self.axes)
         if index is not None:
             axes[-1] = index
         axes[0] = self.items.take(indexer)
 
-        return type(self).from_blocks(
-            new_blocks, axes, new_refs, parent=None if copy else self
-        )
+        return type(self).from_blocks(new_blocks, axes)
 
     @property
     def nblocks(self) -> int:
@@ -654,17 +627,7 @@ def copy_func(ax):
             new_axes = list(self.axes)
 
         res = self.apply("copy", deep=deep)
-        new_refs: list[weakref.ref | None] | None
-        if deep:
-            new_refs = None
-            parent = None
-        else:
-            new_refs = [weakref.ref(blk) for blk in self.blocks]
-            parent = self
-
         res.axes = new_axes
-        res.refs = new_refs
-        res.parent = parent
 
         if self.ndim > 1:
             # Avoid needing to re-compute these
@@ -688,7 +651,7 @@ def consolidate(self: T) -> T:
         if self.is_consolidated():
             return self
 
-        bm = type(self)(self.blocks, self.axes, self.refs, verify_integrity=False)
+        bm = type(self)(self.blocks, self.axes, verify_integrity=False)
         bm._is_consolidated = False
         bm._consolidate_inplace()
         return bm
@@ -750,13 +713,12 @@ def reindex_indexer(
             raise IndexError("Requested axis not found in manager")
 
         if axis == 0:
-            new_blocks, new_refs = self._slice_take_blocks_ax0(
+            new_blocks = self._slice_take_blocks_ax0(
                 indexer,
                 fill_value=fill_value,
                 only_slice=only_slice,
                 use_na_proxy=use_na_proxy,
             )
-            parent = None if com.all_none(*new_refs) else self
         else:
             new_blocks = [
                 blk.take_nd(
@@ -768,13 +730,11 @@ def reindex_indexer(
                 )
                 for blk in self.blocks
             ]
-            new_refs = None
-            parent = None
 
         new_axes = list(self.axes)
         new_axes[axis] = new_axis
 
-        new_mgr = type(self).from_blocks(new_blocks, new_axes, new_refs, parent=parent)
+        new_mgr = type(self).from_blocks(new_blocks, new_axes)
         if axis == 1:
             # We can avoid the need to rebuild these
             new_mgr._blknos = self.blknos.copy()
@@ -788,7 +748,7 @@ def _slice_take_blocks_ax0(
         only_slice: bool = False,
         *,
         use_na_proxy: bool = False,
-    ) -> tuple[list[Block], list[weakref.ref | None]]:
+    ) -> list[Block]:
         """
         Slice/take blocks along axis=0.
 
@@ -821,11 +781,9 @@ def _slice_take_blocks_ax0(
                 # GH#32959 EABlock would fail since we can't make 0-width
                 # TODO(EA2D): special casing unnecessary with 2D EAs
                 if sllen == 0:
-                    return [], []
+                    return []
                 bp = BlockPlacement(slice(0, sllen))
-                return [blk.getitem_block_columns(slobj, new_mgr_locs=bp)], [
-                    weakref.ref(blk)
-                ]
+                return [blk.getitem_block_columns(slobj, new_mgr_locs=bp)]
             elif not allow_fill or self.ndim == 1:
                 if allow_fill and fill_value is None:
                     fill_value = blk.fill_value
@@ -839,9 +797,7 @@ def _slice_take_blocks_ax0(
                         )
                         for i, ml in enumerate(slobj)
                     ]
-                    # We have
-                    #  all(np.shares_memory(nb.values, blk.values) for nb in blocks)
-                    return blocks, [weakref.ref(blk)] * len(blocks)
+                    return blocks
                 else:
                     bp = BlockPlacement(slice(0, sllen))
                     return [
@@ -851,7 +807,7 @@ def _slice_take_blocks_ax0(
                             new_mgr_locs=bp,
                             fill_value=fill_value,
                         )
-                    ], [None]
+                    ]
 
         if sl_type == "slice":
             blknos = self.blknos[slobj]
@@ -867,7 +823,6 @@ def _slice_take_blocks_ax0(
         # When filling blknos, make sure blknos is updated before appending to
         # blocks list, that way new blkno is exactly len(blocks).
         blocks = []
-        refs: list[weakref.ref | None] = []
         group = not only_slice
         for blkno, mgr_locs in libinternals.get_blkno_placements(blknos, group=group):
             if blkno == -1:
@@ -880,7 +835,6 @@ def _slice_take_blocks_ax0(
                         use_na_proxy=use_na_proxy,
                     )
                 )
-                refs.append(None)
             else:
                 blk = self.blocks[blkno]
 
@@ -894,7 +848,6 @@ def _slice_take_blocks_ax0(
                         newblk = blk.copy(deep=False)
                         newblk.mgr_locs = BlockPlacement(slice(mgr_loc, mgr_loc + 1))
                         blocks.append(newblk)
-                        refs.append(weakref.ref(blk))
 
                 else:
                     # GH#32779 to avoid the performance penalty of copying,
@@ -907,7 +860,6 @@ def _slice_take_blocks_ax0(
                     if isinstance(taker, slice):
                         nb = blk.getitem_block_columns(taker, new_mgr_locs=mgr_locs)
                         blocks.append(nb)
-                        refs.append(weakref.ref(blk))
                     elif only_slice:
                         # GH#33597 slice instead of take, so we get
                         #  views instead of copies
@@ -917,13 +869,11 @@ def _slice_take_blocks_ax0(
                             nb = blk.getitem_block_columns(slc, new_mgr_locs=bp)
                             # We have np.shares_memory(nb.values, blk.values)
                             blocks.append(nb)
-                            refs.append(weakref.ref(blk))
                     else:
                         nb = blk.take_nd(taker, axis=0, new_mgr_locs=mgr_locs)
                         blocks.append(nb)
-                        refs.append(None)
 
-        return blocks, refs
+        return blocks
 
     def _make_na_block(
         self, placement: BlockPlacement, fill_value=None, use_na_proxy: bool = False
@@ -1008,8 +958,6 @@ def __init__(
         self,
         blocks: Sequence[Block],
         axes: Sequence[Index],
-        refs: list[weakref.ref | None] | None = None,
-        parent: object = None,
         verify_integrity: bool = True,
     ) -> None:
 
@@ -1041,28 +989,13 @@ def _verify_integrity(self) -> None:
                 f"block items\n# manager items: {len(self.items)}, # "
                 f"tot_items: {tot_items}"
             )
-        if self.refs is not None:
-            if len(self.refs) != len(self.blocks):
-                raise AssertionError(
-                    "Number of passed refs must equal the number of blocks: "
-                    f"{len(self.refs)} refs vs {len(self.blocks)} blocks."
-                    "\nIf you see this error, please report a bug at "
-                    "https://github.com/pandas-dev/pandas/issues"
-                )
 
     @classmethod
-    def from_blocks(
-        cls,
-        blocks: list[Block],
-        axes: list[Index],
-        refs: list[weakref.ref | None] | None = None,
-        parent: object = None,
-    ) -> BlockManager:
+    def from_blocks(cls, blocks: list[Block], axes: list[Index]) -> BlockManager:
         """
         Constructor for BlockManager and SingleBlockManager with same signature.
         """
-        parent = parent if using_copy_on_write() else None
-        return cls(blocks, axes, refs, parent, verify_integrity=False)
+        return cls(blocks, axes, verify_integrity=False)
 
     # ----------------------------------------------------------------
     # Indexing
@@ -1081,10 +1014,14 @@ def fast_xs(self, loc: int) -> SingleBlockManager:
         """
         if len(self.blocks) == 1:
             result = self.blocks[0].iget((slice(None), loc))
-            block = new_block(result, placement=slice(0, len(result)), ndim=1)
             # in the case of a single block, the new block is a view
-            ref = weakref.ref(self.blocks[0])
-            return SingleBlockManager(block, self.axes[0], [ref], parent=self)
+            block = new_block(
+                result,
+                placement=slice(0, len(result)),
+                ndim=1,
+                refs=self.blocks[0].refs,
+            )
+            return SingleBlockManager(block, self.axes[0])
 
         dtype = interleaved_dtype([blk.dtype for blk in self.blocks])
 
@@ -1127,10 +1064,10 @@ def iget(self, i: int, track_ref: bool = True) -> SingleBlockManager:
 
         # shortcut for select a single-dim from a 2-dim BM
         bp = BlockPlacement(slice(0, len(values)))
-        nb = type(block)(values, placement=bp, ndim=1)
-        ref = weakref.ref(block) if track_ref else None
-        parent = self if track_ref else None
-        return SingleBlockManager(nb, self.axes[1], [ref], parent)
+        nb = type(block)(
+            values, placement=bp, ndim=1, refs=block.refs if track_ref else None
+        )
+        return SingleBlockManager(nb, self.axes[1])
 
     def iget_values(self, i: int) -> ArrayLike:
         """
@@ -1262,7 +1199,7 @@ def value_getitem(placement):
                     self._iset_split_block(blkno_l, blk_locs)
 
         if len(removed_blknos):
-            # Remove blocks & update blknos and refs accordingly
+            # Remove blocks & update blknos accordingly
             is_deleted = np.zeros(self.nblocks, dtype=np.bool_)
             is_deleted[removed_blknos] = True
 
@@ -1273,18 +1210,14 @@ def value_getitem(placement):
             self.blocks = tuple(
                 blk for i, blk in enumerate(self.blocks) if i not in set(removed_blknos)
             )
-            if self.refs is not None:
-                self.refs = [
-                    ref
-                    for i, ref in enumerate(self.refs)
-                    if i not in set(removed_blknos)
-                ]
 
         if unfit_val_locs:
             unfit_idxr = np.concatenate(unfit_mgr_locs)
             unfit_count = len(unfit_idxr)
 
             new_blocks: list[Block] = []
+            # TODO(CoW) is this always correct to assume that the new_blocks
+            # are not referencing anything else?
             if value_is_extension_type:
                 # This code (ab-)uses the fact that EA blocks contain only
                 # one item.
@@ -1315,10 +1248,6 @@ def value_getitem(placement):
                 self._blklocs[unfit_idxr] = np.arange(unfit_count)
 
             self.blocks += tuple(new_blocks)
-            # TODO(CoW) is this always correct to assume that the new_blocks
-            # are not referencing anything else?
-            if self.refs is not None:
-                self.refs = list(self.refs) + [None] * len(new_blocks)
 
             # Newly created block's dtype may already be present.
             self._known_consolidated = False
@@ -1355,13 +1284,6 @@ def _iset_split_block(
             first_nb = nbs_tup[0]
             nbs_tup = tuple(nbs_tup[1:])
 
-        if self.refs is not None:
-            self.refs.extend([self.refs[blkno_l]] * len(nbs_tup))
-
-        if value is not None:
-            # Only clear if we set new values
-            self._clear_reference_block(blkno_l)
-
         nr_blocks = len(self.blocks)
         blocks_tup = (
             self.blocks[:blkno_l] + (first_nb,) + self.blocks[blkno_l + 1 :] + nbs_tup
@@ -1391,7 +1313,6 @@ def _iset_single(
             if using_copy_on_write() and not self._has_no_reference_block(blkno):
                 # perform Copy-on-Write and clear the reference
                 copy = True
-                self._clear_reference_block(blkno)
             iloc = self.blklocs[loc]
             blk.set_inplace(slice(iloc, iloc + 1), value, copy=copy)
             return
@@ -1400,7 +1321,6 @@ def _iset_single(
         old_blocks = self.blocks
         new_blocks = old_blocks[:blkno] + (nb,) + old_blocks[blkno + 1 :]
         self.blocks = new_blocks
-        self._clear_reference_block(blkno)
         return
 
     def column_setitem(
@@ -1418,7 +1338,6 @@ def column_setitem(
             blocks = list(self.blocks)
             blocks[blkno] = blocks[blkno].copy()
             self.blocks = tuple(blocks)
-            self._clear_reference_block(blkno)
 
         # this manager is only created temporarily to mutate the values in place
         # so don't track references, otherwise the `setitem` would perform CoW again
@@ -1452,6 +1371,7 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
             value = ensure_block_shape(value, ndim=self.ndim)
 
         bp = BlockPlacement(slice(loc, loc + 1))
+        # TODO(CoW) do we always "own" the passed `value`?
         block = new_block_2d(values=value, placement=bp)
 
         if not len(self.blocks):
@@ -1464,9 +1384,6 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
 
         self.axes[0] = new_axis
         self.blocks += (block,)
-        # TODO(CoW) do we always "own" the passed `value`?
-        if self.refs is not None:
-            self.refs += [None]
 
         self._known_consolidated = False
 
@@ -1520,12 +1437,10 @@ def idelete(self, indexer) -> BlockManager:
         is_deleted[indexer] = True
         taker = (~is_deleted).nonzero()[0]
 
-        nbs, new_refs = self._slice_take_blocks_ax0(taker, only_slice=True)
+        nbs = self._slice_take_blocks_ax0(taker, only_slice=True)
         new_columns = self.items[~is_deleted]
         axes = [new_columns, self.axes[1]]
-        # TODO this might not be needed (can a delete ever be done in chained manner?)
-        parent = None if com.all_none(*new_refs) else self
-        return type(self)(tuple(nbs), axes, new_refs, parent, verify_integrity=False)
+        return type(self)(tuple(nbs), axes, verify_integrity=False)
 
     # ----------------------------------------------------------------
     # Block-wise Operation
@@ -1872,10 +1787,7 @@ def _consolidate_inplace(self) -> None:
         #  the DataFrame's _item_cache. The exception is for newly-created
         #  BlockManager objects not yet attached to a DataFrame.
         if not self.is_consolidated():
-            if self.refs is None:
-                self.blocks = _consolidate(self.blocks)
-            else:
-                self.blocks, self.refs = _consolidate_with_refs(self.blocks, self.refs)
+            self.blocks = _consolidate(self.blocks)
             self._is_consolidated = True
             self._known_consolidated = True
             self._rebuild_blknos_and_blklocs()
@@ -1897,8 +1809,6 @@ def __init__(
         self,
         block: Block,
         axis: Index,
-        refs: list[weakref.ref | None] | None = None,
-        parent: object = None,
         verify_integrity: bool = False,
     ) -> None:
         # Assertions disabled for performance
@@ -1907,25 +1817,19 @@ def __init__(
 
         self.axes = [axis]
         self.blocks = (block,)
-        self.refs = refs
-        self.parent = parent if using_copy_on_write() else None
 
     @classmethod
     def from_blocks(
         cls,
         blocks: list[Block],
         axes: list[Index],
-        refs: list[weakref.ref | None] | None = None,
-        parent: object = None,
     ) -> SingleBlockManager:
         """
         Constructor for BlockManager and SingleBlockManager with same signature.
         """
         assert len(blocks) == 1
         assert len(axes) == 1
-        if refs is not None:
-            assert len(refs) == 1
-        return cls(blocks[0], axes[0], refs, parent, verify_integrity=False)
+        return cls(blocks[0], axes[0], verify_integrity=False)
 
     @classmethod
     def from_array(cls, array: ArrayLike, index: Index) -> SingleBlockManager:
@@ -1942,13 +1846,9 @@ def to_2d_mgr(self, columns: Index) -> BlockManager:
         blk = self.blocks[0]
         arr = ensure_block_shape(blk.values, ndim=2)
         bp = BlockPlacement(0)
-        new_blk = type(blk)(arr, placement=bp, ndim=2)
+        new_blk = type(blk)(arr, placement=bp, ndim=2, refs=blk.refs)
         axes = [columns, self.axes[0]]
-        refs: list[weakref.ref | None] = [weakref.ref(blk)]
-        parent = self if using_copy_on_write() else None
-        return BlockManager(
-            [new_blk], axes=axes, refs=refs, parent=parent, verify_integrity=False
-        )
+        return BlockManager([new_blk], axes=axes, verify_integrity=False)
 
     def _has_no_reference(self, i: int = 0) -> bool:
         """
@@ -1956,9 +1856,7 @@ def _has_no_reference(self, i: int = 0) -> bool:
         (whether it references another array or is itself being referenced)
         Returns True if the column has no references.
         """
-        return (self.refs is None or self.refs[0] is None) and weakref.getweakrefcount(
-            self.blocks[0]
-        ) == 0
+        return not self.blocks[0].refs.has_reference()
 
     def __getstate__(self):
         block_values = [b.values for b in self.blocks]
@@ -2026,19 +1924,18 @@ def getitem_mgr(self, indexer: slice | np.ndarray) -> SingleBlockManager:
             and com.is_bool_indexer(indexer)
             and indexer.all()
         ):
-            return type(self)(blk, self.index, [weakref.ref(blk)], parent=self)
+            return type(self)(blk.copy(deep=False), self.index)
         array = blk._slice(indexer)
         if array.ndim > 1:
             # This will be caught by Series._get_values
             raise ValueError("dimension-expanding indexing not allowed")
 
         bp = BlockPlacement(slice(0, len(array)))
-        block = type(blk)(array, placement=bp, ndim=1)
+        # TODO(CoW) in theory only need to track reference if new_array is a view
+        block = type(blk)(array, placement=bp, ndim=1, refs=blk.refs)
 
         new_idx = self.index[indexer]
-        # TODO(CoW) in theory only need to track reference if new_array is a view
-        ref = weakref.ref(blk)
-        return type(self)(block, new_idx, [ref], parent=self)
+        return type(self)(block, new_idx)
 
     def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleBlockManager:
         # Assertion disabled for performance
@@ -2049,11 +1946,11 @@ def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleBlockManager:
         blk = self._block
         array = blk._slice(slobj)
         bp = BlockPlacement(slice(0, len(array)))
-        block = type(blk)(array, placement=bp, ndim=1)
-        new_index = self.index._getitem_slice(slobj)
         # TODO this method is only used in groupby SeriesSplitter at the moment,
-        # so passing refs / parent is not yet covered by the tests
-        return type(self)(block, new_index, [weakref.ref(blk)], parent=self)
+        # so passing refs is not yet covered by the tests
+        block = type(blk)(array, placement=bp, ndim=1, refs=blk.refs)
+        new_index = self.index._getitem_slice(slobj)
+        return type(self)(block, new_index)
 
     @property
     def index(self) -> Index:
@@ -2099,8 +1996,6 @@ def setitem_inplace(self, indexer, value) -> None:
         """
         if using_copy_on_write() and not self._has_no_reference(0):
             self.blocks = (self._block.copy(),)
-            self.refs = None
-            self.parent = None
             self._cache.clear()
 
         super().setitem_inplace(indexer, value)
@@ -2115,9 +2010,6 @@ def idelete(self, indexer) -> SingleBlockManager:
         self.blocks = (nb,)
         self.axes[0] = self.axes[0].delete(indexer)
         self._cache.clear()
-        # clear reference since delete always results in a new array
-        self.refs = None
-        self.parent = None
         return self
 
     def fast_xs(self, loc):
@@ -2337,31 +2229,6 @@ def _consolidate(blocks: tuple[Block, ...]) -> tuple[Block, ...]:
     return tuple(new_blocks)
 
 
-def _consolidate_with_refs(
-    blocks: tuple[Block, ...], refs
-) -> tuple[tuple[Block, ...], list[weakref.ref | None]]:
-    """
-    Merge blocks having same dtype, exclude non-consolidating blocks, handling
-    refs
-    """
-    gkey = lambda x: x[0]._consolidate_key
-    grouper = itertools.groupby(sorted(zip(blocks, refs), key=gkey), gkey)
-
-    new_blocks: list[Block] = []
-    new_refs: list[weakref.ref | None] = []
-    for (_can_consolidate, dtype), group_blocks_refs in grouper:
-        group_blocks, group_refs = list(zip(*list(group_blocks_refs)))
-        merged_blocks, consolidated = _merge_blocks(
-            list(group_blocks), dtype=dtype, can_consolidate=_can_consolidate
-        )
-        new_blocks = extend_blocks(merged_blocks, new_blocks)
-        if consolidated:
-            new_refs.extend([None])
-        else:
-            new_refs.extend(group_refs)
-    return tuple(new_blocks), new_refs
-
-
 def _merge_blocks(
     blocks: list[Block], dtype: DtypeObj, can_consolidate: bool
 ) -> tuple[list[Block], bool]:
diff --git a/pandas/core/internals/ops.py b/pandas/core/internals/ops.py
index 477dc98aa2b2b..24fc51a96d9df 100644
--- a/pandas/core/internals/ops.py
+++ b/pandas/core/internals/ops.py
@@ -36,7 +36,7 @@ def _iter_block_pairs(
 
         left_ea = blk_vals.ndim == 1
 
-        rblks, _ = right._slice_take_blocks_ax0(locs.indexer, only_slice=True)
+        rblks = right._slice_take_blocks_ax0(locs.indexer, only_slice=True)
 
         # Assertions are disabled for performance, but should hold:
         # if left_ea:
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index f8220649bf890..43e0b77a90a85 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -14,7 +14,6 @@
     cast,
     overload,
 )
-import weakref
 
 import numpy as np
 
@@ -551,8 +550,9 @@ def __init__(
                     obj = sample._constructor(obj, columns=[name], copy=False)
                     if using_copy_on_write():
                         # TODO(CoW): Remove when ref tracking in constructors works
-                        obj._mgr.parent = original_obj  # type: ignore[union-attr]
-                        obj._mgr.refs = [weakref.ref(original_obj._mgr.blocks[0])]  # type: ignore[union-attr]  # noqa: E501
+                        for i, block in enumerate(original_obj._mgr.blocks):  # type: ignore[union-attr]  # noqa
+                            obj._mgr.blocks[i].refs = block.refs  # type: ignore[union-attr]  # noqa
+                            obj._mgr.blocks[i].refs.add_reference(obj._mgr.blocks[i])  # type: ignore[arg-type, union-attr]  # noqa
 
                     obj.columns = [new_name]
 
@@ -612,13 +612,9 @@ def get_result(self):
                     typ=get_option("mode.data_manager"),
                 )
                 if using_copy_on_write() and not self.copy:
-                    parents = [obj._mgr for obj in self.objs]
-                    mgr.parent = parents  # type: ignore[union-attr]
-                    refs = [
-                        weakref.ref(obj._mgr.blocks[0])  # type: ignore[union-attr]
-                        for obj in self.objs
-                    ]
-                    mgr.refs = refs  # type: ignore[union-attr]
+                    for i, obj in enumerate(self.objs):
+                        mgr.blocks[i].refs = obj._mgr.blocks[0].refs  # type: ignore[union-attr]  # noqa
+                        mgr.blocks[i].refs.add_reference(mgr.blocks[i])  # type: ignore[arg-type, union-attr]  # noqa
                 df = cons(mgr, copy=False)
                 df.columns = columns
                 return df.__finalize__(self, method="concat")
diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
index f8a57b67f29fb..1b709a8aa8076 100644
--- a/pandas/tests/copy_view/test_constructors.py
+++ b/pandas/tests/copy_view/test_constructors.py
@@ -20,7 +20,7 @@ def test_series_from_series(dtype, using_copy_on_write):
     assert np.shares_memory(ser.values, result.values)
 
     if using_copy_on_write:
-        assert result._mgr.refs is not None
+        assert result._mgr.blocks[0].refs.has_reference()
 
     if using_copy_on_write:
         # mutating new series copy doesn't mutate original
@@ -74,4 +74,4 @@ def test_series_from_series_with_reindex(using_copy_on_write):
     result = Series(ser, index=[0, 1, 2, 3])
     assert not np.shares_memory(ser.values, result.values)
     if using_copy_on_write:
-        assert result._mgr.refs is None or result._mgr.refs[0] is None
+        assert not result._mgr.blocks[0].refs.has_reference()
diff --git a/pandas/tests/copy_view/test_core_functionalities.py b/pandas/tests/copy_view/test_core_functionalities.py
new file mode 100644
index 0000000000000..204e26b35d680
--- /dev/null
+++ b/pandas/tests/copy_view/test_core_functionalities.py
@@ -0,0 +1,88 @@
+import numpy as np
+import pytest
+
+from pandas import DataFrame
+import pandas._testing as tm
+from pandas.tests.copy_view.util import get_array
+
+
+def test_assigning_to_same_variable_removes_references(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3]})
+    df = df.reset_index()
+    if using_copy_on_write:
+        assert df._mgr._has_no_reference(1)
+    arr = get_array(df, "a")
+    df.iloc[0, 1] = 100  # Write into a
+
+    assert np.shares_memory(arr, get_array(df, "a"))
+
+
+def test_setitem_dont_track_unnecessary_references(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
+
+    df["b"] = 100
+    arr = get_array(df, "a")
+    # We split the block in setitem, if we are not careful the new blocks will
+    # reference each other triggering a copy
+    df.iloc[0, 0] = 100
+    assert np.shares_memory(arr, get_array(df, "a"))
+
+
+def test_setitem_with_view_copies(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
+    view = df[:]
+    expected = df.copy()
+
+    df["b"] = 100
+    arr = get_array(df, "a")
+    df.iloc[0, 0] = 100  # Check that we correctly track reference
+    if using_copy_on_write:
+        assert not np.shares_memory(arr, get_array(df, "a"))
+        tm.assert_frame_equal(view, expected)
+
+
+def test_setitem_with_view_invalidated_does_not_copy(using_copy_on_write, request):
+    df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
+    view = df[:]
+
+    df["b"] = 100
+    arr = get_array(df, "a")
+    view = None  # noqa
+    df.iloc[0, 0] = 100
+    if using_copy_on_write:
+        # Setitem split the block. Since the old block shared data with view
+        # all the new blocks are referencing view and each other. When view
+        # goes out of scope, they don't share data with any other block,
+        # so we should not trigger a copy
+        mark = pytest.mark.xfail(
+            reason="blk.delete does not track references correctly"
+        )
+        request.node.add_marker(mark)
+        assert np.shares_memory(arr, get_array(df, "a"))
+
+
+def test_out_of_scope(using_copy_on_write):
+    def func():
+        df = DataFrame({"a": [1, 2], "b": 1.5, "c": 1})
+        # create some subset
+        result = df[["a", "b"]]
+        return result
+
+    result = func()
+    if using_copy_on_write:
+        assert not result._mgr.blocks[0].refs.has_reference()
+        assert not result._mgr.blocks[1].refs.has_reference()
+
+
+def test_delete(using_copy_on_write):
+    df = DataFrame(np.random.randn(4, 3), columns=["a", "b", "c"])
+    del df["b"]
+    if using_copy_on_write:
+        # TODO: This should not have references, delete makes a shallow copy
+        # but keeps the blocks alive
+        assert df._mgr.blocks[0].refs.has_reference()
+        assert df._mgr.blocks[1].refs.has_reference()
+
+    df = df[["a"]]
+    if using_copy_on_write:
+        assert not df._mgr.blocks[0].refs.has_reference()
diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py
index 506ae7d3465c5..67022e533dbc4 100644
--- a/pandas/tests/copy_view/test_internals.py
+++ b/pandas/tests/copy_view/test_internals.py
@@ -20,52 +20,32 @@ def test_consolidate(using_copy_on_write):
     subset = df[:]
 
     # each block of subset references a block of df
-    assert subset._mgr.refs is not None and all(
-        ref is not None for ref in subset._mgr.refs
-    )
+    assert all(blk.refs.has_reference() for blk in subset._mgr.blocks)
 
     # consolidate the two int64 blocks
     subset._consolidate_inplace()
 
     # the float64 block still references the parent one because it still a view
-    assert subset._mgr.refs[0] is not None
+    assert subset._mgr.blocks[0].refs.has_reference()
     # equivalent of assert np.shares_memory(df["b"].values, subset["b"].values)
     # but avoids caching df["b"]
     assert np.shares_memory(get_array(df, "b"), get_array(subset, "b"))
 
     # the new consolidated int64 block does not reference another
-    assert subset._mgr.refs[1] is None
+    assert not subset._mgr.blocks[1].refs.has_reference()
 
     # the parent dataframe now also only is linked for the float column
-    assert df._mgr._has_no_reference(0)
-    assert not df._mgr._has_no_reference(1)
-    assert df._mgr._has_no_reference(2)
+    assert not df._mgr.blocks[0].refs.has_reference()
+    assert df._mgr.blocks[1].refs.has_reference()
+    assert not df._mgr.blocks[2].refs.has_reference()
 
     # and modifying subset still doesn't modify parent
     if using_copy_on_write:
         subset.iloc[0, 1] = 0.0
-        assert df._mgr._has_no_reference(1)
+        assert not df._mgr.blocks[1].refs.has_reference()
         assert df.loc[0, "b"] == 0.1
 
 
-@td.skip_array_manager_invalid_test
-def test_clear_parent(using_copy_on_write):
-    # ensure to clear parent reference if we are no longer viewing data from parent
-    if not using_copy_on_write:
-        pytest.skip("test only relevant when using copy-on-write")
-
-    df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
-    subset = df[:]
-    assert subset._mgr.parent is not None
-
-    # replacing existing columns loses the references to the parent df
-    subset["a"] = 0
-    assert subset._mgr.parent is not None
-    # when losing the last reference, also the parent should be reset
-    subset["b"] = 0
-    assert subset._mgr.parent is None
-
-
 @pytest.mark.single_cpu
 @td.skip_array_manager_invalid_test
 def test_switch_options():
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index 833e4feadf27e..59645c99d9929 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -24,7 +24,8 @@ def test_copy(using_copy_on_write):
     # the deep copy doesn't share memory
     assert not np.shares_memory(get_array(df_copy, "a"), get_array(df, "a"))
     if using_copy_on_write:
-        assert df_copy._mgr.refs is None
+        assert not df_copy._mgr.blocks[0].refs.has_reference()
+        assert not df_copy._mgr.blocks[1].refs.has_reference()
 
     # mutating copy doesn't mutate original
     df_copy.iloc[0, 0] = 0
@@ -38,7 +39,8 @@ def test_copy_shallow(using_copy_on_write):
     # the shallow copy still shares memory
     assert np.shares_memory(get_array(df_copy, "a"), get_array(df, "a"))
     if using_copy_on_write:
-        assert df_copy._mgr.refs is not None
+        assert df_copy._mgr.blocks[0].refs.has_reference()
+        assert df_copy._mgr.blocks[1].refs.has_reference()
 
     if using_copy_on_write:
         # mutating shallow copy doesn't mutate original

From 31557be2c799993092e884cf0ad40fdc34783f97 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 7 Feb 2023 21:47:28 +0100
Subject: [PATCH 14/23] Refactor for new cow mechanism and move test

---
 pandas/core/dtypes/astype.py                  | 20 ++++++-------
 pandas/core/internals/blocks.py               | 28 +++++++++++--------
 pandas/core/internals/managers.py             | 20 +++++--------
 pandas/tests/arrays/test_array.py             | 11 --------
 pandas/tests/copy_view/test_methods.py        |  6 ++--
 .../tests/extension/decimal/test_decimal.py   | 11 ++++++++
 6 files changed, 47 insertions(+), 49 deletions(-)

diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
index 0bac9b463939b..22163fad63339 100644
--- a/pandas/core/dtypes/astype.py
+++ b/pandas/core/dtypes/astype.py
@@ -268,24 +268,22 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool:
         # Only equal numpy dtypes avoid a copy
         return False
 
-    if is_string_dtype(dtype) and is_string_dtype(new_dtype):
+    elif is_string_dtype(dtype) and is_string_dtype(new_dtype):
         return True
 
     elif is_object_dtype(dtype) and new_dtype.kind == "O":
         # When the underlying array has dtype object, we don't have to make a copy
         return True
 
-    elif is_string_dtype(dtype) or is_string_dtype(new_dtype):
-        return False
-
     elif dtype.kind in "mM" and new_dtype.kind in "mM":
         return True
 
-    elif getattr(dtype, "numpy_dtype", dtype) == getattr(
-        new_dtype, "numpy_dtype", new_dtype
-    ):
-        # If underlying numpy dtype is the same, no copy is made, e.g.
-        # int64 -> Int64 or int64[pyarrow]
-        return True
+    numpy_dtype = getattr(dtype, "numpy_dtype", None)
+    new_numpy_dtype = getattr(new_dtype, "numpy_dtype", None)
+
+    if numpy_dtype is not None and new_numpy_dtype is not None:
+        # if both have NumPy dtype then they are only views if they are equal
+        return numpy_dtype == new_numpy_dtype
 
-    return False
+    # Assume this is a view since we don't know for sure if a copy was made
+    return True
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 41b8e63cc7a64..273e2211f1325 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -11,12 +11,9 @@
     cast,
     final,
 )
-import weakref
 
 import numpy as np
 
-from pandas._config import using_copy_on_write
-
 from pandas._libs import (
     Timestamp,
     internals as libinternals,
@@ -162,7 +159,6 @@ class Block(PandasObject):
     is_extension = False
     _can_consolidate = True
     _validate_ndim = True
-    _ref = None
 
     @final
     @cache_readonly
@@ -214,7 +210,9 @@ def mgr_locs(self, new_mgr_locs: BlockPlacement) -> None:
         self._mgr_locs = new_mgr_locs
 
     @final
-    def make_block(self, values, placement=None) -> Block:
+    def make_block(
+        self, values, placement=None, refs: BlockValuesRefs | None = None
+    ) -> Block:
         """
         Create a new block, with type inference propagate any values that are
         not specified
@@ -226,7 +224,7 @@ def make_block(self, values, placement=None) -> Block:
 
         # TODO: perf by not going through new_block
         # We assume maybe_coerce_values has already been called
-        return new_block(values, placement=placement, ndim=self.ndim)
+        return new_block(values, placement=placement, ndim=self.ndim, refs=refs)
 
     @final
     def make_block_same_class(
@@ -476,7 +474,11 @@ def dtype(self) -> DtypeObj:
 
     @final
     def astype(
-        self, dtype: DtypeObj, copy: bool = False, errors: IgnoreRaise = "raise"
+        self,
+        dtype: DtypeObj,
+        copy: bool = False,
+        errors: IgnoreRaise = "raise",
+        using_cow: bool = False,
     ) -> Block:
         """
         Coerce to the new dtype.
@@ -489,6 +491,8 @@ def astype(
         errors : str, {'raise', 'ignore'}, default 'raise'
             - ``raise`` : allow exceptions to be raised
             - ``ignore`` : suppress exceptions. On error return original object
+        using_cow: bool, default False
+            Signaling if copy on write copy logic is used.
 
         Returns
         -------
@@ -499,16 +503,18 @@ def astype(
         new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
 
         new_values = maybe_coerce_values(new_values)
-        newb = self.make_block(new_values)
+
+        refs = None
+        if using_cow and astype_is_view(values.dtype, new_values.dtype):
+            refs = self.refs
+
+        newb = self.make_block(new_values, refs=refs)
         if newb.shape != self.shape:
             raise TypeError(
                 f"cannot set astype for copy = [{copy}] for dtype "
                 f"({self.dtype.name} [{self.shape}]) to different shape "
                 f"({newb.dtype.name} [{newb.shape}])"
             )
-        if using_copy_on_write():
-            if astype_is_view(values.dtype, new_values.dtype):
-                newb._ref = weakref.ref(self)
         return newb
 
     @final
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index d3e406c25e455..4993d50603012 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -422,25 +422,19 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
         )
 
     def astype(self: T, dtype, copy: bool | None = False, errors: str = "raise") -> T:
-        return self._apply_and_handle_refs(
-            "astype", copy=copy, dtype=dtype, errors=errors
-        )
-
-    def _apply_and_handle_refs(self, func: str, copy: bool | None, **kwargs):
         if copy is None:
             if using_copy_on_write():
                 copy = False
             else:
                 copy = True
 
-        result = self.apply(func, copy=copy, **kwargs)
-
-        if using_copy_on_write() and not copy:
-            refs = [blk._ref for blk in result.blocks]
-            if any(ref is not None for ref in refs):
-                result.refs = refs
-                result.parent = self
-        return result
+        return self.apply(
+            "astype",
+            dtype=dtype,
+            copy=copy,
+            errors=errors,
+            using_cow=using_copy_on_write(),
+        )
 
     def convert(self: T, copy: bool) -> T:
         return self.apply(
diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py
index ff3a345f92c80..59e5c6fa2dda3 100644
--- a/pandas/tests/arrays/test_array.py
+++ b/pandas/tests/arrays/test_array.py
@@ -429,14 +429,3 @@ def test_array_to_numpy_na():
     result = arr.to_numpy(na_value=True, dtype=bool)
     expected = np.array([True, True])
     tm.assert_numpy_array_equal(result, expected)
-
-
-def test_array_copy_on_write(using_copy_on_write):
-    df = pd.DataFrame({"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype="object")
-    df2 = df.astype(DecimalDtype())
-    df.iloc[0, 0] = 0
-    if using_copy_on_write:
-        expected = pd.DataFrame(
-            {"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype=DecimalDtype()
-        )
-        tm.assert_equal(df2.values, expected.values)
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index 59645c99d9929..af86c81eb0de7 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -1,7 +1,7 @@
 import numpy as np
 import pytest
 
-from pandas.compat import pa_version_under6p0
+from pandas.compat import pa_version_under7p0
 
 from pandas import (
     DataFrame,
@@ -556,7 +556,7 @@ def test_astype_single_dtype(using_copy_on_write):
 @pytest.mark.parametrize("dtype", ["int64", "Int64"])
 @pytest.mark.parametrize("new_dtype", ["int64", "Int64", "int64[pyarrow]"])
 def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype):
-    if new_dtype == "int64[pyarrow]" and pa_version_under6p0:
+    if new_dtype == "int64[pyarrow]" and pa_version_under7p0:
         pytest.skip("pyarrow not installed")
     df = DataFrame({"a": [1, 2, 3]}, dtype=dtype)
     df_orig = df.copy()
@@ -581,7 +581,7 @@ def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype):
 
 @pytest.mark.parametrize("dtype", ["float64", "int32", "Int32", "int32[pyarrow]"])
 def test_astype_different_target_dtype(using_copy_on_write, dtype):
-    if dtype == "int32[pyarrow]" and pa_version_under6p0:
+    if dtype == "int32[pyarrow]" and pa_version_under7p0:
         pytest.skip("pyarrow not installed")
     df = DataFrame({"a": [1, 2, 3]})
     df_orig = df.copy()
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index cdacbf9c5bb5a..90f89d71b15a9 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -483,3 +483,14 @@ def test_to_numpy_keyword():
 
     result = pd.Series(a).to_numpy(decimals=2)
     tm.assert_numpy_array_equal(result, expected)
+
+
+def test_array_copy_on_write(using_copy_on_write):
+    df = pd.DataFrame({"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype="object")
+    df2 = df.astype(DecimalDtype())
+    df.iloc[0, 0] = 0
+    if using_copy_on_write:
+        expected = pd.DataFrame(
+            {"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype=DecimalDtype()
+        )
+        tm.assert_equal(df2.values, expected.values)

From 031aaa51a3b8163b0ced8fdd95df224eac7afa1b Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 8 Feb 2023 10:17:52 +0100
Subject: [PATCH 15/23] Remove merge problem

---
 pandas/core/internals/blocks.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 0ccfea2c93752..6608720e3b6bc 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -271,7 +271,6 @@ def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block:
 
         new_values = self._slice(slicer)
         refs = self.refs if isinstance(slicer, slice) else None
-        refs = self.refs if isinstance(slicer, slice) else None
         return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)
 
     @final

From 174e2d3bd34ac6b368bdfd04c736f891c668307f Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 8 Feb 2023 10:19:30 +0100
Subject: [PATCH 16/23] Add comment

---
 pandas/core/dtypes/astype.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
index 22163fad63339..64016e47acc0a 100644
--- a/pandas/core/dtypes/astype.py
+++ b/pandas/core/dtypes/astype.py
@@ -269,6 +269,7 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool:
         return False
 
     elif is_string_dtype(dtype) and is_string_dtype(new_dtype):
+        # Potentially! a copy when converting from object to string
         return True
 
     elif is_object_dtype(dtype) and new_dtype.kind == "O":

From 876d293360ab34a8c8e79fdfcc60e21ce2a71fc5 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Wed, 8 Feb 2023 15:34:35 +0100
Subject: [PATCH 17/23] Update pandas/core/dtypes/astype.py

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 pandas/core/dtypes/astype.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
index 64016e47acc0a..331f539b2fc5a 100644
--- a/pandas/core/dtypes/astype.py
+++ b/pandas/core/dtypes/astype.py
@@ -259,7 +259,7 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool:
 
     Returns
     -------
-    True if new data is a view, False otherwise
+    True if new data is a view or not guaranteed to be a copy, False otherwise
     """
     if dtype == new_dtype:
         return True

From ff37e58e55e416b403fc2a2cf1681dff9b0a04ce Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 8 Feb 2023 15:42:19 +0100
Subject: [PATCH 18/23] Check numpy non numpy conversion better

---
 pandas/core/dtypes/astype.py           |   6 ++
 pandas/tests/copy_view/test_astype.py  | 144 +++++++++++++++++++++++++
 pandas/tests/copy_view/test_methods.py | 134 -----------------------
 3 files changed, 150 insertions(+), 134 deletions(-)
 create mode 100644 pandas/tests/copy_view/test_astype.py

diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
index 331f539b2fc5a..32b28192c3ba2 100644
--- a/pandas/core/dtypes/astype.py
+++ b/pandas/core/dtypes/astype.py
@@ -282,6 +282,12 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool:
     numpy_dtype = getattr(dtype, "numpy_dtype", None)
     new_numpy_dtype = getattr(new_dtype, "numpy_dtype", None)
 
+    if numpy_dtype is None and isinstance(dtype, np.dtype):
+        numpy_dtype = dtype
+
+    if new_numpy_dtype is None and isinstance(new_dtype, np.dtype):
+        numpy_dtype = new_dtype
+
     if numpy_dtype is not None and new_numpy_dtype is not None:
         # if both have NumPy dtype then they are only views if they are equal
         return numpy_dtype == new_numpy_dtype
diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py
new file mode 100644
index 0000000000000..dbf9f71ea95b0
--- /dev/null
+++ b/pandas/tests/copy_view/test_astype.py
@@ -0,0 +1,144 @@
+import numpy as np
+import pytest
+
+from pandas.compat import pa_version_under7p0
+
+from pandas import (
+    DataFrame,
+    Series,
+)
+import pandas._testing as tm
+from pandas.tests.copy_view.util import get_array
+
+
+def test_astype_single_dtype(using_copy_on_write):
+    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": 1.5})
+    df_orig = df.copy()
+    df2 = df.astype("float64")
+
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    else:
+        assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+
+    # mutating df2 triggers a copy-on-write for that column/block
+    df2.iloc[0, 2] = 5.5
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
+    tm.assert_frame_equal(df, df_orig)
+
+    # mutating parent also doesn't update result
+    df2 = df.astype("float64")
+    df.iloc[0, 2] = 5.5
+    tm.assert_frame_equal(df2, df_orig.astype("float64"))
+
+
+@pytest.mark.parametrize("dtype", ["int64", "Int64"])
+@pytest.mark.parametrize("new_dtype", ["int64", "Int64", "int64[pyarrow]"])
+def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype):
+    if new_dtype == "int64[pyarrow]" and pa_version_under7p0:
+        pytest.skip("pyarrow not installed")
+    df = DataFrame({"a": [1, 2, 3]}, dtype=dtype)
+    df_orig = df.copy()
+    df2 = df.astype(new_dtype)
+
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    else:
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+
+    # mutating df2 triggers a copy-on-write for that column/block
+    df2.iloc[0, 0] = 10
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    tm.assert_frame_equal(df, df_orig)
+
+    # mutating parent also doesn't update result
+    df2 = df.astype(new_dtype)
+    df.iloc[0, 0] = 100
+    tm.assert_frame_equal(df2, df_orig.astype(new_dtype))
+
+
+@pytest.mark.parametrize("dtype", ["float64", "int32", "Int32", "int32[pyarrow]"])
+def test_astype_different_target_dtype(using_copy_on_write, dtype):
+    if dtype == "int32[pyarrow]" and pa_version_under7p0:
+        pytest.skip("pyarrow not installed")
+    df = DataFrame({"a": [1, 2, 3]})
+    df_orig = df.copy()
+    df2 = df.astype(dtype)
+
+    assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    assert df2._mgr._has_no_reference(0)
+
+    df2.iloc[0, 0] = 5
+    tm.assert_frame_equal(df, df_orig)
+
+    # mutating parent also doesn't update result
+    df2 = df.astype(dtype)
+    df.iloc[0, 0] = 100
+    tm.assert_frame_equal(df2, df_orig.astype(dtype))
+
+
+@pytest.mark.parametrize(
+    "dtype, new_dtype", [("object", "string"), ("string", "object")]
+)
+def test_astype_string_and_object(using_copy_on_write, dtype, new_dtype):
+    df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)
+    df_orig = df.copy()
+    df2 = df.astype(new_dtype)
+
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    else:
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+
+    df2.iloc[0, 0] = "x"
+    tm.assert_frame_equal(df, df_orig)
+
+
+@pytest.mark.parametrize(
+    "dtype, new_dtype", [("object", "string"), ("string", "object")]
+)
+def test_astype_string_and_object_update_original(
+    using_copy_on_write, dtype, new_dtype
+):
+    df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)
+    df2 = df.astype(new_dtype)
+    df_orig = df2.copy()
+
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    else:
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+
+    df.iloc[0, 0] = "x"
+    tm.assert_frame_equal(df2, df_orig)
+
+
+def test_astype_dict_dtypes(using_copy_on_write):
+    df = DataFrame(
+        {"a": [1, 2, 3], "b": [4, 5, 6], "c": Series([1.5, 1.5, 1.5], dtype="float64")}
+    )
+    df_orig = df.copy()
+    df2 = df.astype({"a": "float64", "c": "float64"})
+
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
+        assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    else:
+        assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
+        assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+
+    # mutating df2 triggers a copy-on-write for that column/block
+    df2.iloc[0, 2] = 5.5
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
+
+    df2.iloc[0, 1] = 10
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
+    tm.assert_frame_equal(df, df_orig)
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index af86c81eb0de7..b814b9089aabd 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas.compat import pa_version_under7p0
-
 from pandas import (
     DataFrame,
     Index,
@@ -529,138 +527,6 @@ def test_to_frame(using_copy_on_write):
         tm.assert_frame_equal(df, expected)
 
 
-def test_astype_single_dtype(using_copy_on_write):
-    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": 1.5})
-    df_orig = df.copy()
-    df2 = df.astype("float64")
-
-    if using_copy_on_write:
-        assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
-        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
-    else:
-        assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
-        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
-
-    # mutating df2 triggers a copy-on-write for that column/block
-    df2.iloc[0, 2] = 5.5
-    if using_copy_on_write:
-        assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
-    tm.assert_frame_equal(df, df_orig)
-
-    # mutating parent also doesn't update result
-    df2 = df.astype("float64")
-    df.iloc[0, 2] = 5.5
-    tm.assert_frame_equal(df2, df_orig.astype("float64"))
-
-
-@pytest.mark.parametrize("dtype", ["int64", "Int64"])
-@pytest.mark.parametrize("new_dtype", ["int64", "Int64", "int64[pyarrow]"])
-def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype):
-    if new_dtype == "int64[pyarrow]" and pa_version_under7p0:
-        pytest.skip("pyarrow not installed")
-    df = DataFrame({"a": [1, 2, 3]}, dtype=dtype)
-    df_orig = df.copy()
-    df2 = df.astype(new_dtype)
-
-    if using_copy_on_write:
-        assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
-    else:
-        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
-
-    # mutating df2 triggers a copy-on-write for that column/block
-    df2.iloc[0, 0] = 10
-    if using_copy_on_write:
-        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
-    tm.assert_frame_equal(df, df_orig)
-
-    # mutating parent also doesn't update result
-    df2 = df.astype(new_dtype)
-    df.iloc[0, 0] = 100
-    tm.assert_frame_equal(df2, df_orig.astype(new_dtype))
-
-
-@pytest.mark.parametrize("dtype", ["float64", "int32", "Int32", "int32[pyarrow]"])
-def test_astype_different_target_dtype(using_copy_on_write, dtype):
-    if dtype == "int32[pyarrow]" and pa_version_under7p0:
-        pytest.skip("pyarrow not installed")
-    df = DataFrame({"a": [1, 2, 3]})
-    df_orig = df.copy()
-    df2 = df.astype(dtype)
-
-    assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
-
-    df2.iloc[0, 0] = 5
-    tm.assert_frame_equal(df, df_orig)
-
-    # mutating parent also doesn't update result
-    df2 = df.astype(dtype)
-    df.iloc[0, 0] = 100
-    tm.assert_frame_equal(df2, df_orig.astype(dtype))
-
-
-@pytest.mark.parametrize(
-    "dtype, new_dtype", [("object", "string"), ("string", "object")]
-)
-def test_astype_string_and_object(using_copy_on_write, dtype, new_dtype):
-    df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)
-    df_orig = df.copy()
-    df2 = df.astype(new_dtype)
-
-    if using_copy_on_write:
-        assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
-    else:
-        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
-
-    df2.iloc[0, 0] = "x"
-    tm.assert_frame_equal(df, df_orig)
-
-
-@pytest.mark.parametrize(
-    "dtype, new_dtype", [("object", "string"), ("string", "object")]
-)
-def test_astype_string_and_object_update_original(
-    using_copy_on_write, dtype, new_dtype
-):
-    df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)
-    df2 = df.astype(new_dtype)
-    df_orig = df2.copy()
-
-    if using_copy_on_write:
-        assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
-    else:
-        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
-
-    df.iloc[0, 0] = "x"
-    tm.assert_frame_equal(df2, df_orig)
-
-
-def test_astype_dict_dtypes(using_copy_on_write):
-    df = DataFrame(
-        {"a": [1, 2, 3], "b": [4, 5, 6], "c": Series([1.5, 1.5, 1.5], dtype="float64")}
-    )
-    df_orig = df.copy()
-    df2 = df.astype({"a": "float64", "c": "float64"})
-
-    if using_copy_on_write:
-        assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
-        assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
-        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
-    else:
-        assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
-        assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
-        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
-
-    # mutating df2 triggers a copy-on-write for that column/block
-    df2.iloc[0, 2] = 5.5
-    if using_copy_on_write:
-        assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
-
-    df2.iloc[0, 1] = 10
-    if using_copy_on_write:
-        assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
-    tm.assert_frame_equal(df, df_orig)
-
-
 @pytest.mark.parametrize("ax", ["index", "columns"])
 def test_swapaxes_noop(using_copy_on_write, ax):
     df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

From 5492c10cb51be5b31ba02e770b5d17cac281faa4 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 8 Feb 2023 15:53:51 +0100
Subject: [PATCH 19/23] Add explicit timezone tests

---
 pandas/core/dtypes/astype.py          |  4 +++-
 pandas/tests/copy_view/test_astype.py | 32 +++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
index 32b28192c3ba2..8fcd39a8252e8 100644
--- a/pandas/core/dtypes/astype.py
+++ b/pandas/core/dtypes/astype.py
@@ -277,7 +277,9 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool:
         return True
 
     elif dtype.kind in "mM" and new_dtype.kind in "mM":
-        return True
+        #  Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has no
+        #  attribute "unit"
+        return dtype.unit == new_dtype.unit  # type: ignore[union-attr]
 
     numpy_dtype = getattr(dtype, "numpy_dtype", None)
     new_numpy_dtype = getattr(new_dtype, "numpy_dtype", None)
diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py
index dbf9f71ea95b0..a01ef727c2809 100644
--- a/pandas/tests/copy_view/test_astype.py
+++ b/pandas/tests/copy_view/test_astype.py
@@ -6,6 +6,7 @@
 from pandas import (
     DataFrame,
     Series,
+    date_range,
 )
 import pandas._testing as tm
 from pandas.tests.copy_view.util import get_array
@@ -142,3 +143,34 @@ def test_astype_dict_dtypes(using_copy_on_write):
     if using_copy_on_write:
         assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
     tm.assert_frame_equal(df, df_orig)
+
+
+def test_astype_different_datetime_resos(using_copy_on_write):
+    df = DataFrame({"a": date_range("2019-12-31", periods=2, freq="D")})
+    result = df.astype("datetime64[ms]")
+
+    assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
+    if using_copy_on_write:
+        assert result._mgr._has_no_reference(0)
+
+
+def test_astype_different_timezones(using_copy_on_write):
+    df = DataFrame(
+        {"a": date_range("2019-12-31", periods=5, freq="D", tz="US/Pacific")}
+    )
+    result = df.astype("datetime64[ns, Europe/Berlin]")
+    if using_copy_on_write:
+        assert not result._mgr._has_no_reference(0)
+        assert np.shares_memory(get_array(df, "a").asi8, get_array(result, "a").asi8)
+
+
+def test_astype_different_timezones_different_reso(using_copy_on_write):
+    df = DataFrame(
+        {"a": date_range("2019-12-31", periods=5, freq="D", tz="US/Pacific")}
+    )
+    result = df.astype("datetime64[ms, Europe/Berlin]")
+    if using_copy_on_write:
+        assert result._mgr._has_no_reference(0)
+        assert not np.shares_memory(
+            get_array(df, "a").asi8, get_array(result, "a").asi8
+        )

From 620181c670bc542955e72d4a3cd2755d678e7cf6 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 8 Feb 2023 15:56:25 +0100
Subject: [PATCH 20/23] Adjust comment

---
 pandas/core/dtypes/astype.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
index 8fcd39a8252e8..dff31fdbb3c03 100644
--- a/pandas/core/dtypes/astype.py
+++ b/pandas/core/dtypes/astype.py
@@ -269,7 +269,7 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool:
         return False
 
     elif is_string_dtype(dtype) and is_string_dtype(new_dtype):
-        # Potentially! a copy when converting from object to string
+        # Potentially! a view when converting from object to string
         return True
 
     elif is_object_dtype(dtype) and new_dtype.kind == "O":
@@ -291,7 +291,10 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool:
         numpy_dtype = new_dtype
 
     if numpy_dtype is not None and new_numpy_dtype is not None:
-        # if both have NumPy dtype then they are only views if they are equal
+        # if both have NumPy dtype or one of them is a numpy dtype
+        # they are only a view when the numpy dtypes are equal, e.g.
+        # int64 -> Int64 or int64[pyarrow]
+        # int64 -> Int32 copies
         return numpy_dtype == new_numpy_dtype
 
     # Assume this is a view since we don't know for sure if a copy was made

From 95a8296a2fd69a34862aca3608473fac30bd9733 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 8 Feb 2023 16:04:54 +0100
Subject: [PATCH 21/23] Add to whatsnew

---
 doc/source/whatsnew/v2.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 70c60401f29fb..36f1a77badc95 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -224,6 +224,7 @@ Copy-on-Write improvements
   - :meth:`DataFrame.truncate`
   - :meth:`DataFrame.tz_convert` / :meth:`Series.tz_localize`
   - :meth:`DataFrame.infer_objects` / :meth:`Series.infer_objects`
+  - :meth:`DataFrame.astype` / :meth:`Series.astype`
   - :func:`concat`
 
   These methods return views when Copy-on-Write is enabled, which provides a significant

From 7640c36276d90305d64c658f53e2600a9172fe50 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 8 Feb 2023 17:44:17 +0100
Subject: [PATCH 22/23] Fix tests

---
 pandas/core/dtypes/astype.py          |  6 +++---
 pandas/tests/copy_view/test_astype.py | 18 ++++++++++++++++++
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
index dff31fdbb3c03..4780ec176f6d0 100644
--- a/pandas/core/dtypes/astype.py
+++ b/pandas/core/dtypes/astype.py
@@ -277,9 +277,9 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool:
         return True
 
     elif dtype.kind in "mM" and new_dtype.kind in "mM":
-        #  Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has no
-        #  attribute "unit"
-        return dtype.unit == new_dtype.unit  # type: ignore[union-attr]
+        dtype = getattr(dtype, "numpy_dtype", dtype)
+        new_dtype = getattr(new_dtype, "numpy_dtype", new_dtype)
+        return getattr(dtype, "unit", None) == getattr(new_dtype, "unit", None)
 
     numpy_dtype = getattr(dtype, "numpy_dtype", None)
     new_numpy_dtype = getattr(new_dtype, "numpy_dtype", None)
diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py
index a01ef727c2809..d12d6650e7434 100644
--- a/pandas/tests/copy_view/test_astype.py
+++ b/pandas/tests/copy_view/test_astype.py
@@ -6,6 +6,7 @@
 from pandas import (
     DataFrame,
     Series,
+    Timestamp,
     date_range,
 )
 import pandas._testing as tm
@@ -174,3 +175,20 @@ def test_astype_different_timezones_different_reso(using_copy_on_write):
         assert not np.shares_memory(
             get_array(df, "a").asi8, get_array(result, "a").asi8
         )
+
+
+@pytest.mark.skipif(pa_version_under7p0, reason="pyarrow not installed")
+def test_astype_arrow_timestamp(using_copy_on_write):
+    df = DataFrame(
+        {
+            "a": [
+                Timestamp("2020-01-01 01:01:01.000001"),
+                Timestamp("2020-01-01 01:01:01.000001"),
+            ]
+        },
+        dtype="M8[ns]",
+    )
+    result = df.astype("timestamp[ns][pyarrow]")
+    if using_copy_on_write:
+        assert not result._mgr._has_no_reference(0)
+        assert np.shares_memory(get_array(df, "a").asi8, get_array(result, "a")._data)

From 469383d9c509be12efb7168829ea9fa5a7f774de Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 8 Feb 2023 23:07:59 +0100
Subject: [PATCH 23/23] Fix array manager

---
 pandas/tests/copy_view/test_astype.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py
index d12d6650e7434..a485275a28ac4 100644
--- a/pandas/tests/copy_view/test_astype.py
+++ b/pandas/tests/copy_view/test_astype.py
@@ -72,7 +72,8 @@ def test_astype_different_target_dtype(using_copy_on_write, dtype):
     df2 = df.astype(dtype)
 
     assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
-    assert df2._mgr._has_no_reference(0)
+    if using_copy_on_write:
+        assert df2._mgr._has_no_reference(0)
 
     df2.iloc[0, 0] = 5
     tm.assert_frame_equal(df, df_orig)