pandas-dev · jorisvandenbossche · Feb 9, 2023 · Jan 17, 2023 · Jan 17, 2023 · Jan 17, 2023
diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
@@ -271,6 +271,10 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool:
     if is_string_dtype(dtype) and is_string_dtype(new_dtype):
         return True
 
+    elif is_object_dtype(dtype) and new_dtype.kind == "O":
+        # When the underlying array has dtype object, we don't have to make a copy
+        return True
+
     elif is_string_dtype(dtype) or is_string_dtype(new_dtype):
         return False
 
@@ -280,6 +284,8 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool:
     elif getattr(dtype, "numpy_dtype", dtype) == getattr(
         new_dtype, "numpy_dtype", new_dtype
     ):
+        # If underlying numpy dtype is the same, no copy is made, e.g.
+        # int64 -> Int64 or int64[pyarrow]
         return True
 
     return False
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -6302,11 +6302,7 @@ def astype(
             for i, (col_name, col) in enumerate(self.items()):
                 cdt = dtype_ser.iat[i]
                 if isna(cdt):
-                    if using_copy_on_write():
-                        # Make a shallow copy even if copy=False for CoW
-                        res_col = col.copy(deep=copy)
-                    else:
-                        res_col = col if copy is False else col.copy()
+                    res_col = col.copy(deep=copy)
                 else:
                     try:
                         res_col = col.astype(dtype=cdt, copy=copy, errors=errors)

diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py
@@ -421,3 +421,14 @@ def test_array_to_numpy_na():
     result = arr.to_numpy(na_value=True, dtype=bool)
     expected = np.array([True, True])
     tm.assert_numpy_array_equal(result, expected)
+
+
+def test_array_copy_on_write(using_copy_on_write):
+    df = pd.DataFrame({"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype="object")
+    df2 = df.astype(DecimalDtype())
+    df.iloc[0, 0] = 0
+    if using_copy_on_write:
+        expected = pd.DataFrame(
+            {"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype=DecimalDtype()
+        )
+        tm.assert_equal(df2.values, expected.values)
diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
@@ -1,18 +1,20 @@
 import numpy as np
+import pytest
 
 from pandas import Series
 
 # -----------------------------------------------------------------------------
 # Copy/view behaviour for Series / DataFrame constructors
 
 
-def test_series_from_series(using_copy_on_write):
+@pytest.mark.parametrize("dtype", [None, "int64"])
+def test_series_from_series(dtype, using_copy_on_write):
     # Case: constructing a Series from another Series object follows CoW rules:
     # a new object is returned and thus mutations are not propagated
     ser = Series([1, 2, 3], name="name")
 
     # default is copy=False -> new Series is a shallow copy / view of original
-    result = Series(ser)
+    result = Series(ser, dtype=dtype)
 
     # the shallow copy still shares memory
     assert np.shares_memory(ser.values, result.values)
@@ -34,7 +36,7 @@ def test_series_from_series(using_copy_on_write):
         assert np.shares_memory(ser.values, result.values)
 
     # the same when modifying the parent
-    result = Series(ser)
+    result = Series(ser, dtype=dtype)
 
     if using_copy_on_write:
         # mutating original doesn't mutate new series

diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
@@ -545,6 +545,11 @@ def test_astype_single_dtype(using_copy_on_write):
         assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
     tm.assert_frame_equal(df, df_orig)
 
+    # mutating parent also doesn't update result
+    df2 = df.astype("float64")
+    df.iloc[0, 2] = 5.5
+    tm.assert_frame_equal(df2, df_orig.astype("float64"))
+
 
 @pytest.mark.parametrize("dtype", ["int64", "Int64"])
 @pytest.mark.parametrize("new_dtype", ["int64", "Int64", "int64[pyarrow]"])
@@ -558,8 +563,6 @@ def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype):
     if using_copy_on_write:
         assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
     else:
-        if new_dtype == "int64[pyarrow]":
-            pytest.skip("Does not make a copy")
         assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
 
     # mutating df2 triggers a copy-on-write for that column/block
@@ -568,6 +571,11 @@ def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype):
         assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
     tm.assert_frame_equal(df, df_orig)
 
+    # mutating parent also doesn't update result
+    df2 = df.astype(new_dtype)
+    df.iloc[0, 0] = 100
+    tm.assert_frame_equal(df2, df_orig.astype(new_dtype))
+
 
 @pytest.mark.parametrize("dtype", ["float64", "int32", "Int32", "int32[pyarrow]"])
 def test_astype_different_target_dtype(using_copy_on_write, dtype):
@@ -577,14 +585,17 @@ def test_astype_different_target_dtype(using_copy_on_write, dtype):
     df_orig = df.copy()
     df2 = df.astype(dtype)
 
-    if using_copy_on_write:
-        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
-    else:
-        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    assert df2._mgr._has_no_reference(0)
 
     df2.iloc[0, 0] = 5
     tm.assert_frame_equal(df, df_orig)
 
+    # mutating parent also doesn't update result
+    df2 = df.astype(dtype)
+    df.iloc[0, 0] = 100
+    tm.assert_frame_equal(df2, df_orig.astype(dtype))
+
 
 @pytest.mark.parametrize(
     "dtype, new_dtype", [("object", "string"), ("string", "object")]