From 6ce330604b8434ba214a7bda0efc9fb9ef07a571 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 16 Jan 2023 18:34:26 +0100
Subject: [PATCH 01/16] API / CoW: DataFrame(<dict of Series>, copy=False)
 constructor now gives lazy copy

---
 pandas/core/internals/construction.py       | 32 +++++++++++------
 pandas/core/internals/managers.py           | 26 +++++++++++++-
 pandas/tests/copy_view/test_constructors.py | 39 ++++++++++++++++++++-
 3 files changed, 85 insertions(+), 12 deletions(-)
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 9bdfd7991689b..b84ae52d16369 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -116,7 +116,7 @@ def arrays_to_mgr(
             index = ensure_index(index)
 
         # don't force copy because getting jammed in an ndarray anyway
-        arrays = _homogenize(arrays, index, dtype)
+        arrays, parents = _homogenize(arrays, index, dtype)
         # _homogenize ensures
         #  - all(len(x) == len(index) for x in arrays)
         #  - all(x.ndim == 1 for x in arrays)
@@ -125,7 +125,8 @@ def arrays_to_mgr(
 
     else:
         index = ensure_index(index)
-        arrays = [extract_array(x, extract_numpy=True) for x in arrays]
+        # arrays = [extract_array(x, extract_numpy=True) for x in arrays]
+        parents = None
 
         # Reached via DataFrame._from_arrays; we do validation here
         for arr in arrays:
@@ -148,7 +149,10 @@ def arrays_to_mgr(
 
     if typ == "block":
         return create_block_manager_from_column_arrays(
-            arrays, axes, consolidate=consolidate
+            arrays,
+            axes,
+            consolidate=consolidate,
+            parents=parents,
         )
     elif typ == "array":
         return ArrayManager(arrays, [index, columns])
@@ -550,17 +554,22 @@ def _ensure_2d(values: np.ndarray) -> np.ndarray:
 def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]:
     oindex = None
     homogenized = []
+    # if the original array-like in `data` is a Series, keep track of this Series
+    parents = []
 
     for val in data:
         if isinstance(val, ABCSeries):
+            hval = val
             if dtype is not None:
-                val = val.astype(dtype, copy=False)
-            if val.index is not index:
+                hval = hval.astype(dtype, copy=False)
+            if hval.index is not index:
                 # Forces alignment. No need to copy data since we
                 # are putting it into an ndarray later
-                val = val.reindex(index, copy=False)
-
-            val = val._values
+                hval = hval.reindex(index, copy=False)
+            if hval is val:
+                hval = val.copy(deep=False)
+            homogenized.append(hval._values)
+            parents.append(hval)
         else:
             if isinstance(val, dict):
                 # GH#41785 this _should_ be equivalent to (but faster than)
@@ -578,10 +587,13 @@ def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]:
 
             val = sanitize_array(val, index, dtype=dtype, copy=False)
             com.require_length_match(val, index)
+            homogenized.append(val)
+            parents.append(None)
 
-        homogenized.append(val)
+    if com.all_none(*parents):
+        parents = None
 
-    return homogenized
+    return homogenized, parents
 
 
 def _extract_index(data) -> Index:
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 2ce294f257d75..eba68850d9fb4 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -2125,6 +2125,7 @@ def create_block_manager_from_column_arrays(
     arrays: list[ArrayLike],
     axes: list[Index],
     consolidate: bool = True,
+    parents: list | None = None,
 ) -> BlockManager:
     # Assertions disabled for performance (caller is responsible for verifying)
     # assert isinstance(axes, list)
@@ -2139,7 +2140,30 @@ def create_block_manager_from_column_arrays(
 
     try:
         blocks = _form_blocks(arrays, consolidate)
-        mgr = BlockManager(blocks, axes, verify_integrity=False)
+        refs = None
+        parent = None
+        if parents is not None and using_copy_on_write():
+            # elements in `parents` are Series objects *if* the original input
+            # for the column was a Series, or otherwise None
+            # -> in case of a Series, keep track of its refs if it has those
+            # (this Series is already a view on the original one, so we can
+            # directly use its ref instead of creating a new ref to this Series)
+            refs = []
+            parent = []
+            for ser in parents:
+                if (
+                    ser is not None
+                    and ser._mgr.refs is not None
+                    and (ref := ser._mgr.refs[0]) is not None
+                ):
+                    refs.append(ref)
+                    parent.append(ser)
+                else:
+                    refs.append(None)
+
+        mgr = BlockManager(
+            blocks, axes, refs=refs, parent=parent, verify_integrity=False
+        )
     except ValueError as e:
         raise_construction_error(len(arrays), arrays[0].shape, axes, e)
     if consolidate:
diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
index c04c733e5ee1d..c88aab428bda7 100644
--- a/pandas/tests/copy_view/test_constructors.py
+++ b/pandas/tests/copy_view/test_constructors.py
@@ -1,6 +1,11 @@
 import numpy as np
 
-from pandas import Series
+from pandas import (
+    DataFrame,
+    Series,
+)
+import pandas._testing as tm
+from pandas.tests.copy_view.util import get_array
 
 # -----------------------------------------------------------------------------
 # Copy/view behaviour for Series / DataFrame constructors
@@ -73,3 +78,35 @@ def test_series_from_series_with_reindex(using_copy_on_write):
     assert not np.shares_memory(ser.values, result.values)
     if using_copy_on_write:
         assert result._mgr.refs is None or result._mgr.refs[0] is None
+
+
+def test_dataframe_from_dict_of_series(using_copy_on_write):
+    # Case: constructing a DataFrame from Series objects with copy=False
+    # has to do a lazy following CoW rules
+    # (the default for DataFrame(dict) is still to copy to ensure consolidation)
+    s1 = Series([1, 2, 3])
+    s2 = Series([4, 5, 6])
+    s1_orig = s1.copy()
+    expected = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+
+    result = DataFrame({"a": s1, "b": s2}, copy=False)
+
+    # the shallow copy still shares memory
+    assert np.shares_memory(get_array(result, "a"), s1.values)
+
+    # mutating the new dataframe doesn't mutate original
+    result.iloc[0, 0] = 10
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(result, "a"), s1.values)
+        tm.assert_series_equal(s1, s1_orig)
+    else:
+        assert s1.iloc[0] == 10
+
+    # the same when modifying the parent series
+    result = DataFrame({"a": s1, "b": s2}, copy=False)
+    s1.iloc[0] = 10
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(result, "a"), s1.values)
+        tm.assert_frame_equal(result, expected)
+    else:
+        assert result.iloc[0, 0] == 10

From 104cab907eb7ac6917600c8435dad014ac0dd868 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 16 Jan 2023 22:15:48 +0100
Subject: [PATCH 02/16] expand test

---
 pandas/core/internals/construction.py       | 12 +++++++++++-
 pandas/tests/copy_view/test_constructors.py | 11 +++++++----
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index b84ae52d16369..31ede7b905549 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -10,10 +10,13 @@
     Hashable,
     Sequence,
 )
+import weakref
 
 import numpy as np
 from numpy import ma
 
+from pandas._config import using_copy_on_write
+
 from pandas._libs import lib
 from pandas._typing import (
     ArrayLike,
@@ -56,6 +59,7 @@
     ExtensionArray,
     FloatingArray,
     IntegerArray,
+    PandasArray,
 )
 from pandas.core.arrays.string_ import StringDtype
 from pandas.core.construction import (
@@ -125,7 +129,9 @@ def arrays_to_mgr(
 
     else:
         index = ensure_index(index)
-        # arrays = [extract_array(x, extract_numpy=True) for x in arrays]
+        arrays = [
+            arr.to_numpy() if isinstance(arr, PandasArray) else arr for arr in arrays
+        ]
         parents = None
 
         # Reached via DataFrame._from_arrays; we do validation here
@@ -562,6 +568,10 @@ def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]:
             hval = val
             if dtype is not None:
                 hval = hval.astype(dtype, copy=False)
+                if using_copy_on_write() and hval.values is val.values:
+                    # TODO(CoW) remove when astype() has implemented CoW
+                    hval._mgr.refs = [weakref.ref(val._mgr._block)]
+                    hval._mgr.parent = val._mgr
             if hval.index is not index:
                 # Forces alignment. No need to copy data since we
                 # are putting it into an ndarray later
diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
index c88aab428bda7..01c21a800a885 100644
--- a/pandas/tests/copy_view/test_constructors.py
+++ b/pandas/tests/copy_view/test_constructors.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest
 
 from pandas import (
     DataFrame,
@@ -80,16 +81,18 @@ def test_series_from_series_with_reindex(using_copy_on_write):
         assert result._mgr.refs is None or result._mgr.refs[0] is None
 
 
-def test_dataframe_from_dict_of_series(using_copy_on_write):
+@pytest.mark.parametrize("dtype", [None, "int64"])
+@pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]])
+def test_dataframe_from_dict_of_series(using_copy_on_write, columns, dtype):
     # Case: constructing a DataFrame from Series objects with copy=False
     # has to do a lazy following CoW rules
     # (the default for DataFrame(dict) is still to copy to ensure consolidation)
     s1 = Series([1, 2, 3])
     s2 = Series([4, 5, 6])
     s1_orig = s1.copy()
-    expected = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    expected = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, dtype=dtype, columns=columns)
 
-    result = DataFrame({"a": s1, "b": s2}, copy=False)
+    result = DataFrame({"a": s1, "b": s2}, columns=columns, dtype=dtype, copy=False)
 
     # the shallow copy still shares memory
     assert np.shares_memory(get_array(result, "a"), s1.values)
@@ -103,7 +106,7 @@ def test_dataframe_from_dict_of_series(using_copy_on_write):
         assert s1.iloc[0] == 10
 
     # the same when modifying the parent series
-    result = DataFrame({"a": s1, "b": s2}, copy=False)
+    result = DataFrame({"a": s1, "b": s2}, columns=columns, dtype=dtype, copy=False)
     s1.iloc[0] = 10
     if using_copy_on_write:
         assert not np.shares_memory(get_array(result, "a"), s1.values)

From e192a632934b40ec3f4febb88a833bfc860b143c Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 16 Jan 2023 23:44:52 +0100
Subject: [PATCH 03/16] try fix mypy

---
 pandas/core/internals/construction.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 31ede7b905549..cad776e1c3b9a 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -557,11 +557,13 @@ def _ensure_2d(values: np.ndarray) -> np.ndarray:
     return values
 
 
-def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]:
+def _homogenize(
+    data, index: Index, dtype: DtypeObj | None
+) -> tuple[list[ArrayLike], list[Any] | None]:
     oindex = None
     homogenized = []
     # if the original array-like in `data` is a Series, keep track of this Series
-    parents = []
+    parents: list[Any] = []
 
     for val in data:
         if isinstance(val, ABCSeries):
@@ -600,10 +602,7 @@ def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]:
             homogenized.append(val)
             parents.append(None)
 
-    if com.all_none(*parents):
-        parents = None
-
-    return homogenized, parents
+    return homogenized, None if com.all_none(*parents) else parents
 
 
 def _extract_index(data) -> Index:

From a300b0889a4c8658101961252fc0119ba2ee4a12 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 17 Jan 2023 09:30:18 +0100
Subject: [PATCH 04/16] clean-up usage of internals in reshape/concat.py

---
 pandas/core/reshape/concat.py | 39 ++++++-----------------------------
 1 file changed, 6 insertions(+), 33 deletions(-)

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index f8220649bf890..0de995d2f773f 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -14,14 +14,10 @@
     cast,
     overload,
 )
-import weakref
 
 import numpy as np
 
-from pandas._config import (
-    get_option,
-    using_copy_on_write,
-)
+from pandas._config import using_copy_on_write
 
 from pandas._typing import (
     Axis,
@@ -53,7 +49,6 @@
     get_unanimous_names,
 )
 from pandas.core.internals import concatenate_managers
-from pandas.core.internals.construction import dict_to_mgr
 
 if TYPE_CHECKING:
     from pandas import (
@@ -536,25 +531,18 @@ def __init__(
                     )
 
                 else:
-                    original_obj = obj
-                    name = new_name = getattr(obj, "name", None)
+                    name = getattr(obj, "name", None)
                     if ignore_index or name is None:
-                        new_name = current_column
+                        name = current_column
                         current_column += 1
 
                     # doing a row-wise concatenation so need everything
                     # to line up
                     if self._is_frame and axis == 1:
-                        new_name = 0
+                        name = 0
                     # mypy needs to know sample is not an NDFrame
                     sample = cast("DataFrame | Series", sample)
-                    obj = sample._constructor(obj, columns=[name], copy=False)
-                    if using_copy_on_write():
-                        # TODO(CoW): Remove when ref tracking in constructors works
-                        obj._mgr.parent = original_obj  # type: ignore[union-attr]
-                        obj._mgr.refs = [weakref.ref(original_obj._mgr.blocks[0])]  # type: ignore[union-attr]  # noqa: E501
-
-                    obj.columns = [new_name]
+                    obj = sample._constructor({name: obj}, copy=False)
 
                 self.objs.append(obj)
 
@@ -604,22 +592,7 @@ def get_result(self):
                 cons = sample._constructor_expanddim
 
                 index, columns = self.new_axes
-                mgr = dict_to_mgr(
-                    data,
-                    index,
-                    None,
-                    copy=self.copy,
-                    typ=get_option("mode.data_manager"),
-                )
-                if using_copy_on_write() and not self.copy:
-                    parents = [obj._mgr for obj in self.objs]
-                    mgr.parent = parents  # type: ignore[union-attr]
-                    refs = [
-                        weakref.ref(obj._mgr.blocks[0])  # type: ignore[union-attr]
-                        for obj in self.objs
-                    ]
-                    mgr.refs = refs  # type: ignore[union-attr]
-                df = cons(mgr, copy=False)
+                df = cons(data, index=index, copy=self.copy)
                 df.columns = columns
                 return df.__finalize__(self, method="concat")
 

From df21ca2b55d910f199c522651e43c851241fd606 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 17 Jan 2023 09:39:27 +0100
Subject: [PATCH 05/16] further fix mypy

---
 pandas/core/internals/construction.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index cad776e1c3b9a..c8eaa71de6a63 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -572,8 +572,9 @@ def _homogenize(
                 hval = hval.astype(dtype, copy=False)
                 if using_copy_on_write() and hval.values is val.values:
                     # TODO(CoW) remove when astype() has implemented CoW
-                    hval._mgr.refs = [weakref.ref(val._mgr._block)]
-                    hval._mgr.parent = val._mgr
+                    refs = [weakref.ref(val._mgr._block)]  # type: ignore[union-attr]
+                    hval._mgr.refs = refs  # type: ignore[union-attr]
+                    hval._mgr.parent = val._mgr  # type: ignore[union-attr]
             if hval.index is not index:
                 # Forces alignment. No need to copy data since we
                 # are putting it into an ndarray later

From e7d2f0f0d297819a89005bc3f1fe69096f785758 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 20 Jan 2023 16:04:58 +0100
Subject: [PATCH 06/16] expand tests

---
 pandas/tests/copy_view/test_constructors.py | 48 +++++++++++++++++++--
 1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
index 01c21a800a885..45b142d9e9458 100644
--- a/pandas/tests/copy_view/test_constructors.py
+++ b/pandas/tests/copy_view/test_constructors.py
@@ -82,17 +82,22 @@ def test_series_from_series_with_reindex(using_copy_on_write):
 
 
 @pytest.mark.parametrize("dtype", [None, "int64"])
+@pytest.mark.parametrize("index", [None, [0, 1, 2]])
 @pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]])
-def test_dataframe_from_dict_of_series(using_copy_on_write, columns, dtype):
+def test_dataframe_from_dict_of_series(using_copy_on_write, columns, index, dtype):
     # Case: constructing a DataFrame from Series objects with copy=False
     # has to do a lazy following CoW rules
     # (the default for DataFrame(dict) is still to copy to ensure consolidation)
     s1 = Series([1, 2, 3])
     s2 = Series([4, 5, 6])
     s1_orig = s1.copy()
-    expected = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, dtype=dtype, columns=columns)
+    expected = DataFrame(
+        {"a": [1, 2, 3], "b": [4, 5, 6]}, index=index, columns=columns, dtype=dtype
+    )
 
-    result = DataFrame({"a": s1, "b": s2}, columns=columns, dtype=dtype, copy=False)
+    result = DataFrame(
+        {"a": s1, "b": s2}, index=index, columns=columns, dtype=dtype, copy=False
+    )
 
     # the shallow copy still shares memory
     assert np.shares_memory(get_array(result, "a"), s1.values)
@@ -106,10 +111,45 @@ def test_dataframe_from_dict_of_series(using_copy_on_write, columns, dtype):
         assert s1.iloc[0] == 10
 
     # the same when modifying the parent series
-    result = DataFrame({"a": s1, "b": s2}, columns=columns, dtype=dtype, copy=False)
+    result = DataFrame(
+        {"a": s1, "b": s2}, index=index, columns=columns, dtype=dtype, copy=False
+    )
     s1.iloc[0] = 10
     if using_copy_on_write:
         assert not np.shares_memory(get_array(result, "a"), s1.values)
         tm.assert_frame_equal(result, expected)
     else:
         assert result.iloc[0, 0] == 10
+
+
+@pytest.mark.parametrize("dtype", [None, "int64"])
+def test_dataframe_from_dict_of_series_with_reindex(dtype):
+    # Case: constructing a DataFrame from Series objects with copy=False
+    # and passing an index that requires an actual (no-view) reindex -> need
+    # to ensure the result doesn't have refs set up to unnecessarily trigger
+    # a copy on write
+    s1 = Series([1, 2, 3])
+    s2 = Series([4, 5, 6])
+    df = DataFrame({"a": s1, "b": s2}, index=[1, 2, 3], dtype=dtype, copy=False)
+
+    # df should own its memory, so mutating shouldn't trigger a copy
+    arr_before = get_array(df, "a")
+    df.iloc[0, 0] = 100
+    arr_after = get_array(df, "a")
+    assert np.shares_memory(arr_before, arr_after)
+
+
+@pytest.mark.parametrize("index", [None, [0, 1, 2]])
+def test_dataframe_from_dict_of_series_with_dtype(index):
+    # Variant of above, but now passing a dtype that causes a copy
+    # -> need to ensure the result doesn't have refs set up to unnecessarily
+    # trigger a copy on write
+    s1 = Series([1.0, 2.0, 3.0])
+    s2 = Series([4, 5, 6])
+    df = DataFrame({"a": s1, "b": s2}, index=index, dtype="int64", copy=False)
+
+    # df should own its memory, so mutating shouldn't trigger a copy
+    arr_before = get_array(df, "a")
+    df.iloc[0, 0] = 100
+    arr_after = get_array(df, "a")
+    assert np.shares_memory(arr_before, arr_after)

From 27e8549fc2657d617fe446310dfdeac248f1c499 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 20 Jan 2023 16:13:07 +0100
Subject: [PATCH 07/16] fix reindex case

---
 pandas/core/internals/construction.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index c8eaa71de6a63..45899e8423a94 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -568,6 +568,7 @@ def _homogenize(
     for val in data:
         if isinstance(val, ABCSeries):
             hval = val
+            is_copy = False
             if dtype is not None:
                 hval = hval.astype(dtype, copy=False)
                 if using_copy_on_write() and hval.values is val.values:
@@ -575,14 +576,20 @@ def _homogenize(
                     refs = [weakref.ref(val._mgr._block)]  # type: ignore[union-attr]
                     hval._mgr.refs = refs  # type: ignore[union-attr]
                     hval._mgr.parent = val._mgr  # type: ignore[union-attr]
+                if using_copy_on_write():
+                    if hval._mgr.refs is None:  # type: ignore[union-attr]
+                        is_copy = True
             if hval.index is not index:
                 # Forces alignment. No need to copy data since we
                 # are putting it into an ndarray later
                 hval = hval.reindex(index, copy=False)
+                if using_copy_on_write():
+                    if hval._mgr.refs is None:  # type: ignore[union-attr]
+                        is_copy = True
             if hval is val:
                 hval = val.copy(deep=False)
             homogenized.append(hval._values)
-            parents.append(hval)
+            parents.append(hval if not is_copy else None)
         else:
             if isinstance(val, dict):
                 # GH#41785 this _should_ be equivalent to (but faster than)

From 10238ba60fbf7c0204e579bc2c70d965000f8391 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 23 Jan 2023 10:56:28 +0100
Subject: [PATCH 08/16] clean-up

---
 pandas/core/internals/construction.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 45899e8423a94..1b3f905289dd2 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -59,7 +59,6 @@
     ExtensionArray,
     FloatingArray,
     IntegerArray,
-    PandasArray,
 )
 from pandas.core.arrays.string_ import StringDtype
 from pandas.core.construction import (
@@ -128,13 +127,12 @@ def arrays_to_mgr(
         #  - all(type(x) is not PandasArray for x in arrays)
 
     else:
+        # Reached via DataFrame._from_arrays; we do minimal validation here
         index = ensure_index(index)
-        arrays = [
-            arr.to_numpy() if isinstance(arr, PandasArray) else arr for arr in arrays
-        ]
+        arrays = [extract_array(x, extract_numpy=True) for x in arrays]
+        # with _from_arrays, the passed arrays should never be Series objects
         parents = None
 
-        # Reached via DataFrame._from_arrays; we do validation here
         for arr in arrays:
             if (
                 not isinstance(arr, (np.ndarray, ExtensionArray))

From 6d860bce23072c5ebd36348f6e4b35b85262e90a Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 23 Jan 2023 11:00:34 +0100
Subject: [PATCH 09/16] add whatsnew

---
 doc/source/whatsnew/v2.0.0.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 605f1d4b26e13..43c2752a91bd1 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -125,6 +125,10 @@ Copy-on-Write improvements
   a modification to the data happens) when constructing a Series from an existing
   Series with the default of ``copy=False`` (:issue:`50471`)
 
+- The :class:`DataFrame` constructor, when constructing a DataFrame from a dictionary
+  of Series objects and specifying ``copy=False``, will now use_inf_as_null a lazy copy
+  of those Series objects for the columns of the DataFrame (:issue:`50777`)
+
 Copy-on-Write can be enabled through
 
 .. code-block:: python

From b24ea5f2bd7a85b560fc9838b110c12fab21ccb7 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 30 Jan 2023 16:50:48 +0100
Subject: [PATCH 10/16] add xfailed test for int64->Int64

---
 pandas/tests/copy_view/test_constructors.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
index 45b142d9e9458..7f1fe413fd3ee 100644
--- a/pandas/tests/copy_view/test_constructors.py
+++ b/pandas/tests/copy_view/test_constructors.py
@@ -81,10 +81,12 @@ def test_series_from_series_with_reindex(using_copy_on_write):
         assert result._mgr.refs is None or result._mgr.refs[0] is None
 
 
-@pytest.mark.parametrize("dtype", [None, "int64"])
+@pytest.mark.parametrize("dtype", [None, "int64", "Int64"])
 @pytest.mark.parametrize("index", [None, [0, 1, 2]])
 @pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]])
-def test_dataframe_from_dict_of_series(using_copy_on_write, columns, index, dtype):
+def test_dataframe_from_dict_of_series(
+    request, using_copy_on_write, columns, index, dtype
+):
     # Case: constructing a DataFrame from Series objects with copy=False
     # has to do a lazy following CoW rules
     # (the default for DataFrame(dict) is still to copy to ensure consolidation)
@@ -99,6 +101,12 @@ def test_dataframe_from_dict_of_series(using_copy_on_write, columns, index, dtyp
         {"a": s1, "b": s2}, index=index, columns=columns, dtype=dtype, copy=False
     )
 
+    if using_copy_on_write and dtype == "Int64":
+        # TODO(CoW) remove this once astype properly tracks refs
+        request.node.add_marker(
+            pytest.mark.xfail(reason="astype with Int64 is giving view")
+        )
+
     # the shallow copy still shares memory
     assert np.shares_memory(get_array(result, "a"), s1.values)
 

From 6e32a342539cec0bc0bbd83b60cf844c957550a1 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 10 Feb 2023 10:20:45 +0100
Subject: [PATCH 11/16] refactor

---
 pandas/core/internals/blocks.py       |  6 ++--
 pandas/core/internals/construction.py | 50 +++++++++------------------
 pandas/core/internals/managers.py     | 45 ++++++++----------------
 3 files changed, 34 insertions(+), 67 deletions(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index e66011acb978b..6df4d59597c7c 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -2167,7 +2167,9 @@ def get_block_type(dtype: DtypeObj):
     return cls
 
 
-def new_block_2d(values: ArrayLike, placement: BlockPlacement):
+def new_block_2d(
+    values: ArrayLike, placement: BlockPlacement, refs: BlockValuesRefs | None = None
+):
     # new_block specialized to case with
     #  ndim=2
     #  isinstance(placement, BlockPlacement)
@@ -2175,7 +2177,7 @@ def new_block_2d(values: ArrayLike, placement: BlockPlacement):
     klass = get_block_type(values.dtype)
 
     values = maybe_coerce_values(values)
-    return klass(values, ndim=2, placement=placement)
+    return klass(values, ndim=2, placement=placement, refs=refs)
 
 
 def new_block(
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 6e94c8106cb4d..89814e61cc1af 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -10,13 +10,10 @@
     Hashable,
     Sequence,
 )
-import weakref
 
 import numpy as np
 from numpy import ma
 
-from pandas._config import using_copy_on_write
-
 from pandas._libs import lib
 from pandas._typing import (
     ArrayLike,
@@ -119,7 +116,7 @@ def arrays_to_mgr(
             index = ensure_index(index)
 
         # don't force copy because getting jammed in an ndarray anyway
-        arrays, parents = _homogenize(arrays, index, dtype)
+        arrays, refs = _homogenize(arrays, index, dtype)
         # _homogenize ensures
         #  - all(len(x) == len(index) for x in arrays)
         #  - all(x.ndim == 1 for x in arrays)
@@ -131,7 +128,7 @@ def arrays_to_mgr(
         index = ensure_index(index)
         arrays = [extract_array(x, extract_numpy=True) for x in arrays]
         # with _from_arrays, the passed arrays should never be Series objects
-        parents = None
+        refs = [None] * len(arrays)
 
         for arr in arrays:
             if (
@@ -153,10 +150,7 @@ def arrays_to_mgr(
 
     if typ == "block":
         return create_block_manager_from_column_arrays(
-            arrays,
-            axes,
-            consolidate=consolidate,
-            parents=parents,
+            arrays, axes, consolidate=consolidate, refs=refs
         )
     elif typ == "array":
         return ArrayManager(arrays, [index, columns])
@@ -557,37 +551,25 @@ def _ensure_2d(values: np.ndarray) -> np.ndarray:
 
 def _homogenize(
     data, index: Index, dtype: DtypeObj | None
-) -> tuple[list[ArrayLike], list[Any] | None]:
+) -> tuple[list[ArrayLike], list[Any]]:
     oindex = None
     homogenized = []
     # if the original array-like in `data` is a Series, keep track of this Series
-    parents: list[Any] = []
+    refs: list[Any] = []
 
     for val in data:
         if isinstance(val, ABCSeries):
-            hval = val
-            is_copy = False
+            orig = val
             if dtype is not None:
-                hval = hval.astype(dtype, copy=False)
-                if using_copy_on_write() and hval.values is val.values:
-                    # TODO(CoW) remove when astype() has implemented CoW
-                    refs = [weakref.ref(val._mgr._block)]  # type: ignore[union-attr]
-                    hval._mgr.refs = refs  # type: ignore[union-attr]
-                    hval._mgr.parent = val._mgr  # type: ignore[union-attr]
-                if using_copy_on_write():
-                    if hval._mgr.refs is None:  # type: ignore[union-attr]
-                        is_copy = True
-            if hval.index is not index:
+                val = val.astype(dtype, copy=False)
+            if val.index is not index:
                 # Forces alignment. No need to copy data since we
                 # are putting it into an ndarray later
-                hval = hval.reindex(index, copy=False)
-                if using_copy_on_write():
-                    if hval._mgr.refs is None:  # type: ignore[union-attr]
-                        is_copy = True
-            if hval is val:
-                hval = val.copy(deep=False)
-            homogenized.append(hval._values)
-            parents.append(hval if not is_copy else None)
+                val = val.reindex(index, copy=False)
+            if val is orig:
+                val = val.copy(deep=False)
+            refs.append(val._mgr._block.refs)
+            val = val._values
         else:
             if isinstance(val, dict):
                 # GH#41785 this _should_ be equivalent to (but faster than)
@@ -605,10 +587,10 @@ def _homogenize(
 
             val = sanitize_array(val, index, dtype=dtype, copy=False)
             com.require_length_match(val, index)
-            homogenized.append(val)
-            parents.append(None)
+            refs.append(None)
+        homogenized.append(val)
 
-    return homogenized, None if com.all_none(*parents) else parents
+    return homogenized, refs
 
 
 def _extract_index(data) -> Index:
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index bb445e3d5b8be..df77a877f0842 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -2076,7 +2076,7 @@ def create_block_manager_from_column_arrays(
     arrays: list[ArrayLike],
     axes: list[Index],
     consolidate: bool = True,
-    parents: list | None = None,
+    refs: list = None,
 ) -> BlockManager:
     # Assertions disabled for performance (caller is responsible for verifying)
     # assert isinstance(axes, list)
@@ -2090,31 +2090,8 @@ def create_block_manager_from_column_arrays(
     #  verify_integrity=False below.
 
     try:
-        blocks = _form_blocks(arrays, consolidate)
-        refs = None
-        parent = None
-        if parents is not None and using_copy_on_write():
-            # elements in `parents` are Series objects *if* the original input
-            # for the column was a Series, or otherwise None
-            # -> in case of a Series, keep track of its refs if it has those
-            # (this Series is already a view on the original one, so we can
-            # directly use its ref instead of creating a new ref to this Series)
-            refs = []
-            parent = []
-            for ser in parents:
-                if (
-                    ser is not None
-                    and ser._mgr.refs is not None
-                    and (ref := ser._mgr.refs[0]) is not None
-                ):
-                    refs.append(ref)
-                    parent.append(ser)
-                else:
-                    refs.append(None)
-
-        mgr = BlockManager(
-            blocks, axes, refs=refs, parent=parent, verify_integrity=False
-        )
+        blocks = _form_blocks(arrays, consolidate, refs)
+        mgr = BlockManager(blocks, axes, verify_integrity=False)
     except ValueError as e:
         raise_construction_error(len(arrays), arrays[0].shape, axes, e)
     if consolidate:
@@ -2167,13 +2144,17 @@ def _grouping_func(tup: tuple[int, ArrayLike]) -> tuple[int, bool, DtypeObj]:
     return sep, isinstance(dtype, np.dtype), dtype
 
 
-def _form_blocks(arrays: list[ArrayLike], consolidate: bool) -> list[Block]:
+def _form_blocks(arrays: list[ArrayLike], consolidate: bool, refs: list) -> list[Block]:
     tuples = list(enumerate(arrays))
 
     if not consolidate:
-        nbs = _tuples_to_blocks_no_consolidate(tuples)
+        nbs = _tuples_to_blocks_no_consolidate(tuples, refs)
         return nbs
 
+    # when consolidating, we can ignore refs (either stacking always copies,
+    # or the EA is already copied in the calling dict_to_mgr)
+    # TODO(CoW) check if this is also valid for rec_array_to_mgr
+
     # group by dtype
     grouper = itertools.groupby(tuples, _grouping_func)
 
@@ -2211,11 +2192,13 @@ def _form_blocks(arrays: list[ArrayLike], consolidate: bool) -> list[Block]:
     return nbs
 
 
-def _tuples_to_blocks_no_consolidate(tuples) -> list[Block]:
+def _tuples_to_blocks_no_consolidate(tuples, refs) -> list[Block]:
     # tuples produced within _form_blocks are of the form (placement, array)
     return [
-        new_block_2d(ensure_block_shape(x[1], ndim=2), placement=BlockPlacement(x[0]))
-        for x in tuples
+        new_block_2d(
+            ensure_block_shape(arr, ndim=2), placement=BlockPlacement(i), refs=ref
+        )
+        for ((i, arr), ref) in zip(tuples, refs)
     ]
 
 

From 82bc4bb612379356a61ff31d0a0f018a06d1482c Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 10 Feb 2023 10:24:03 +0100
Subject: [PATCH 12/16] cleanup

---
 pandas/core/internals/construction.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 89814e61cc1af..e54176aee3a26 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -124,12 +124,12 @@ def arrays_to_mgr(
         #  - all(type(x) is not PandasArray for x in arrays)
 
     else:
-        # Reached via DataFrame._from_arrays; we do minimal validation here
         index = ensure_index(index)
         arrays = [extract_array(x, extract_numpy=True) for x in arrays]
         # with _from_arrays, the passed arrays should never be Series objects
         refs = [None] * len(arrays)
 
+        # Reached via DataFrame._from_arrays; we do minimal validation here
         for arr in arrays:
             if (
                 not isinstance(arr, (np.ndarray, ExtensionArray))
@@ -554,7 +554,7 @@ def _homogenize(
 ) -> tuple[list[ArrayLike], list[Any]]:
     oindex = None
     homogenized = []
-    # if the original array-like in `data` is a Series, keep track of this Series
+    # if the original array-like in `data` is a Series, keep track of this Series' refs
     refs: list[Any] = []
 
     for val in data:
@@ -588,6 +588,7 @@ def _homogenize(
             val = sanitize_array(val, index, dtype=dtype, copy=False)
             com.require_length_match(val, index)
             refs.append(None)
+
         homogenized.append(val)
 
     return homogenized, refs

From 7fc85945d18b50547c202301510cc9c9931de2e5 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 10 Feb 2023 10:52:24 +0100
Subject: [PATCH 13/16] fix for AM

---
 pandas/core/internals/construction.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index e54176aee3a26..b8bc7adb9bfcc 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -568,7 +568,10 @@ def _homogenize(
                 val = val.reindex(index, copy=False)
             if val is orig:
                 val = val.copy(deep=False)
-            refs.append(val._mgr._block.refs)
+            if isinstance(val._mgr, BlockManager):
+                refs.append(val._mgr._block.refs)
+            else:
+                refs.append(None)
             val = val._values
         else:
             if isinstance(val, dict):

From 3d2d5e6fd0414421314471ebe183842672c0b631 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 10 Feb 2023 12:46:40 +0100
Subject: [PATCH 14/16] fix + address feedback

---
 pandas/core/internals/construction.py       |  5 +----
 pandas/tests/copy_view/test_constructors.py | 10 +++++++---
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index b8bc7adb9bfcc..e5b2e5ce42fcd 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -559,16 +559,13 @@ def _homogenize(
 
     for val in data:
         if isinstance(val, ABCSeries):
-            orig = val
             if dtype is not None:
                 val = val.astype(dtype, copy=False)
             if val.index is not index:
                 # Forces alignment. No need to copy data since we
                 # are putting it into an ndarray later
                 val = val.reindex(index, copy=False)
-            if val is orig:
-                val = val.copy(deep=False)
-            if isinstance(val._mgr, BlockManager):
+            if isinstance(val._mgr, SingleBlockManager):
                 refs.append(val._mgr._block.refs)
             else:
                 refs.append(None)
diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
index fc59ac271430e..2cacf0d6f6f91 100644
--- a/pandas/tests/copy_view/test_constructors.py
+++ b/pandas/tests/copy_view/test_constructors.py
@@ -103,23 +103,25 @@ def test_dataframe_from_dict_of_series(
     )
 
     # the shallow copy still shares memory
-    assert np.shares_memory(get_array(result, "a"), s1.values)
+    assert np.shares_memory(get_array(result, "a"), get_array(s1))
 
     # mutating the new dataframe doesn't mutate original
     result.iloc[0, 0] = 10
     if using_copy_on_write:
-        assert not np.shares_memory(get_array(result, "a"), s1.values)
+        assert not np.shares_memory(get_array(result, "a"), get_array(s1))
         tm.assert_series_equal(s1, s1_orig)
     else:
         assert s1.iloc[0] == 10
 
     # the same when modifying the parent series
+    s1 = Series([1, 2, 3])
+    s2 = Series([4, 5, 6])
     result = DataFrame(
         {"a": s1, "b": s2}, index=index, columns=columns, dtype=dtype, copy=False
     )
     s1.iloc[0] = 10
     if using_copy_on_write:
-        assert not np.shares_memory(get_array(result, "a"), s1.values)
+        assert not np.shares_memory(get_array(result, "a"), get_array(s1))
         tm.assert_frame_equal(result, expected)
     else:
         assert result.iloc[0, 0] == 10
@@ -137,6 +139,7 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype):
 
     # df should own its memory, so mutating shouldn't trigger a copy
     arr_before = get_array(df, "a")
+    assert not np.shares_memory(arr_before, get_array(s1))
     df.iloc[0, 0] = 100
     arr_after = get_array(df, "a")
     assert np.shares_memory(arr_before, arr_after)
@@ -153,6 +156,7 @@ def test_dataframe_from_dict_of_series_with_dtype(index):
 
     # df should own its memory, so mutating shouldn't trigger a copy
     arr_before = get_array(df, "a")
+    assert not np.shares_memory(arr_before, get_array(s1))
     df.iloc[0, 0] = 100
     arr_after = get_array(df, "a")
     assert np.shares_memory(arr_before, arr_after)

From 48a00a111f1ccc09542eb345bc82394e1e63023d Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 10 Feb 2023 14:18:16 +0100
Subject: [PATCH 15/16] fixup typing

---
 pandas/core/internals/managers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index ef4f2149d8ac7..15f721a774e72 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -2072,7 +2072,7 @@ def create_block_manager_from_column_arrays(
     arrays: list[ArrayLike],
     axes: list[Index],
     consolidate: bool = True,
-    refs: list = None,
+    refs: list | None = None,
 ) -> BlockManager:
     # Assertions disabled for performance (caller is responsible for verifying)
     # assert isinstance(axes, list)

From 5476780e67dbdc3c52e0352259d160199968874a Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 10 Feb 2023 15:39:48 +0100
Subject: [PATCH 16/16] try fix typing

---
 pandas/core/internals/managers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 15f721a774e72..95ce4774bfe1d 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -2071,8 +2071,8 @@ def create_block_manager_from_blocks(
 def create_block_manager_from_column_arrays(
     arrays: list[ArrayLike],
     axes: list[Index],
-    consolidate: bool = True,
-    refs: list | None = None,
+    consolidate: bool,
+    refs: list,
 ) -> BlockManager:
     # Assertions disabled for performance (caller is responsible for verifying)
     # assert isinstance(axes, list)