From 7b892bd22d1f37e70db2ad23408251a426229143 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 19 Jun 2020 10:35:11 -0500
Subject: [PATCH 1/8] API: Honor copy for dict-input in DataFrame

Closes https://github.com/pandas-dev/pandas/issues/32960
---
 pandas/core/frame.py                    |  9 +++++++--
 pandas/core/internals/construction.py   | 18 ++++++++++++------
 pandas/core/internals/managers.py       | 11 +++++++----
 pandas/tests/frame/test_constructors.py | 24 ++++++++++++++++++++++--
 4 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 39ca7ed47f7fa..1a1f6d2f83cd8 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -359,7 +359,12 @@ class DataFrame(NDFrame):
     dtype : dtype, default None
         Data type to force. Only a single dtype is allowed. If None, infer.
     copy : bool, default False
-        Copy data from inputs. Only affects DataFrame / 2d ndarray input.
+        Copy data from inputs. This only applies to specific cases.
+
+        * `data` is a DataFrame or 2D NumPy array
+        * `data` is a dict with at most one column per NumPy dtype.
+
+        Or all other cases, zero-copy construction cannot be ensured.
 
     See Also
     --------
@@ -456,7 +461,7 @@ def __init__(
             )
 
         elif isinstance(data, dict):
-            mgr = init_dict(data, index, columns, dtype=dtype)
+            mgr = init_dict(data, index, columns, dtype=dtype, copy=copy)
         elif isinstance(data, ma.MaskedArray):
             import numpy.ma.mrecords as mrecords
 
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index d49f1f154a2c1..60e061449a25f 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -64,6 +64,7 @@ def arrays_to_mgr(
     columns,
     dtype: Optional[DtypeObj] = None,
     verify_integrity: bool = True,
+    copy: bool = False,
 ):
     """
     Segregate Series based on type and coerce into matrices.
@@ -80,7 +81,7 @@ def arrays_to_mgr(
             index = ensure_index(index)
 
         # don't force copy because getting jammed in an ndarray anyway
-        arrays = _homogenize(arrays, index, dtype)
+        arrays = _homogenize(arrays, index, dtype, copy=copy)
 
         columns = ensure_index(columns)
     else:
@@ -234,7 +235,9 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
     return create_block_manager_from_blocks(block_values, [columns, index])
 
 
-def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):
+def init_dict(
+    data: Dict, index, columns, dtype: Optional[DtypeObj] = None, copy: bool = False
+):
     """
     Segregate Series based on type and coerce into matrices.
     Needs to handle a lot of exceptional cases.
@@ -272,6 +275,7 @@ def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):
         keys = list(data.keys())
         columns = data_names = Index(keys)
         arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
+        # breakpoint()
         # GH#24096 need copy to be deep for datetime64tz case
         # TODO: See if we can avoid these copies
         arrays = [
@@ -280,7 +284,7 @@ def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):
         arrays = [
             arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
         ]
-    return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
+    return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype, copy=copy)
 
 
 # ---------------------------------------------------------------------
@@ -326,14 +330,16 @@ def convert(v):
     return values
 
 
-def _homogenize(data, index, dtype: Optional[DtypeObj]):
+def _homogenize(data, index, dtype: Optional[DtypeObj], copy: bool = False):
     oindex = None
     homogenized = []
 
     for val in data:
         if isinstance(val, ABCSeries):
             if dtype is not None:
-                val = val.astype(dtype)
+                val = val.astype(dtype, copy=copy)
+            elif copy:
+                val = val.copy()
             if val.index is not index:
                 # Forces alignment. No need to copy data since we
                 # are putting it into an ndarray later
@@ -349,7 +355,7 @@ def _homogenize(data, index, dtype: Optional[DtypeObj]):
                     val = dict(val)
                 val = lib.fast_multiget(val, oindex._values, default=np.nan)
             val = sanitize_array(
-                val, index, dtype=dtype, copy=False, raise_cast_failure=False
+                val, index, dtype=dtype, copy=copy, raise_cast_failure=False
             )
 
         homogenized.append(val)
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index eaf59051205d6..697d7013409cf 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1817,10 +1817,13 @@ def _shape_compat(x):
 
     first = arrays[0]
     shape = (len(arrays),) + _shape_compat(first)
-
-    stacked = np.empty(shape, dtype=dtype)
-    for i, arr in enumerate(arrays):
-        stacked[i] = _asarray_compat(arr)
+    if len(arrays) == 1:
+        # allow for 0-copy construction from dict
+        stacked = _asarray_compat(first).reshape(shape)
+    else:
+        stacked = np.empty(shape, dtype=dtype)
+        for i, arr in enumerate(arrays):
+            stacked[i] = _asarray_compat(arr)
 
     return stacked, placement
 
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index baac87755c6d2..45cdae9dea1df 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -1909,12 +1909,16 @@ def test_constructor_ndarray_copy(self, float_frame):
         assert not (df.values[6] == 6).all()
 
     def test_constructor_series_copy(self, float_frame):
-        series = float_frame._series
+        series = float_frame._series.copy()
+
+        df = DataFrame({"A": series["A"]}, copy=True)
+        df["A"][:] = 5
+        assert not (series["A"] == 5).all()
 
         df = DataFrame({"A": series["A"]})
         df["A"][:] = 5
 
-        assert not (series["A"] == 5).all()
+        assert (series["A"] == 5).all()
 
     def test_constructor_with_nas(self):
         # GH 5016
@@ -2679,3 +2683,19 @@ def test_construction_from_set_raises(self):
         msg = "Set type is unordered"
         with pytest.raises(TypeError, match=msg):
             pd.DataFrame({"a": {1, 2, 3}})
+
+
+@pytest.mark.parametrize("copy", [False, True])
+def test_dict_nocopy(copy):
+    a = np.array([1, 2])
+    b = pd.array([1, 2])
+    df = pd.DataFrame({"a": a, "b": b}, copy=copy)
+    df.iloc[0, 0] = 0
+    df.iloc[0, 1] = 0
+
+    if copy:
+        assert a[0] == 1
+        assert b[0] == 1
+    else:
+        assert a[0] == 0
+        assert b[0] == 0

From acf99dd7dd573970d47badaed57dbb6466e9bfd3 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 19 Jun 2020 14:07:26 -0500
Subject: [PATCH 2/8] Fixups

---
 doc/source/whatsnew/v1.1.0.rst          |  1 +
 pandas/core/frame.py                    | 25 ++++++++++++++-----
 pandas/core/internals/construction.py   |  1 -
 pandas/tests/frame/test_constructors.py | 33 ++++++++++++-------------
 4 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index a27e6e8433779..209198698bc45 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -261,6 +261,7 @@ Other enhancements
 - :meth:`DataFrame.sample` will now also allow array-like and BitGenerator objects to be passed to ``random_state`` as seeds (:issue:`32503`)
 - :meth:`MultiIndex.union` will now raise `RuntimeWarning` if the object inside are unsortable, pass `sort=False` to suppress this warning (:issue:`33015`)
 - :class:`Series.dt` and :class:`DatatimeIndex` now have an `isocalendar` method that returns a :class:`DataFrame` with year, week, and day calculated according to the ISO 8601 calendar (:issue:`33206`, :issue:`34392`).
+- The :meth:`DataFrame` constructor now uses ``copy`` for dict-inputs to control whether copies of the arrays are made (:issue:`32960`)
 - The :meth:`DataFrame.to_feather` method now supports additional keyword
   arguments (e.g. to set the compression) that are added in pyarrow 0.17
   (:issue:`33422`).
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 1a1f6d2f83cd8..1c9ab3f046752 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -358,13 +358,17 @@ class DataFrame(NDFrame):
         RangeIndex (0, 1, 2, ..., n) if no column labels are provided.
     dtype : dtype, default None
         Data type to force. Only a single dtype is allowed. If None, infer.
-    copy : bool, default False
-        Copy data from inputs. This only applies to specific cases.
+    copy : bool, optional
+        Copy data from inputs. This only applies for specific types of `data`
+        and the default behavior depends on the type of data.
 
-        * `data` is a DataFrame or 2D NumPy array
-        * `data` is a dict with at most one column per NumPy dtype.
+        * `data` is a DataFrame or 2D NumPy array: do *not* copy by default.
+           Specifying ``copy=True`` will copy the data.
+        * `data` is a dict with at most one column per NumPy dtype: copy by default.
+           Specifying ``copy=False`` will not copy any of the data.
 
-        Or all other cases, zero-copy construction cannot be ensured.
+        For all other cases, zero-copy construction cannot be ensured and `copy`
+        has no effect.
 
     See Also
     --------
@@ -440,7 +444,7 @@ def __init__(
         index: Optional[Axes] = None,
         columns: Optional[Axes] = None,
         dtype: Optional[Dtype] = None,
-        copy: bool = False,
+        copy: Optional[bool] = None,
     ):
         if data is None:
             data = {}
@@ -451,6 +455,7 @@ def __init__(
             data = data._mgr
 
         if isinstance(data, BlockManager):
+            copy = bool(copy)  # None -> False
             if index is None and columns is None and dtype is None and copy is False:
                 # GH#33357 fastpath
                 NDFrame.__init__(self, data)
@@ -461,10 +466,15 @@ def __init__(
             )
 
         elif isinstance(data, dict):
+            if copy is None:
+                # Copy by default for dict
+                copy = True
             mgr = init_dict(data, index, columns, dtype=dtype, copy=copy)
         elif isinstance(data, ma.MaskedArray):
             import numpy.ma.mrecords as mrecords
 
+            copy = bool(copy)  # None -> False
+
             # masked recarray
             if isinstance(data, mrecords.MaskedRecords):
                 mgr = masked_rec_array_to_mgr(data, index, columns, dtype, copy)
@@ -481,6 +491,7 @@ def __init__(
                 mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
 
         elif isinstance(data, (np.ndarray, Series, Index)):
+            copy = bool(copy)  # None -> False
             if data.dtype.names:
                 data_columns = list(data.dtype.names)
                 data = {k: data[k] for k in data_columns}
@@ -494,6 +505,7 @@ def __init__(
 
         # For data is list-like, or Iterable (will consume into list)
         elif isinstance(data, abc.Iterable) and not isinstance(data, (str, bytes)):
+            copy = bool(copy)  # None -> False
             if not isinstance(data, (abc.Sequence, ExtensionArray)):
                 data = list(data)
             if len(data) > 0:
@@ -520,6 +532,7 @@ def __init__(
             else:
                 mgr = init_dict({}, index, columns, dtype=dtype)
         else:
+            copy = bool(copy)  # None -> False
             try:
                 arr = np.array(data, dtype=dtype, copy=copy)
             except (ValueError, TypeError) as err:
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 60e061449a25f..b030cf753c785 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -275,7 +275,6 @@ def init_dict(
         keys = list(data.keys())
         columns = data_names = Index(keys)
         arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
-        # breakpoint()
         # GH#24096 need copy to be deep for datetime64tz case
         # TODO: See if we can avoid these copies
         arrays = [
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 45cdae9dea1df..b242f89a01468 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -1909,13 +1909,13 @@ def test_constructor_ndarray_copy(self, float_frame):
         assert not (df.values[6] == 6).all()
 
     def test_constructor_series_copy(self, float_frame):
-        series = float_frame._series.copy()
+        series = float_frame._series
 
         df = DataFrame({"A": series["A"]}, copy=True)
         df["A"][:] = 5
         assert not (series["A"] == 5).all()
 
-        df = DataFrame({"A": series["A"]})
+        df = DataFrame({"A": series["A"]}, copy=False)
         df["A"][:] = 5
 
         assert (series["A"] == 5).all()
@@ -2684,18 +2684,17 @@ def test_construction_from_set_raises(self):
         with pytest.raises(TypeError, match=msg):
             pd.DataFrame({"a": {1, 2, 3}})
 
-
-@pytest.mark.parametrize("copy", [False, True])
-def test_dict_nocopy(copy):
-    a = np.array([1, 2])
-    b = pd.array([1, 2])
-    df = pd.DataFrame({"a": a, "b": b}, copy=copy)
-    df.iloc[0, 0] = 0
-    df.iloc[0, 1] = 0
-
-    if copy:
-        assert a[0] == 1
-        assert b[0] == 1
-    else:
-        assert a[0] == 0
-        assert b[0] == 0
+    @pytest.mark.parametrize("copy", [None, False, True])
+    def test_dict_nocopy(self, copy):
+        a = np.array([1, 2])
+        b = pd.array([1, 2])
+        df = pd.DataFrame({"a": a, "b": b}, copy=copy)
+        df.iloc[0, 0] = 0
+        df.iloc[0, 1] = 0
+
+        if copy is True or copy is None:
+            assert a[0] == 1
+            assert b[0] == 1
+        else:
+            assert a[0] == 0
+            assert b[0] == 0

From 499080b0c89f4f59c77473287f8d5d8dabe30dd6 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 19 Jun 2020 14:12:49 -0500
Subject: [PATCH 3/8] copy

---
 pandas/tests/frame/test_constructors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index b242f89a01468..db7950e6dbe81 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -1911,7 +1911,7 @@ def test_constructor_ndarray_copy(self, float_frame):
     def test_constructor_series_copy(self, float_frame):
         series = float_frame._series
 
-        df = DataFrame({"A": series["A"]}, copy=True)
+        df = DataFrame({"A": series["A"]})  # copy by default
         df["A"][:] = 5
         assert not (series["A"] == 5).all()
 

From 20c87ce10569c2247b664604c8d918144e3bee84 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 19 Jun 2020 14:49:02 -0500
Subject: [PATCH 4/8] fixup

---
 pandas/tests/extension/test_sparse.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py
index f318934ef5e52..ac933e342bcff 100644
--- a/pandas/tests/extension/test_sparse.py
+++ b/pandas/tests/extension/test_sparse.py
@@ -259,7 +259,7 @@ def test_combine_le(self, data_repeated):
 
     def test_fillna_copy_frame(self, data_missing):
         arr = data_missing.take([1, 1])
-        df = pd.DataFrame({"A": arr})
+        df = pd.DataFrame({"A": arr}, copy=False)
 
         filled_val = df.iloc[0, 0]
         result = df.fillna(filled_val)

From b0b125d471ed01a86c77a714754e67b01c2479b5 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 19 Jun 2020 14:56:44 -0500
Subject: [PATCH 5/8] simplify

---
 pandas/core/frame.py                    | 22 ++++++----------------
 pandas/tests/extension/test_sparse.py   |  2 +-
 pandas/tests/frame/test_constructors.py | 12 ++++++------
 3 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 1c9ab3f046752..b2cf3781a1457 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -358,14 +358,13 @@ class DataFrame(NDFrame):
         RangeIndex (0, 1, 2, ..., n) if no column labels are provided.
     dtype : dtype, default None
         Data type to force. Only a single dtype is allowed. If None, infer.
-    copy : bool, optional
-        Copy data from inputs. This only applies for specific types of `data`
-        and the default behavior depends on the type of data.
+    copy : bool, default False
+        Copy data from inputs. This only applies for specific types of `data`.
 
-        * `data` is a DataFrame or 2D NumPy array: do *not* copy by default.
+        * `data` is a DataFrame or 2D NumPy array
            Specifying ``copy=True`` will copy the data.
-        * `data` is a dict with at most one column per NumPy dtype: copy by default.
-           Specifying ``copy=False`` will not copy any of the data.
+        * `data` is a dict with at most one column per NumPy dtype
+           Specifying ``copy=True`` will copy all of the values.
 
         For all other cases, zero-copy construction cannot be ensured and `copy`
         has no effect.
@@ -444,7 +443,7 @@ def __init__(
         index: Optional[Axes] = None,
         columns: Optional[Axes] = None,
         dtype: Optional[Dtype] = None,
-        copy: Optional[bool] = None,
+        copy: bool = False,
     ):
         if data is None:
             data = {}
@@ -455,7 +454,6 @@ def __init__(
             data = data._mgr
 
         if isinstance(data, BlockManager):
-            copy = bool(copy)  # None -> False
             if index is None and columns is None and dtype is None and copy is False:
                 # GH#33357 fastpath
                 NDFrame.__init__(self, data)
@@ -466,15 +464,10 @@ def __init__(
             )
 
         elif isinstance(data, dict):
-            if copy is None:
-                # Copy by default for dict
-                copy = True
             mgr = init_dict(data, index, columns, dtype=dtype, copy=copy)
         elif isinstance(data, ma.MaskedArray):
             import numpy.ma.mrecords as mrecords
 
-            copy = bool(copy)  # None -> False
-
             # masked recarray
             if isinstance(data, mrecords.MaskedRecords):
                 mgr = masked_rec_array_to_mgr(data, index, columns, dtype, copy)
@@ -491,7 +484,6 @@ def __init__(
                 mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
 
         elif isinstance(data, (np.ndarray, Series, Index)):
-            copy = bool(copy)  # None -> False
             if data.dtype.names:
                 data_columns = list(data.dtype.names)
                 data = {k: data[k] for k in data_columns}
@@ -505,7 +497,6 @@ def __init__(
 
         # For data is list-like, or Iterable (will consume into list)
         elif isinstance(data, abc.Iterable) and not isinstance(data, (str, bytes)):
-            copy = bool(copy)  # None -> False
             if not isinstance(data, (abc.Sequence, ExtensionArray)):
                 data = list(data)
             if len(data) > 0:
@@ -532,7 +523,6 @@ def __init__(
             else:
                 mgr = init_dict({}, index, columns, dtype=dtype)
         else:
-            copy = bool(copy)  # None -> False
             try:
                 arr = np.array(data, dtype=dtype, copy=copy)
             except (ValueError, TypeError) as err:
diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py
index ac933e342bcff..f318934ef5e52 100644
--- a/pandas/tests/extension/test_sparse.py
+++ b/pandas/tests/extension/test_sparse.py
@@ -259,7 +259,7 @@ def test_combine_le(self, data_repeated):
 
     def test_fillna_copy_frame(self, data_missing):
         arr = data_missing.take([1, 1])
-        df = pd.DataFrame({"A": arr}, copy=False)
+        df = pd.DataFrame({"A": arr})
 
         filled_val = df.iloc[0, 0]
         result = df.fillna(filled_val)
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index db7950e6dbe81..91be73146aba3 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -1911,14 +1911,14 @@ def test_constructor_ndarray_copy(self, float_frame):
     def test_constructor_series_copy(self, float_frame):
         series = float_frame._series
 
-        df = DataFrame({"A": series["A"]})  # copy by default
+        df = DataFrame({"A": series["A"]}, copy=True)
         df["A"][:] = 5
-        assert not (series["A"] == 5).all()
 
-        df = DataFrame({"A": series["A"]}, copy=False)
-        df["A"][:] = 5
+        assert not all(series["A"] == 5).all()
 
-        assert (series["A"] == 5).all()
+        df = DataFrame({"A": series["A"]})  # no copy by default
+        df["A"][:] = 5
+        assert all(series["A"] == 5).all()
 
     def test_constructor_with_nas(self):
         # GH 5016
@@ -2692,7 +2692,7 @@ def test_dict_nocopy(self, copy):
         df.iloc[0, 0] = 0
         df.iloc[0, 1] = 0
 
-        if copy is True or copy is None:
+        if copy:
             assert a[0] == 1
             assert b[0] == 1
         else:

From f9b3f16508a22e0ff8030427eaee7a080c8b09ca Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 19 Jun 2020 15:31:30 -0500
Subject: [PATCH 6/8] optional

---
 pandas/core/construction.py             | 13 ++++++++++++-
 pandas/core/frame.py                    | 24 ++++++++++++++++++------
 pandas/core/internals/construction.py   | 10 +++++++---
 pandas/tests/frame/test_constructors.py | 11 ++++++++---
 4 files changed, 45 insertions(+), 13 deletions(-)

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index b110a316a76d9..5b4b0d9f91081 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -390,7 +390,7 @@ def sanitize_array(
     data,
     index: Optional["Index"],
     dtype: Optional[DtypeObj] = None,
-    copy: bool = False,
+    copy: Optional[bool] = False,
     raise_cast_failure: bool = False,
 ) -> ArrayLike:
     """
@@ -412,6 +412,9 @@ def sanitize_array(
 
     # GH#846
     if isinstance(data, np.ndarray):
+        if copy is None:
+            # copy by default for DataFrame({"A": ndarray})
+            copy = True
 
         if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype):
             # possibility of nan -> garbage
@@ -428,15 +431,20 @@ def sanitize_array(
 
     elif isinstance(data, ABCExtensionArray):
         # it is already ensured above this is not a PandasArray
+        # no copy by default for DataFrame({"A": ndarray})
+        if copy is None:
+            copy = False
         subarr = data
 
         if dtype is not None:
             subarr = subarr.astype(dtype, copy=copy)
         elif copy:
+            # copy only on explicit request; copy=None was mapped to False above
             subarr = subarr.copy()
         return subarr
 
     elif isinstance(data, (list, tuple)) and len(data) > 0:
+        copy = bool(copy)  # None -> False
         if dtype is not None:
             subarr = _try_cast(data, dtype, copy, raise_cast_failure)
         else:
@@ -446,16 +454,19 @@ def sanitize_array(
 
     elif isinstance(data, range):
         # GH#16804
+        copy = bool(copy)  # None -> False
         arr = np.arange(data.start, data.stop, data.step, dtype="int64")
         subarr = _try_cast(arr, dtype, copy, raise_cast_failure)
     elif isinstance(data, abc.Set):
         raise TypeError("Set type is unordered")
     elif lib.is_scalar(data) and index is not None and dtype is not None:
+        copy = bool(copy)  # None -> False
         data = maybe_cast_to_datetime(data, dtype)
         if not lib.is_scalar(data):
             data = data[0]
         subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype)
     else:
+        copy = bool(copy)  # None -> False
         subarr = _try_cast(data, dtype, copy, raise_cast_failure)
 
     # scalar like, GH
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index b2cf3781a1457..1ad96c9c4c33f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -358,13 +358,19 @@ class DataFrame(NDFrame):
         RangeIndex (0, 1, 2, ..., n) if no column labels are provided.
     dtype : dtype, default None
         Data type to force. Only a single dtype is allowed. If None, infer.
-    copy : bool, default False
-        Copy data from inputs. This only applies for specific types of `data`.
+    copy : bool, optional
+        Copy data from inputs. This only applies for specific types of `data`
+        and the default behavior depends on `data`.
 
-        * `data` is a DataFrame or 2D NumPy array
+        * `data` is a DataFrame or 2D NumPy array: *no* copy by default.
            Specifying ``copy=True`` will copy the data.
-        * `data` is a dict with at most one column per NumPy dtype
-           Specifying ``copy=True`` will copy all of the values.
+        * `data` is a dict:
+           By default arrays in `data` with with NumPy dtypes in `data` are
+           copied, while extension types are not. Specifying ``copy=True``
+           will copy all of the values, and ``copy=False`` will attempt to
+           not copy the data. Note that if `data` has more than one value with
+           the same NumPy dtype then then data will be copied, regardless of
+           the value of `copy`.
 
         For all other cases, zero-copy construction cannot be ensured and `copy`
         has no effect.
@@ -443,7 +449,7 @@ def __init__(
         index: Optional[Axes] = None,
         columns: Optional[Axes] = None,
         dtype: Optional[Dtype] = None,
-        copy: bool = False,
+        copy: Optional[bool] = None,
     ):
         if data is None:
             data = {}
@@ -454,6 +460,7 @@ def __init__(
             data = data._mgr
 
         if isinstance(data, BlockManager):
+            copy = bool(copy)  # None -> False
             if index is None and columns is None and dtype is None and copy is False:
                 # GH#33357 fastpath
                 NDFrame.__init__(self, data)
@@ -468,6 +475,8 @@ def __init__(
         elif isinstance(data, ma.MaskedArray):
             import numpy.ma.mrecords as mrecords
 
+            copy = bool(copy)  # None -> False
+
             # masked recarray
             if isinstance(data, mrecords.MaskedRecords):
                 mgr = masked_rec_array_to_mgr(data, index, columns, dtype, copy)
@@ -484,6 +493,7 @@ def __init__(
                 mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
 
         elif isinstance(data, (np.ndarray, Series, Index)):
+            copy = bool(copy)  # None -> False
             if data.dtype.names:
                 data_columns = list(data.dtype.names)
                 data = {k: data[k] for k in data_columns}
@@ -497,6 +507,7 @@ def __init__(
 
         # For data is list-like, or Iterable (will consume into list)
         elif isinstance(data, abc.Iterable) and not isinstance(data, (str, bytes)):
+            copy = bool(copy)  # None -> False
             if not isinstance(data, (abc.Sequence, ExtensionArray)):
                 data = list(data)
             if len(data) > 0:
@@ -523,6 +534,7 @@ def __init__(
             else:
                 mgr = init_dict({}, index, columns, dtype=dtype)
         else:
+            copy = bool(copy)  # None -> False
             try:
                 arr = np.array(data, dtype=dtype, copy=copy)
             except (ValueError, TypeError) as err:
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index b030cf753c785..cfacc37389986 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -64,7 +64,7 @@ def arrays_to_mgr(
     columns,
     dtype: Optional[DtypeObj] = None,
     verify_integrity: bool = True,
-    copy: bool = False,
+    copy: Optional[bool] = False,
 ):
     """
     Segregate Series based on type and coerce into matrices.
@@ -236,7 +236,11 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
 
 
 def init_dict(
-    data: Dict, index, columns, dtype: Optional[DtypeObj] = None, copy: bool = False
+    data: Dict,
+    index,
+    columns,
+    dtype: Optional[DtypeObj] = None,
+    copy: Optional[bool] = False,
 ):
     """
     Segregate Series based on type and coerce into matrices.
@@ -329,7 +333,7 @@ def convert(v):
     return values
 
 
-def _homogenize(data, index, dtype: Optional[DtypeObj], copy: bool = False):
+def _homogenize(data, index, dtype: Optional[DtypeObj], copy: Optional[bool] = False):
     oindex = None
     homogenized = []
 
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 91be73146aba3..b8aa98d309942 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -1914,11 +1914,11 @@ def test_constructor_series_copy(self, float_frame):
         df = DataFrame({"A": series["A"]}, copy=True)
         df["A"][:] = 5
 
-        assert not all(series["A"] == 5).all()
+        assert not (series["A"] == 5).all()
 
         df = DataFrame({"A": series["A"]})  # no copy by default
         df["A"][:] = 5
-        assert all(series["A"] == 5).all()
+        assert (series["A"] == 5).all()
 
     def test_constructor_with_nas(self):
         # GH 5016
@@ -2692,7 +2692,12 @@ def test_dict_nocopy(self, copy):
         df.iloc[0, 0] = 0
         df.iloc[0, 1] = 0
 
-        if copy:
+        if copy is None:
+            # copy for ndarray, no copy for EA
+            assert a[0] == 1
+            assert b[0] == 0
+
+        elif copy:
             assert a[0] == 1
             assert b[0] == 1
         else:

From 306d015312f49bd8e1a5c0cb67e31b5a02ca20af Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 22 Jun 2020 11:21:12 -0500
Subject: [PATCH 7/8] Fixup

---
 doc/source/whatsnew/v1.1.0.rst | 2 +-
 pandas/core/frame.py           | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 858b6d01ed955..35f0b85188348 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -261,7 +261,7 @@ Other enhancements
 - :meth:`DataFrame.sample` will now also allow array-like and BitGenerator objects to be passed to ``random_state`` as seeds (:issue:`32503`)
 - :meth:`MultiIndex.union` will now raise `RuntimeWarning` if the object inside are unsortable, pass `sort=False` to suppress this warning (:issue:`33015`)
 - :class:`Series.dt` and :class:`DatatimeIndex` now have an `isocalendar` method that returns a :class:`DataFrame` with year, week, and day calculated according to the ISO 8601 calendar (:issue:`33206`, :issue:`34392`).
-- The :meth:`DataFrame` constructor now uses ``copy`` for dict-inputs to control whether copies of the arrays are made (:issue:`32960`)
+- The :class:`DataFrame` constructor now uses ``copy`` for dict-inputs to control whether copies of the arrays are made (:issue:`32960`)
 - The :meth:`DataFrame.to_feather` method now supports additional keyword
   arguments (e.g. to set the compression) that are added in pyarrow 0.17
   (:issue:`33422`).
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index d638c58a978b3..dfeccebff78cf 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -365,15 +365,15 @@ class DataFrame(NDFrame):
         * `data` is a DataFrame or 2D NumPy array: *no* copy by default.
            Specifying ``copy=True`` will copy the data.
         * `data` is a dict:
-           By default arrays in `data` with with NumPy dtypes in `data` are
+           By default arrays in `data` with NumPy dtypes in `data` are
            copied, while extension types are not. Specifying ``copy=True``
            will copy all of the values, and ``copy=False`` will attempt to
            not copy the data. Note that if `data` has more than one value with
-           the same NumPy dtype then then data will be copied, regardless of
+           the same NumPy dtype then the data will be copied, regardless of
            the value of `copy`.
 
-        For all other cases, zero-copy construction cannot be ensured and `copy`
-        has no effect.
+        For all other types of `data`, zero-copy construction cannot be ensured
+        and `copy` has no effect.
 
     See Also
     --------

From 9f716c829212c05eb0a5b916682c56b6f2228217 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sun, 12 Jul 2020 15:57:06 -0500
Subject: [PATCH 8/8] fix comment

---
 pandas/core/construction.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index ec40d7c44bc3e..f9a111c14a666 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -431,7 +431,7 @@ def sanitize_array(
 
     elif isinstance(data, ABCExtensionArray):
         # it is already ensured above this is not a PandasArray
-        # no copy by default for DataFrame({"A": ndarray})
+        # no copy by default for DataFrame({"A": extension_array})
         if copy is None:
             copy = False
         subarr = data