pandas-dev · phofl · Mar 29, 2023 · Mar 16, 2023 · Mar 16, 2023 · Mar 16, 2023
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -193,12 +193,13 @@ Copy-on-Write improvements
 - The :class:`DataFrame` constructor, when constructing a DataFrame from a
   :class:`Series` and specifying ``copy=False``, will now respect Copy-on-Write.
 
-- The :class:`DataFrame` constructor, when constructing from a NumPy array,
-  will now copy the array by default to avoid mutating the :class:`DataFrame`
+- The :class:`DataFrame` and :class:`Series` constructors, when constructing from
+  a NumPy array, will now copy the array by default to avoid mutating
+  the :class:`DataFrame` / :class:`Series`
   when mutating the array. Specify ``copy=False`` to get the old behavior.
   When setting ``copy=False`` pandas does not guarantee correct Copy-on-Write
   behavior when the NumPy array is modified after creation of the
-  :class:`DataFrame`.
+  :class:`DataFrame` / :class:`Series`.
 
 - The :meth:`DataFrame.from_records` will now respect Copy-on-Write when called
   with a :class:`DataFrame`.

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -708,7 +708,7 @@ def _create_series(index):
     """Helper for the _series dict"""
     size = len(index)
     data = np.random.randn(size)
-    return Series(data, index=index, name="a")
+    return Series(data, index=index, name="a", copy=False)
 
 
 _series = {

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -55,6 +55,7 @@
     validate_percentile,
 )
 
+from pandas.core.dtypes.astype import astype_is_view
 from pandas.core.dtypes.cast import (
     LossySetitemError,
     convert_dtypes,
@@ -373,14 +374,14 @@ def __init__(
         index=None,
         dtype: Dtype | None = None,
         name=None,
-        copy: bool = False,
+        copy: bool | None = None,
         fastpath: bool = False,
     ) -> None:
         if (
             isinstance(data, (SingleBlockManager, SingleArrayManager))
             and index is None
             and dtype is None
-            and copy is False
+            and (copy is False or copy is None)
         ):
             if using_copy_on_write():
                 data = data.copy(deep=False)
@@ -393,6 +394,13 @@ def __init__(
                 self.name = name
             return
 
+        if isinstance(data, (ExtensionArray, np.ndarray)):
+            if copy is not False and using_copy_on_write():
+                if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)):
+                    data = data.copy()
+        if copy is None:
+            copy = False
+
         # we are called internally, so short-circuit
         if fastpath:
             # data is a ndarray, index is defined
@@ -5855,7 +5863,7 @@ def _construct_result(
         # TODO: result should always be ArrayLike, but this fails for some
         #  JSONArray tests
         dtype = getattr(result, "dtype", None)
-        out = self._constructor(result, index=self.index, dtype=dtype)
+        out = self._constructor(result, index=self.index, dtype=dtype, copy=False)
         out = out.__finalize__(self)
 
         # Set the result's name after __finalize__ is called because __finalize__
@@ -5874,7 +5882,7 @@ def _flex_method(self, other, op, *, level=None, fill_value=None, axis: Axis = 0
         elif isinstance(other, (np.ndarray, list, tuple)):
             if len(other) != len(self):
                 raise ValueError("Lengths must be equal")
-            other = self._constructor(other, self.index)
+            other = self._constructor(other, self.index, copy=False)
             result = self._binop(other, op, level=level, fill_value=fill_value)
             result.name = res_name
             return result

diff --git a/pandas/tests/arrays/categorical/test_replace.py b/pandas/tests/arrays/categorical/test_replace.py
@@ -60,15 +60,15 @@ def test_replace_categorical(to_replace, value, result, expected_error_msg):
     # GH#26988
     cat = Categorical(["a", "b"])
     expected = Categorical(result)
-    result = pd.Series(cat).replace(to_replace, value)._values
+    result = pd.Series(cat, copy=False).replace(to_replace, value)._values
 
     tm.assert_categorical_equal(result, expected)
     if to_replace == "b":  # the "c" test is supposed to be unchanged
         with pytest.raises(AssertionError, match=expected_error_msg):
             # ensure non-inplace call does not affect original
             tm.assert_categorical_equal(cat, expected)
 
-    pd.Series(cat).replace(to_replace, value, inplace=True)
+    pd.Series(cat, copy=False).replace(to_replace, value, inplace=True)
     tm.assert_categorical_equal(cat, expected)
 
 

diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py
@@ -200,7 +200,11 @@ def test_astype_arrow_timestamp(using_copy_on_write):
     result = df.astype("timestamp[ns][pyarrow]")
     if using_copy_on_write:
         assert not result._mgr._has_no_reference(0)
-        assert np.shares_memory(get_array(df, "a"), get_array(result, "a")._pa_array)
+        # TODO(CoW): arrow is not setting copy=False in the Series constructor
+        # under the hood
+        assert not np.shares_memory(
+            get_array(df, "a"), get_array(result, "a")._pa_array
+        )
 
 
 def test_convert_dtypes_infer_objects(using_copy_on_write):

diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
@@ -30,7 +30,7 @@ def test_series_from_series(dtype, using_copy_on_write):
     result = Series(ser, dtype=dtype)
 
     # the shallow copy still shares memory
-    assert np.shares_memory(ser.values, result.values)
+    assert np.shares_memory(get_array(ser), get_array(result))
 
     if using_copy_on_write:
         assert result._mgr.blocks[0].refs.has_reference()
@@ -40,13 +40,13 @@ def test_series_from_series(dtype, using_copy_on_write):
         result.iloc[0] = 0
         assert ser.iloc[0] == 1
         # mutating triggered a copy-on-write -> no longer shares memory
-        assert not np.shares_memory(ser.values, result.values)
+        assert not np.shares_memory(get_array(ser), get_array(result))
     else:
         # mutating shallow copy does mutate original
         result.iloc[0] = 0
         assert ser.iloc[0] == 0
         # and still shares memory
-        assert np.shares_memory(ser.values, result.values)
+        assert np.shares_memory(get_array(ser), get_array(result))
 
     # the same when modifying the parent
     result = Series(ser, dtype=dtype)
@@ -90,6 +90,38 @@ def test_series_from_series_with_reindex(using_copy_on_write):
         assert not result._mgr.blocks[0].refs.has_reference()
 
 
+@pytest.mark.parametrize("fastpath", [False, True])
+@pytest.mark.parametrize("dtype", [None, "int64"])
+@pytest.mark.parametrize("idx", [None, pd.RangeIndex(start=0, stop=3, step=1)])
+@pytest.mark.parametrize(
+    "arr", [np.array([1, 2, 3], dtype="int64"), pd.array([1, 2, 3], dtype="Int64")]
+)
+def test_series_from_array(using_copy_on_write, idx, dtype, fastpath, arr):
+    if idx is None or dtype is not None:
+        fastpath = False
+    ser = Series(arr, dtype=dtype, index=idx, fastpath=fastpath)
+    ser_orig = ser.copy()
+    data = getattr(arr, "_data", arr)
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(ser), data)
+    else:
+        assert np.shares_memory(get_array(ser), data)
+
+    arr[0] = 100
+    if using_copy_on_write:
+        tm.assert_series_equal(ser, ser_orig)
+    else:
+        expected = Series([100, 2, 3], dtype=dtype if dtype is not None else arr.dtype)
+        tm.assert_series_equal(ser, expected)
+
+
+@pytest.mark.parametrize("copy", [True, False, None])
+def test_series_from_array_different_dtype(using_copy_on_write, copy):
+    arr = np.array([1, 2, 3], dtype="int64")
+    ser = Series(arr, dtype="int32", copy=copy)
+    assert not np.shares_memory(get_array(ser), arr)
+
+
 @pytest.mark.parametrize(
     "idx",
     [

diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py
@@ -22,7 +22,7 @@ def test_array_from_scalars(self, data):
         assert isinstance(result, type(data))
 
     def test_series_constructor(self, data):
-        result = pd.Series(data)
+        result = pd.Series(data, copy=False)
         assert result.dtype == data.dtype
         assert len(result) == len(data)
         if hasattr(result._mgr, "blocks"):

diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
@@ -271,7 +271,7 @@ def test_fillna_copy_frame(self, data_missing):
 
     def test_fillna_copy_series(self, data_missing):
         arr = data_missing.take([1, 1])
-        ser = pd.Series(arr)
+        ser = pd.Series(arr, copy=False)
         ser_orig = ser.copy()
 
         filled_val = ser[0]

diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py
@@ -288,7 +288,7 @@ def test_fillna_copy_frame(self, data_missing, using_copy_on_write):
 
     def test_fillna_copy_series(self, data_missing, using_copy_on_write):
         arr = data_missing.take([1, 1])
-        ser = pd.Series(arr)
+        ser = pd.Series(arr, copy=False)
 
         filled_val = ser[0]
         result = ser.fillna(filled_val)

diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
@@ -1364,7 +1364,7 @@ def check_can_hold_element(self, obj, elem, inplace: bool):
 
     def check_series_setitem(self, elem, index: Index, inplace: bool):
         arr = index._data.copy()
-        ser = Series(arr)
+        ser = Series(arr, copy=False)
 
         self.check_can_hold_element(ser, elem, inplace)
 

diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
@@ -2821,7 +2821,7 @@ def __getitem__(self, ix):
             def dtype(self):
                 return DtypeStub()
 
-        series = Series(ExtTypeStub())
+        series = Series(ExtTypeStub(), copy=False)
         res = repr(series)  # This line crashed before #33770 was fixed.
         expected = "\n".join(
             ["0    [False  True]", "1    [ True False]", "dtype: DtypeStub"]

diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py
@@ -70,7 +70,7 @@ def test_td64_mean(self, box):
         tdi = pd.TimedeltaIndex([0, 3, -2, -7, 1, 2, -1, 3, 5, -2, 4], unit="D")
 
         tdarr = tdi._data
-        obj = box(tdarr)
+        obj = box(tdarr, copy=False)
 
         result = obj.mean()
         expected = np.array(tdarr).mean()

diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
@@ -578,7 +578,7 @@ def test_setitem_scalar_into_readonly_backing_data():
 
     array = np.zeros(5)
     array.flags.writeable = False  # make the array immutable
-    series = Series(array)
+    series = Series(array, copy=False)
 
     for n in series.index:
         msg = "assignment destination is read-only"
@@ -593,7 +593,7 @@ def test_setitem_slice_into_readonly_backing_data():
 
     array = np.zeros(5)
     array.flags.writeable = False  # make the array immutable
-    series = Series(array)
+    series = Series(array, copy=False)
 
     msg = "assignment destination is read-only"
     with pytest.raises(ValueError, match=msg):

diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -527,7 +527,7 @@ def test_categorical_sideeffects_free(self):
         # however, copy is False by default
         # so this WILL change values
         cat = Categorical(["a", "b", "c", "a"])
-        s = Series(cat)
+        s = Series(cat, copy=False)
         assert s.values is cat
         s = s.cat.rename_categories([1, 2, 3])
         assert s.values is not cat