pandas-dev · phofl · Mar 29, 2023 · Mar 16, 2023 · Mar 16, 2023 · Mar 16, 2023
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -190,12 +190,13 @@ Copy-on-Write improvements
   of Series objects and specifying ``copy=False``, will now use a lazy copy
   of those Series objects for the columns of the DataFrame (:issue:`50777`)
 
-- The :class:`DataFrame` constructor, when constructing from a NumPy array,
-  will now copy the array by default to avoid mutating the :class:`DataFrame`
+- The :class:`DataFrame` and :class:`Series` constructors, when constructing from
+  a NumPy array, will now copy the array by default to avoid mutating
+  the :class:`DataFrame` / :class:`Series`
   when mutating the array. Specify ``copy=False`` to get the old behavior.
   When setting ``copy=False`` pandas does not guarantee correct Copy-on-Write
   behavior when the NumPy array is modified after creation of the
-  :class:`DataFrame`.
+  :class:`DataFrame` / :class:`Series`.
 
 - Trying to set values using chained assignment (for example, ``df["a"][1:3] = 0``)
   will now always raise an warning when Copy-on-Write is enabled. In this mode,

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -708,7 +708,7 @@ def _create_series(index):
     """Helper for the _series dict"""
     size = len(index)
     data = np.random.randn(size)
-    return Series(data, index=index, name="a")
+    return Series(data, index=index, name="a", copy=False)
 
 
 _series = {

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -58,6 +58,7 @@
     validate_percentile,
 )
 
+from pandas.core.dtypes.astype import astype_is_view
 from pandas.core.dtypes.cast import (
     LossySetitemError,
     convert_dtypes,
@@ -376,9 +377,15 @@ def __init__(
         index=None,
         dtype: Dtype | None = None,
         name=None,
-        copy: bool = False,
+        copy: bool | None = None,
         fastpath: bool = False,
     ) -> None:
+        if copy is None:
+            if using_copy_on_write():
+                copy = True
+            else:
+                copy = False
+
         if (
             isinstance(data, (SingleBlockManager, SingleArrayManager))
             and index is None
@@ -394,6 +401,11 @@ def __init__(
                 self.name = name
             return
 
+        if isinstance(data, (ExtensionArray, np.ndarray)):
+            if copy and using_copy_on_write():
+                if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)):
+                    data = data.copy()
+
         # we are called internally, so short-circuit
         if fastpath:
             # data is a ndarray, index is defined
@@ -6092,7 +6104,7 @@ def _construct_result(
         # TODO: result should always be ArrayLike, but this fails for some
         #  JSONArray tests
         dtype = getattr(result, "dtype", None)
-        out = self._constructor(result, index=self.index, dtype=dtype)
+        out = self._constructor(result, index=self.index, dtype=dtype, copy=False)
         out = out.__finalize__(self)
 
         # Set the result's name after __finalize__ is called because __finalize__
@@ -6111,7 +6123,7 @@ def _flex_method(self, other, op, *, level=None, fill_value=None, axis: Axis = 0
         elif isinstance(other, (np.ndarray, list, tuple)):
             if len(other) != len(self):
                 raise ValueError("Lengths must be equal")
-            other = self._constructor(other, self.index)
+            other = self._constructor(other, self.index, copy=False)
             result = self._binop(other, op, level=level, fill_value=fill_value)
             result.name = res_name
             return result

diff --git a/pandas/tests/arrays/categorical/test_replace.py b/pandas/tests/arrays/categorical/test_replace.py
@@ -60,15 +60,15 @@ def test_replace_categorical(to_replace, value, result, expected_error_msg):
     # GH#26988
     cat = Categorical(["a", "b"])
     expected = Categorical(result)
-    result = pd.Series(cat).replace(to_replace, value)._values
+    result = pd.Series(cat, copy=False).replace(to_replace, value)._values
 
     tm.assert_categorical_equal(result, expected)
     if to_replace == "b":  # the "c" test is supposed to be unchanged
         with pytest.raises(AssertionError, match=expected_error_msg):
             # ensure non-inplace call does not affect original
             tm.assert_categorical_equal(cat, expected)
 
-    pd.Series(cat).replace(to_replace, value, inplace=True)
+    pd.Series(cat, copy=False).replace(to_replace, value, inplace=True)
     tm.assert_categorical_equal(cat, expected)
 
 

diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py
@@ -200,7 +200,11 @@ def test_astype_arrow_timestamp(using_copy_on_write):
     result = df.astype("timestamp[ns][pyarrow]")
     if using_copy_on_write:
         assert not result._mgr._has_no_reference(0)
-        assert np.shares_memory(get_array(df, "a"), get_array(result, "a")._pa_array)
+        # TODO(CoW): arrow is not setting copy=False in the Series constructor
+        # under the hood
+        assert not np.shares_memory(
+            get_array(df, "a"), get_array(result, "a")._pa_array
+        )
 
 
 def test_convert_dtypes_infer_objects(using_copy_on_write):

diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
@@ -1,6 +1,7 @@
 import numpy as np
 import pytest
 
+import pandas as pd
 from pandas import (
     DataFrame,
     DatetimeIndex,
@@ -89,6 +90,47 @@ def test_series_from_series_with_reindex(using_copy_on_write):
         assert not result._mgr.blocks[0].refs.has_reference()
 
 
+@pytest.mark.parametrize("fastpath", [False, True])
+@pytest.mark.parametrize("dtype", [None, "int64"])
+@pytest.mark.parametrize("idx", [None, pd.RangeIndex(start=0, stop=3, step=1)])
+@pytest.mark.parametrize(
+    "arr", [np.array([1, 2, 3], dtype="int64"), pd.array([1, 2, 3], dtype="Int64")]
+)
+def test_series_from_array(using_copy_on_write, idx, dtype, fastpath, arr):
+    # Under Copy-on-Write the Series must own a copy of the array's data;
+    # otherwise it keeps sharing memory with the array it was built from.
+    # pytest creates the parametrized arrays once and hands the same objects
+    # to every case, so work on a private copy before mutating it below.
+    arr = arr.copy()
+    if idx is None or dtype is not None:
+        # fastpath is only exercised with an explicit index and no dtype
+        fastpath = False
+    ser = Series(arr, dtype=dtype, index=idx, fastpath=fastpath)
+    ser_orig = ser.copy()
+    # compare against ``arr._data`` when the array exposes it, else ``arr``
+    data = getattr(arr, "_data", arr)
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(ser), data)
+    else:
+        assert np.shares_memory(get_array(ser), data)
+
+    # modifying the source array must not be visible in the Series under CoW
+    arr[0] = 100
+    if using_copy_on_write:
+        tm.assert_series_equal(ser, ser_orig)
+    else:
+        expected = Series([100, 2, 3], dtype=dtype if dtype is not None else arr.dtype)
+        tm.assert_series_equal(ser, expected)
+
+
+@pytest.mark.parametrize("copy", [True, False, None])
+def test_series_from_array_different_dtype(using_copy_on_write, copy):
+    # int64 -> int32 must never share memory with the input, for any ``copy``
+    arr = np.array([1, 2, 3], dtype="int64")
+    ser = Series(arr, dtype="int32", copy=copy)
+    assert not np.shares_memory(get_array(ser), arr)
+
+
 @pytest.mark.parametrize(
     "idx",
     [

diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py
@@ -22,7 +22,7 @@ def test_array_from_scalars(self, data):
         assert isinstance(result, type(data))
 
     def test_series_constructor(self, data):
-        result = pd.Series(data)
+        result = pd.Series(data, copy=False)
         assert result.dtype == data.dtype
         assert len(result) == len(data)
         if hasattr(result._mgr, "blocks"):

diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
@@ -271,7 +271,7 @@ def test_fillna_copy_frame(self, data_missing):
 
     def test_fillna_copy_series(self, data_missing):
         arr = data_missing.take([1, 1])
-        ser = pd.Series(arr)
+        ser = pd.Series(arr, copy=False)
         ser_orig = ser.copy()
 
         filled_val = ser[0]

diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py
@@ -288,7 +288,7 @@ def test_fillna_copy_frame(self, data_missing, using_copy_on_write):
 
     def test_fillna_copy_series(self, data_missing, using_copy_on_write):
         arr = data_missing.take([1, 1])
-        ser = pd.Series(arr)
+        ser = pd.Series(arr, copy=False)
 
         filled_val = ser[0]
         result = ser.fillna(filled_val)

diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
@@ -1364,7 +1364,7 @@ def check_can_hold_element(self, obj, elem, inplace: bool):
 
     def check_series_setitem(self, elem, index: Index, inplace: bool):
         arr = index._data.copy()
-        ser = Series(arr)
+        ser = Series(arr, copy=False)
 
         self.check_can_hold_element(ser, elem, inplace)
 

diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
@@ -2821,7 +2821,7 @@ def __getitem__(self, ix):
             def dtype(self):
                 return DtypeStub()
 
-        series = Series(ExtTypeStub())
+        series = Series(ExtTypeStub(), copy=False)
         res = repr(series)  # This line crashed before #33770 was fixed.
         expected = "\n".join(
             ["0    [False  True]", "1    [ True False]", "dtype: DtypeStub"]

diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py
@@ -70,7 +70,7 @@ def test_td64_mean(self, box):
         tdi = pd.TimedeltaIndex([0, 3, -2, -7, 1, 2, -1, 3, 5, -2, 4], unit="D")
 
         tdarr = tdi._data
-        obj = box(tdarr)
+        obj = box(tdarr, copy=False)
 
         result = obj.mean()
         expected = np.array(tdarr).mean()

diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
@@ -578,7 +578,7 @@ def test_setitem_scalar_into_readonly_backing_data():
 
     array = np.zeros(5)
     array.flags.writeable = False  # make the array immutable
-    series = Series(array)
+    series = Series(array, copy=False)
 
     for n in series.index:
         msg = "assignment destination is read-only"
@@ -593,7 +593,7 @@ def test_setitem_slice_into_readonly_backing_data():
 
     array = np.zeros(5)
     array.flags.writeable = False  # make the array immutable
-    series = Series(array)
+    series = Series(array, copy=False)
 
     msg = "assignment destination is read-only"
     with pytest.raises(ValueError, match=msg):

diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -527,7 +527,7 @@ def test_categorical_sideeffects_free(self):
         # however, copy is False by default
         # so this WILL change values
         cat = Categorical(["a", "b", "c", "a"])
-        s = Series(cat)
+        s = Series(cat, copy=False)
         assert s.values is cat
         s = s.cat.rename_categories([1, 2, 3])
         assert s.values is not cat