Skip to content

API / CoW: constructing Series from Series creates lazy copy (with default copy=False) #49524

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

4 changes: 4 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ Copy-on-Write improvements
repeatedly returning the same cached Series object). This ensures that those
Series objects correctly follow the Copy-on-Write rules (:issue:`49450`)

- The :class:`Series` constructor will now create a lazy copy (deferring the copy until
a modification to the data happens) when constructing a Series from an existing
Series with the default of ``copy=False`` (:issue:`50471`)

Copy-on-Write can be enabled through

.. code-block:: python
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,10 +426,14 @@ def __init__(
elif isinstance(data, Series):
if index is None:
index = data.index
if using_copy_on_write():
data = data._mgr.copy(deep=False)
else:
data = data._mgr
else:
data = data.reindex(index, copy=copy)
copy = False
data = data._mgr
data = data._mgr
elif is_dict_like(data):
data, index = self._init_dict(data, index, dtype)
dtype = None
Expand Down
75 changes: 75 additions & 0 deletions pandas/tests/copy_view/test_constructors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import numpy as np

from pandas import Series

# -----------------------------------------------------------------------------
# Copy/view behaviour for Series / DataFrame constructors


def test_series_from_series(using_copy_on_write):
    """Check copy/view semantics of ``Series(other_series)`` with default copy.

    Constructing a Series from another Series returns a new object that
    initially shares memory with the original.  With Copy-on-Write enabled,
    mutating either object must not be visible through the other; without
    it, the new Series is a plain view and mutations propagate both ways.

    Parameters
    ----------
    using_copy_on_write : bool
        Whether the Copy-on-Write branch of the assertions applies
        (supplied by the test suite's fixture of the same name).
    """
    ser = Series([1, 2, 3], name="name")

    # default is copy=False -> new Series is a shallow copy / view of original
    result = Series(ser)

    # the shallow copy still shares memory
    assert np.shares_memory(ser.values, result.values)

    if using_copy_on_write:
        # the new manager must keep track of the parent it references
        assert result._mgr.refs is not None

    # --- mutate the child -------------------------------------------------
    result.iloc[0] = 0
    if using_copy_on_write:
        # mutating new series copy doesn't mutate original
        assert ser.iloc[0] == 1
        # mutating triggered a copy-on-write -> no longer shares memory
        assert not np.shares_memory(ser.values, result.values)
    else:
        # mutating shallow copy does mutate original
        assert ser.iloc[0] == 0
        # and still shares memory
        assert np.shares_memory(ser.values, result.values)

    # --- the same when modifying the parent -------------------------------
    result = Series(ser)

    # Write a value that differs from anything stored so far.  Without CoW
    # ``ser`` already holds 0 in the first slot at this point, so writing 0
    # again could not show whether the mutation actually propagated.
    ser.iloc[0] = 10
    if using_copy_on_write:
        # mutating original doesn't mutate new series
        assert result.iloc[0] == 1
    else:
        # mutating original does mutate shallow copy
        assert result.iloc[0] == 10


def test_series_from_series_with_reindex(using_copy_on_write):
    """Check copy/view semantics of ``Series(other_series, index=...)``.

    Passing an index that matches the original one does not require the
    values to be reindexed, so the result shares memory with the original;
    an index that requires a real reindex yields independent data.

    Parameters
    ----------
    using_copy_on_write : bool
        Whether the Copy-on-Write branch of the assertions applies
        (supplied by the test suite's fixture of the same name).
    """
    ser = Series([1, 2, 3], name="name")

    # passing an index that doesn't actually require a reindex of the values
    # -> without CoW we get an actual mutating view
    equivalent_indexes = [
        ser.index,
        ser.index.copy(),
        list(ser.index),
        ser.index.rename("idx"),
    ]
    for offset, index in enumerate(equivalent_indexes):
        # Use a different value on every iteration: without CoW the original
        # keeps the previously written value, so rewriting the same number
        # would make the propagation check pass trivially after the first
        # iteration.
        new_value = 10 + offset

        result = Series(ser, index=index)
        assert np.shares_memory(ser.values, result.values)

        result.iloc[0] = new_value
        if using_copy_on_write:
            # the original is left untouched
            assert ser.iloc[0] == 1
        else:
            # the write goes through the view into the original
            assert ser.iloc[0] == new_value

    # ensure that if an actual reindex is needed, we don't have any refs
    # (mutating the result wouldn't trigger CoW)
    result = Series(ser, index=[0, 1, 2, 3])
    assert not np.shares_memory(ser.values, result.values)
    if using_copy_on_write:
        assert result._mgr.refs is None or result._mgr.refs[0] is None
3 changes: 0 additions & 3 deletions pandas/tests/copy_view/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,3 @@ def test_dataframe_add_column_from_series():
df.loc[2, "new"] = 100
expected_s = Series([0, 11, 12])
tm.assert_series_equal(s, expected_s)


# TODO add tests for constructors