Skip to content

API: Series and DataFrame constructors to return shallow copy (i.e. don't share index) from another Series/DataFrame #50539

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

5 changes: 5 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -771,6 +771,11 @@ Other API changes
or :attr:`~DataFrame.iloc` (thus, ``df.loc[:, :]`` or ``df.iloc[:, :]``) now returns a
new DataFrame (shallow copy) instead of the original DataFrame, consistent with other
methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`)
- The :class:`Series` and :class:`DataFrame` constructors will now return a shallow copy
(i.e. share data, but not attributes) when passed a Series or DataFrame,
respectively, with the default of ``copy=False`` (and if no other keyword triggers
a copy). Previously, the new Series or DataFrame would share the index attribute (e.g.
``df.index = ...`` would also update the index of the parent or child) (:issue:`49523`)
- Disallow computing ``cumprod`` for :class:`Timedelta` object; previously this returned incorrect values (:issue:`50246`)
- :class:`DataFrame` objects read from a :class:`HDFStore` file without an index now have a :class:`RangeIndex` instead of an ``int64`` index (:issue:`51076`)
- Instantiating an :class:`Index` with a numeric numpy dtype with data containing :class:`NA` and/or :class:`NaT` now raises a ``ValueError``. Previously a ``TypeError`` was raised (:issue:`51050`)
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,10 @@ def __init__(

if isinstance(data, DataFrame):
data = data._mgr
if not copy:
# even when not copying the data, still return a shallow copy
# so that the result does not share the same Manager as the input
data = data.copy(deep=False)

if isinstance(data, (BlockManager, ArrayManager)):
# first check if a Manager is passed without any other arguments
Expand Down
5 changes: 1 addition & 4 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,10 +446,7 @@ def __init__(
elif isinstance(data, Series):
if index is None:
index = data.index
if using_copy_on_write():
data = data._mgr.copy(deep=False)
else:
data = data._mgr
data = data._mgr.copy(deep=False)
else:
data = data.reindex(index, copy=copy)
copy = False
Expand Down
15 changes: 12 additions & 3 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,10 +285,8 @@ def test_constructor_dtype_nocast_view_dataframe(self, using_copy_on_write):
df = DataFrame([[1, 2]])
should_be_view = DataFrame(df, dtype=df[0].dtype)
if using_copy_on_write:
# TODO(CoW) doesn't mutate original
should_be_view.iloc[0, 0] = 99
# assert df.values[0, 0] == 1
assert df.values[0, 0] == 99
assert df.values[0, 0] == 1
else:
should_be_view[0][0] = 99
assert df.values[0, 0] == 99
Expand Down Expand Up @@ -2100,6 +2098,17 @@ def test_constructor_frame_copy(self, float_frame):
assert (cop["A"] == 5).all()
assert not (float_frame["A"] == 5).all()

def test_constructor_frame_shallow_copy(self, float_frame):
# Regression test: constructing a DataFrame from another DataFrame without
# passing ``copy`` should still give a "shallow" copy (share data, not
# attributes), so attribute changes on the result do not reach the input.
# https://github.com/pandas-dev/pandas/issues/49523
# Snapshot of the input, used below to check that it was left unchanged.
orig = float_frame.copy()
cop = DataFrame(float_frame)
# The new frame must wrap its own manager object rather than the input's.
assert cop._mgr is not float_frame._mgr
# Overwriting index of copy doesn't change original
cop.index = np.arange(len(cop))
# The input must still match the snapshot taken before the index assignment.
tm.assert_frame_equal(float_frame, orig)

def test_constructor_ndarray_copy(self, float_frame, using_array_manager):
if not using_array_manager:
df = DataFrame(float_frame.values)
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,18 @@ def test_constructor_limit_copies(self, index):
# we make 1 copy; this is just a smoke test here
assert s._mgr.blocks[0].values is not index

def test_constructor_shallow_copy(self):
# Regression test: constructing a Series from another Series without
# passing ``copy`` should still give a "shallow" copy (share data, not
# attributes), so attribute changes on the result do not reach the input.
# https://github.com/pandas-dev/pandas/issues/49523
s = Series([1, 2, 3])
# Snapshot of the input, used below to check that it was left unchanged.
s_orig = s.copy()
s2 = Series(s)
# The new Series must wrap its own manager object rather than the input's.
assert s2._mgr is not s._mgr
# Overwriting index of s2 doesn't change s
s2.index = ["a", "b", "c"]
# The input must still match the snapshot taken before the index assignment.
tm.assert_series_equal(s, s_orig)

def test_constructor_pass_none(self):
s = Series(None, index=range(5))
assert s.dtype == np.float64
Expand Down