diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index b006d3820889f..5e8d2c9c39259 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -771,6 +771,11 @@ Other API changes or :attr:`~DataFrame.iloc` (thus, ``df.loc[:, :]`` or ``df.iloc[:, :]``) now returns a new DataFrame (shallow copy) instead of the original DataFrame, consistent with other methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`) +- The :class:`Series` and :class:`DataFrame` constructors now return a shallow copy + (i.e. share data, but not attributes) when passed a Series or DataFrame, + respectively, with the default ``copy=False`` (and if no other keyword triggers + a copy). Previously, the new Series or DataFrame would share the index attribute (e.g. + ``df.index = ...`` would also update the index of the parent or child) (:issue:`49523`) - Disallow computing ``cumprod`` for :class:`Timedelta` object; previously this returned incorrect values (:issue:`50246`) - :class:`DataFrame` objects read from a :class:`HDFStore` file without an index now have a :class:`RangeIndex` instead of an ``int64`` index (:issue:`51076`) - Instantiating an :class:`Index` with an numeric numpy dtype with data containing :class:`NA` and/or :class:`NaT` now raises a ``ValueError``. 
Previously a ``TypeError`` was raised (:issue:`51050`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2361c254f5161..2650090a3f61a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -652,6 +652,10 @@ def __init__( if isinstance(data, DataFrame): data = data._mgr + if not copy: + # if not copying data, ensure to still return a shallow copy + # to avoid the result sharing the same Manager + data = data.copy(deep=False) if isinstance(data, (BlockManager, ArrayManager)): # first check if a Manager is passed without any other arguments diff --git a/pandas/core/series.py b/pandas/core/series.py index 80dd0dd19f96f..34d18b779c71b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -446,10 +446,7 @@ def __init__( elif isinstance(data, Series): if index is None: index = data.index - if using_copy_on_write(): - data = data._mgr.copy(deep=False) - else: - data = data._mgr + data = data._mgr.copy(deep=False) else: data = data.reindex(index, copy=copy) copy = False diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index c621e9bae78f8..d4d2164f2a4a7 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -285,10 +285,8 @@ def test_constructor_dtype_nocast_view_dataframe(self, using_copy_on_write): df = DataFrame([[1, 2]]) should_be_view = DataFrame(df, dtype=df[0].dtype) if using_copy_on_write: - # TODO(CoW) doesn't mutate original should_be_view.iloc[0, 0] = 99 - # assert df.values[0, 0] == 1 - assert df.values[0, 0] == 99 + assert df.values[0, 0] == 1 else: should_be_view[0][0] = 99 assert df.values[0, 0] == 99 @@ -2100,6 +2098,17 @@ def test_constructor_frame_copy(self, float_frame): assert (cop["A"] == 5).all() assert not (float_frame["A"] == 5).all() + def test_constructor_frame_shallow_copy(self, float_frame): + # constructing a DataFrame from DataFrame with copy=False should still + # give a "shallow" copy (share data, not attributes) + # 
https://github.com/pandas-dev/pandas/issues/49523 + orig = float_frame.copy() + cop = DataFrame(float_frame) + assert cop._mgr is not float_frame._mgr + # Overwriting index of copy doesn't change original + cop.index = np.arange(len(cop)) + tm.assert_frame_equal(float_frame, orig) + def test_constructor_ndarray_copy(self, float_frame, using_array_manager): if not using_array_manager: df = DataFrame(float_frame.values) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 2d85462691896..951c9df0f6e99 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -727,6 +727,18 @@ def test_constructor_limit_copies(self, index): # we make 1 copy; this is just a smoke test here assert s._mgr.blocks[0].values is not index + def test_constructor_shallow_copy(self): + # constructing a Series from Series with copy=False should still + # give a "shallow" copy (share data, not attributes) + # https://github.com/pandas-dev/pandas/issues/49523 + s = Series([1, 2, 3]) + s_orig = s.copy() + s2 = Series(s) + assert s2._mgr is not s._mgr + # Overwriting index of s2 doesn't change s + s2.index = ["a", "b", "c"] + tm.assert_series_equal(s, s_orig) + def test_constructor_pass_none(self): s = Series(None, index=range(5)) assert s.dtype == np.float64