diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index cdc9cbe0d7261..cb8725f3a776a 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -190,6 +190,9 @@ Copy-on-Write improvements of Series objects and specifying ``copy=False``, will now use a lazy copy of those Series objects for the columns of the DataFrame (:issue:`50777`) +- The :class:`DataFrame` constructor, when constructing a DataFrame from a + :class:`Series` and specifying ``copy=False``, will now respect Copy-on-Write. + - The :class:`DataFrame` constructor, when constructing from a NumPy array, will now copy the array by default to avoid mutating the :class:`DataFrame` when mutating the array. Specify ``copy=False`` to get the old behavior. diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index b114b8a1aa7aa..5c46051018b9a 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -17,6 +17,7 @@ from pandas._libs import lib +from pandas.core.dtypes.astype import astype_is_view from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, dict_compat, @@ -260,6 +261,7 @@ def ndarray_to_mgr( copy_on_sanitize = False if typ == "array" else copy vdtype = getattr(values, "dtype", None) + refs = None if is_1d_only_ea_dtype(vdtype) or is_1d_only_ea_dtype(dtype): # GH#19157 @@ -291,7 +293,20 @@ def ndarray_to_mgr( if values.ndim == 1: values = values.reshape(-1, 1) - elif isinstance(values, (np.ndarray, ExtensionArray, ABCSeries, Index)): + elif isinstance(values, ABCSeries): + if not copy_on_sanitize and ( + dtype is None or astype_is_view(values.dtype, dtype) + ): + refs = values._references + + if copy_on_sanitize: + values = values._values.copy() + else: + values = values._values + + values = _ensure_2d(values) + + elif isinstance(values, (np.ndarray, ExtensionArray, Index)): # drop subclass info values = np.array(values, copy=copy_on_sanitize) values = _ensure_2d(values) @@ -356,11 +371,11 @@ def ndarray_to_mgr( ] else: bp = BlockPlacement(slice(len(columns))) - nb = new_block_2d(values, placement=bp) + nb = new_block_2d(values, placement=bp, refs=refs) block_values = [nb] else: bp = BlockPlacement(slice(len(columns))) - nb = new_block_2d(values, placement=bp) + nb = new_block_2d(values, placement=bp, refs=refs) block_values = [nb] if len(columns) == 0: diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index 8ceb3cd7d2d2b..d0750f2d090de 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -200,6 +200,38 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype): assert np.shares_memory(arr_before, arr_after) +@pytest.mark.parametrize( + "data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)] +) +def test_dataframe_from_series(using_copy_on_write, data, dtype): + ser = Series(data, dtype=dtype) + ser_orig = ser.copy() + df = DataFrame(ser, dtype=dtype) + assert np.shares_memory(get_array(ser), get_array(df, 0)) + if using_copy_on_write: + assert not df._mgr._has_no_reference(0) + + df.iloc[0, 0] = data[-1] + if using_copy_on_write: + tm.assert_series_equal(ser, ser_orig) + + +def test_dataframe_from_series_different_dtype(using_copy_on_write): + ser = Series([1, 2], dtype="int64") + df = DataFrame(ser, dtype="int32") + assert not np.shares_memory(get_array(ser), get_array(df, 0)) + if using_copy_on_write: + assert df._mgr._has_no_reference(0) + + +def test_dataframe_from_series_infer_datetime(using_copy_on_write): + ser = Series([Timestamp("2019-12-31"), Timestamp("2020-12-31")], dtype=object) + df = DataFrame(ser) + assert not np.shares_memory(get_array(ser), get_array(df, 0)) + if using_copy_on_write: + assert df._mgr._has_no_reference(0) + + @pytest.mark.parametrize("index", [None, [0, 1, 2]]) def test_dataframe_from_dict_of_series_with_dtype(index): # Variant of above, but now passing a dtype that causes a copy