Skip to content

Commit 8906d4a

Browse files
Backport PR pandas-dev#51239 on branch 2.0.x (API / CoW: constructing DataFrame from DataFrame/BlockManager creates lazy copy) (pandas-dev#51650)
Backport PR pandas-dev#51239: API / CoW: constructing DataFrame from DataFrame/BlockManager creates lazy copy Co-authored-by: Patrick Hoefler <[email protected]>
1 parent 9086bde commit 8906d4a

File tree

3 files changed

+25
-0
lines changed

3 files changed

+25
-0
lines changed

doc/source/whatsnew/v2.0.0.rst

+4
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,10 @@ Copy-on-Write improvements
246246
a modification to the data happens) when constructing a Series from an existing
247247
Series with the default of ``copy=False`` (:issue:`50471`)
248248

249+
- The :class:`DataFrame` constructor will now create a lazy copy (deferring the copy until
250+
a modification to the data happens) when constructing from an existing
251+
:class:`DataFrame` with the default of ``copy=False`` (:issue:`51239`)
252+
249253
- The :class:`DataFrame` constructor, when constructing a DataFrame from a dictionary
250254
of Series objects and specifying ``copy=False``, will now use a lazy copy
251255
of those Series objects for the columns of the DataFrame (:issue:`50777`)

pandas/core/frame.py

+2
Original file line numberDiff line numberDiff line change
@@ -656,6 +656,8 @@ def __init__(
656656
data = data.copy(deep=False)
657657

658658
if isinstance(data, (BlockManager, ArrayManager)):
659+
if using_copy_on_write():
660+
data = data.copy(deep=False)
659661
# first check if a Manager is passed without any other arguments
660662
# -> use fastpath (without checking Manager type)
661663
if index is None and columns is None and dtype is None and not copy:

pandas/tests/copy_view/test_constructors.py

+19
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,25 @@ def test_series_from_series_with_reindex(using_copy_on_write):
8282
assert not result._mgr.blocks[0].refs.has_reference()
8383

8484

85+
@pytest.mark.parametrize("func", [lambda x: x, lambda x: x._mgr])
@pytest.mark.parametrize("columns", [None, ["a"]])
def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, func):
    """Check sharing semantics of ``DataFrame(<DataFrame or manager>)``.

    Parameters
    ----------
    using_copy_on_write : bool
        Fixture telling whether the Copy-on-Write mode is active.
    columns : list or None
        Forwarded to the constructor. ``None`` keeps all constructor
        arguments at their defaults; ``["a"]`` passes an explicit (identical)
        column list so the constructor is also called with a non-default
        argument.
    func : callable
        Maps the parent frame to the constructor input: either the frame
        itself or its ``_mgr``.
    """
    df = DataFrame({"a": [1, 2, 3]})
    df_orig = df.copy()

    # ``columns`` must actually reach the constructor: otherwise both
    # parametrizations run the very same scenario and the case with an
    # explicit column list is silently never tested.
    new_df = DataFrame(func(df), columns=columns)

    # Construction alone must not copy the data in either mode.
    assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
    new_df.iloc[0] = 100

    if using_copy_on_write:
        # The write triggered the deferred copy; the parent is untouched.
        assert not np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
        tm.assert_frame_equal(df, df_orig)
    else:
        # Without Copy-on-Write the child is a view: the write is visible
        # in the parent as well.
        assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
        tm.assert_frame_equal(df, new_df)
102+
103+
85104
@pytest.mark.parametrize("dtype", [None, "int64", "Int64"])
86105
@pytest.mark.parametrize("index", [None, [0, 1, 2]])
87106
@pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]])

0 commit comments

Comments
 (0)