Skip to content

Commit a7fec97

Browse files
authored
BUG-CoW: DataFrame constructed from Series not respecting CoW (#52031)
1 parent 81fea58 commit a7fec97

File tree

3 files changed

+52
-3
lines changed

3 files changed

+52
-3
lines changed

doc/source/whatsnew/v2.0.0.rst

+3
Original file line number | Diff line number | Diff line change
@@ -190,6 +190,9 @@ Copy-on-Write improvements
190190
of Series objects and specifying ``copy=False``, will now use a lazy copy
191191
of those Series objects for the columns of the DataFrame (:issue:`50777`)
192192

193+
- The :class:`DataFrame` constructor, when constructing a DataFrame from a
194+
:class:`Series` and specifying ``copy=False``, will now respect Copy-on-Write.
195+
193196
- The :class:`DataFrame` constructor, when constructing from a NumPy array,
194197
will now copy the array by default to avoid mutating the :class:`DataFrame`
195198
when mutating the array. Specify ``copy=False`` to get the old behavior.

pandas/core/internals/construction.py

+17 -3
Original file line number | Diff line number | Diff line change
@@ -260,6 +260,7 @@ def ndarray_to_mgr(
260260
copy_on_sanitize = False if typ == "array" else copy
261261

262262
vdtype = getattr(values, "dtype", None)
263+
refs = None
263264
if is_1d_only_ea_dtype(vdtype) or is_1d_only_ea_dtype(dtype):
264265
# GH#19157
265266

@@ -291,7 +292,20 @@ def ndarray_to_mgr(
291292
if values.ndim == 1:
292293
values = values.reshape(-1, 1)
293294

294-
elif isinstance(values, (np.ndarray, ExtensionArray, ABCSeries, Index)):
295+
elif isinstance(values, ABCSeries):
296+
if not copy_on_sanitize and (
297+
dtype is None or astype_is_view(values.dtype, dtype)
298+
):
299+
refs = values._references
300+
301+
if copy_on_sanitize:
302+
values = values._values.copy()
303+
else:
304+
values = values._values
305+
306+
values = _ensure_2d(values)
307+
308+
elif isinstance(values, (np.ndarray, ExtensionArray, Index)):
295309
# drop subclass info
296310
_copy = (
297311
copy_on_sanitize
@@ -361,11 +375,11 @@ def ndarray_to_mgr(
361375
]
362376
else:
363377
bp = BlockPlacement(slice(len(columns)))
364-
nb = new_block_2d(values, placement=bp)
378+
nb = new_block_2d(values, placement=bp, refs=refs)
365379
block_values = [nb]
366380
else:
367381
bp = BlockPlacement(slice(len(columns)))
368-
nb = new_block_2d(values, placement=bp)
382+
nb = new_block_2d(values, placement=bp, refs=refs)
369383
block_values = [nb]
370384

371385
if len(columns) == 0:

pandas/tests/copy_view/test_constructors.py

+32
Original file line number | Diff line number | Diff line change
@@ -200,6 +200,38 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype):
200200
assert np.shares_memory(arr_before, arr_after)
201201

202202

203+
@pytest.mark.parametrize(
204+
"data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)]
205+
)
206+
def test_dataframe_from_series(using_copy_on_write, data, dtype):
207+
ser = Series(data, dtype=dtype)
208+
ser_orig = ser.copy()
209+
df = DataFrame(ser, dtype=dtype)
210+
assert np.shares_memory(get_array(ser), get_array(df, 0))
211+
if using_copy_on_write:
212+
assert not df._mgr._has_no_reference(0)
213+
214+
df.iloc[0, 0] = data[-1]
215+
if using_copy_on_write:
216+
tm.assert_series_equal(ser, ser_orig)
217+
218+
219+
def test_dataframe_from_series_different_dtype(using_copy_on_write):
220+
ser = Series([1, 2], dtype="int64")
221+
df = DataFrame(ser, dtype="int32")
222+
assert not np.shares_memory(get_array(ser), get_array(df, 0))
223+
if using_copy_on_write:
224+
assert df._mgr._has_no_reference(0)
225+
226+
227+
def test_dataframe_from_series_infer_datetime(using_copy_on_write):
228+
ser = Series([Timestamp("2019-12-31"), Timestamp("2020-12-31")], dtype=object)
229+
df = DataFrame(ser)
230+
assert not np.shares_memory(get_array(ser), get_array(df, 0))
231+
if using_copy_on_write:
232+
assert df._mgr._has_no_reference(0)
233+
234+
203235
@pytest.mark.parametrize("index", [None, [0, 1, 2]])
204236
def test_dataframe_from_dict_of_series_with_dtype(index):
205237
# Variant of above, but now passing a dtype that causes a copy

0 commit comments

Comments
 (0)