Skip to content

Commit 75d7af3

Browse files
Backport PR pandas-dev#52031 on branch 2.0.x (BUG-CoW: DataFrame constructed from Series not respecting CoW) (pandas-dev#52274)
Backport PR pandas-dev#52031: BUG-CoW: DataFrame constructed from Series not respecting CoW

Co-authored-by: Patrick Hoefler <[email protected]>
1 parent 77627e3 commit 75d7af3

File tree

3 files changed, with 52 additions and 3 deletions.

doc/source/whatsnew/v2.0.0.rst

+3
Original file line number | Diff line number | Diff line change
@@ -190,6 +190,9 @@ Copy-on-Write improvements
190190
of Series objects and specifying ``copy=False``, will now use a lazy copy
191191
of those Series objects for the columns of the DataFrame (:issue:`50777`)
192192

193+
- The :class:`DataFrame` constructor, when constructing a DataFrame from a
194+
:class:`Series` and specifying ``copy=False``, will now respect Copy-on-Write.
195+
193196
- The :class:`DataFrame` constructor, when constructing from a NumPy array,
194197
will now copy the array by default to avoid mutating the :class:`DataFrame`
195198
when mutating the array. Specify ``copy=False`` to get the old behavior.

pandas/core/internals/construction.py

+17 -3
Original file line number | Diff line number | Diff line change
@@ -259,6 +259,7 @@ def ndarray_to_mgr(
259259
copy_on_sanitize = False if typ == "array" else copy
260260

261261
vdtype = getattr(values, "dtype", None)
262+
refs = None
262263
if is_1d_only_ea_dtype(vdtype) or is_1d_only_ea_dtype(dtype):
263264
# GH#19157
264265

@@ -290,7 +291,20 @@ def ndarray_to_mgr(
290291
if values.ndim == 1:
291292
values = values.reshape(-1, 1)
292293

293-
elif isinstance(values, (np.ndarray, ExtensionArray, ABCSeries, Index)):
294+
elif isinstance(values, ABCSeries):
295+
if not copy_on_sanitize and (
296+
dtype is None or astype_is_view(values.dtype, dtype)
297+
):
298+
refs = values._references
299+
300+
if copy_on_sanitize:
301+
values = values._values.copy()
302+
else:
303+
values = values._values
304+
305+
values = _ensure_2d(values)
306+
307+
elif isinstance(values, (np.ndarray, ExtensionArray, Index)):
294308
# drop subclass info
295309
_copy = (
296310
copy_on_sanitize
@@ -360,11 +374,11 @@ def ndarray_to_mgr(
360374
]
361375
else:
362376
bp = BlockPlacement(slice(len(columns)))
363-
nb = new_block_2d(values, placement=bp)
377+
nb = new_block_2d(values, placement=bp, refs=refs)
364378
block_values = [nb]
365379
else:
366380
bp = BlockPlacement(slice(len(columns)))
367-
nb = new_block_2d(values, placement=bp)
381+
nb = new_block_2d(values, placement=bp, refs=refs)
368382
block_values = [nb]
369383

370384
if len(columns) == 0:

pandas/tests/copy_view/test_constructors.py

+32
Original file line number | Diff line number | Diff line change
@@ -228,6 +228,38 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype):
228228
assert np.shares_memory(arr_before, arr_after)
229229

230230

231+
@pytest.mark.parametrize(
232+
"data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)]
233+
)
234+
def test_dataframe_from_series(using_copy_on_write, data, dtype):
235+
ser = Series(data, dtype=dtype)
236+
ser_orig = ser.copy()
237+
df = DataFrame(ser, dtype=dtype)
238+
assert np.shares_memory(get_array(ser), get_array(df, 0))
239+
if using_copy_on_write:
240+
assert not df._mgr._has_no_reference(0)
241+
242+
df.iloc[0, 0] = data[-1]
243+
if using_copy_on_write:
244+
tm.assert_series_equal(ser, ser_orig)
245+
246+
247+
def test_dataframe_from_series_different_dtype(using_copy_on_write):
248+
ser = Series([1, 2], dtype="int64")
249+
df = DataFrame(ser, dtype="int32")
250+
assert not np.shares_memory(get_array(ser), get_array(df, 0))
251+
if using_copy_on_write:
252+
assert df._mgr._has_no_reference(0)
253+
254+
255+
def test_dataframe_from_series_infer_datetime(using_copy_on_write):
256+
ser = Series([Timestamp("2019-12-31"), Timestamp("2020-12-31")], dtype=object)
257+
df = DataFrame(ser)
258+
assert not np.shares_memory(get_array(ser), get_array(df, 0))
259+
if using_copy_on_write:
260+
assert df._mgr._has_no_reference(0)
261+
262+
231263
@pytest.mark.parametrize("index", [None, [0, 1, 2]])
232264
def test_dataframe_from_dict_of_series_with_dtype(index):
233265
# Variant of above, but now passing a dtype that causes a copy

0 commit comments

Comments
 (0)