Skip to content

Commit b880e49

Browse files
TST: add copy/view test for setting columns with an array/series (pandas-dev#47070)
* TST: add copy/view test for setting columns with an array/series
* Update pandas/tests/copy_view/test_setitem.py
* address feedback
1 parent d6c30f2 commit b880e49

File tree

2 files changed

+104
-0
lines changed

2 files changed

+104
-0
lines changed

pandas/conftest.py

+8
Original file line numberDiff line numberDiff line change
@@ -1836,3 +1836,11 @@ def using_array_manager():
18361836
Fixture to check if the array manager is being used.
18371837
"""
18381838
return pd.options.mode.data_manager == "array"
1839+
1840+
1841+
@pytest.fixture
def using_copy_on_write() -> bool:
    """
    Fixture to check if Copy-on-Write is enabled.

    Tests request this fixture to choose between the assertions that apply
    with and without Copy-on-Write. It is currently hard-coded to ``False``.
    """
    return False
pandas/tests/copy_view/test_setitem.py

+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import numpy as np
2+
3+
from pandas import (
4+
DataFrame,
5+
Index,
6+
RangeIndex,
7+
Series,
8+
)
9+
import pandas._testing as tm
10+
11+
# -----------------------------------------------------------------------------
12+
# Copy/view behaviour for the values that are set in a DataFrame
13+
14+
15+
def test_set_column_with_array():
    """
    Check that ``df[col] = arr`` stores a copy of a NumPy array.

    The new column must not share memory with the array, and mutating the
    array afterwards must leave the DataFrame unchanged.
    """
    # Case: setting an array as a new column (df[col] = arr) copies that data
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    arr = np.array([1, 2, 3], dtype="int64")

    df["c"] = arr

    # the array data is copied
    assert not np.shares_memory(df["c"].values, arr)
    # and thus modifying the array does not modify the DataFrame
    arr[0] = 0
    tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))
27+
28+
29+
def test_set_column_with_series(using_copy_on_write):
    """
    Check that ``df[col] = ser`` does not tie the column to the Series.

    Parameters
    ----------
    using_copy_on_write : bool
        Selects which memory-sharing assertion applies: when true the column
        is expected to share memory with the Series right after assignment,
        otherwise it is expected not to.

    In both modes, mutating the Series afterwards must leave the DataFrame
    unchanged.
    """
    # Case: setting a series as a new column (df[col] = s) copies that data
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    ser = Series([1, 2, 3])

    df["c"] = ser

    if using_copy_on_write:
        # with CoW we can delay the copy
        assert np.shares_memory(df["c"].values, ser.values)
    else:
        # the series data is copied
        assert not np.shares_memory(df["c"].values, ser.values)

    # and modifying the series does not modify the DataFrame
    ser.iloc[0] = 0
    # sanity check: the write to the Series itself took effect
    assert ser.iloc[0] == 0
    tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))
47+
48+
49+
def test_set_column_with_index(using_copy_on_write):
    """
    Check copy/view behaviour of ``df[col] = idx`` for Index and RangeIndex.

    Parameters
    ----------
    using_copy_on_write : bool
        Selects the expectation for the RangeIndex case: when true the new
        column must not share memory with the materialized index values,
        otherwise it is expected to share them.

    For a plain Index the column must never share memory with the index
    values, regardless of the flag.
    """
    # Case: setting an index as a new column (df[col] = idx) copies that data
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    idx = Index([1, 2, 3])

    df["c"] = idx

    # the index data is copied
    assert not np.shares_memory(df["c"].values, idx.values)

    # and thus modifying the index does not modify the DataFrame
    idx.values[0] = 0
    tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))

    # however, in case of a RangeIndex, we currently don't copy the cached
    # "materialized" values
    idx = RangeIndex(1, 4)
    arr = idx.values

    df["d"] = idx

    if using_copy_on_write:
        assert not np.shares_memory(df["d"].values, arr)
        expected = Series([1, 2, 3], name="d")
    else:
        # the column is a view on the materialized values, so the write
        # below is expected to show up in the DataFrame
        assert np.shares_memory(df["d"].values, arr)
        expected = Series([0, 2, 3], name="d")

    arr[0] = 0
    tm.assert_series_equal(df["d"], expected)
78+
79+
80+
def test_set_columns_with_dataframe(using_copy_on_write):
    """
    Check that ``df[[...]] = df2`` does not tie the columns to ``df2``.

    Parameters
    ----------
    using_copy_on_write : bool
        Selects which memory-sharing assertion applies: when true the new
        column is expected to share memory with ``df2`` right after
        assignment, otherwise it is expected not to.

    In both modes, mutating ``df2`` afterwards must leave ``df`` unchanged.
    """
    # Case: setting a DataFrame as new columns copies that data
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    df2 = DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]})

    df[["c", "d"]] = df2

    if using_copy_on_write:
        # with CoW we can delay the copy
        assert np.shares_memory(df["c"].values, df2["c"].values)
    else:
        # the data is copied
        assert not np.shares_memory(df["c"].values, df2["c"].values)

    # and modifying the set DataFrame does not modify the original DataFrame
    df2.iloc[0, 0] = 0
    tm.assert_series_equal(df["c"], Series([7, 8, 9], name="c"))

0 commit comments

Comments
 (0)