|
| 1 | +import numpy as np |
| 2 | + |
| 3 | +from pandas import ( |
| 4 | + DataFrame, |
| 5 | + Index, |
| 6 | + RangeIndex, |
| 7 | + Series, |
| 8 | +) |
| 9 | +import pandas._testing as tm |
| 10 | + |
| 11 | +# ----------------------------------------------------------------------------- |
| 12 | +# Copy/view behaviour for the values that are set in a DataFrame |
| 13 | + |
| 14 | + |
def test_set_column_with_array():
    """Check that ``df[col] = arr`` stores a copy of a NumPy array."""
    # Case: setting an array as a new column (df[col] = arr) copies that data
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    arr = np.array([1, 2, 3], dtype="int64")

    df["c"] = arr

    # the array data is copied: the new column must not be backed by the
    # caller's buffer
    assert not np.shares_memory(df["c"].values, arr)
    # and thus modifying the array afterwards does not modify the DataFrame
    arr[0] = 0
    tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))
| 27 | + |
| 28 | + |
def test_set_column_with_series(using_copy_on_write):
    """Check that writes to a Series never leak into a column set from it.

    Parameters
    ----------
    using_copy_on_write : bool
        Truthy when Copy-on-Write is enabled (supplied by the test suite,
        presumably as a pytest fixture). It selects whether the new column
        is expected to share memory with the Series right after assignment.
    """
    # Case: setting a series as a new column (df[col] = s) copies that data
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    ser = Series([1, 2, 3])

    df["c"] = ser

    if using_copy_on_write:
        # with CoW we can delay the copy, so the buffers are still shared here
        assert np.shares_memory(df["c"].values, ser.values)
    else:
        # the series data is copied eagerly
        assert not np.shares_memory(df["c"].values, ser.values)

    # in both modes, modifying the series does not modify the DataFrame
    ser.iloc[0] = 0
    # sanity check that the write to the series itself took effect
    assert ser.iloc[0] == 0
    tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))
| 47 | + |
| 48 | + |
def test_set_column_with_index(using_copy_on_write):
    """Check copy behaviour when an Index or RangeIndex is set as a column.

    Parameters
    ----------
    using_copy_on_write : bool
        Truthy when Copy-on-Write is enabled (supplied by the test suite,
        presumably as a pytest fixture). It only affects the expectations
        of the RangeIndex part of the test.
    """
    # Case: setting an index as a new column (df[col] = idx) copies that data
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    idx = Index([1, 2, 3])

    df["c"] = idx

    # the index data is copied
    assert not np.shares_memory(df["c"].values, idx.values)

    # and thus modifying the index values does not modify the DataFrame
    idx.values[0] = 0
    tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))

    # For a RangeIndex the outcome depends on the mode: without CoW the
    # "materialized" values obtained from ``idx.values`` are currently not
    # copied, whereas with CoW the column gets its own data.
    idx = RangeIndex(1, 4)
    arr = idx.values

    df["d"] = idx

    if using_copy_on_write:
        # the column is independent of the materialized array
        assert not np.shares_memory(df["d"].values, arr)
        arr[0] = 0
        tm.assert_series_equal(df["d"], Series([1, 2, 3], name="d"))
    else:
        # the column shares the materialized array, so the write shows through
        assert np.shares_memory(df["d"].values, arr)
        arr[0] = 0
        tm.assert_series_equal(df["d"], Series([0, 2, 3], name="d"))
| 78 | + |
| 79 | + |
def test_set_columns_with_dataframe(using_copy_on_write):
    """Check that writes to a DataFrame never leak into columns set from it.

    Parameters
    ----------
    using_copy_on_write : bool
        Truthy when Copy-on-Write is enabled (supplied by the test suite,
        presumably as a pytest fixture). It selects whether the new columns
        are expected to share memory with the source right after assignment.
    """
    # Case: setting a DataFrame as new columns copies that data
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    df2 = DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]})

    df[["c", "d"]] = df2

    if using_copy_on_write:
        # with CoW we can delay the copy, so the buffers are still shared here
        assert np.shares_memory(df["c"].values, df2["c"].values)
    else:
        # the data is copied eagerly
        assert not np.shares_memory(df["c"].values, df2["c"].values)

    # in both modes, modifying the DataFrame that was set does not modify
    # the DataFrame it was set into
    df2.iloc[0, 0] = 0
    tm.assert_series_equal(df["c"], Series([7, 8, 9], name="c"))
0 commit comments