|
1 | 1 | import numpy as np
|
2 | 2 | import pytest
|
3 | 3 |
|
4 |
| -from pandas import Series |
| 4 | +from pandas import ( |
| 5 | + DataFrame, |
| 6 | + Series, |
| 7 | +) |
| 8 | +import pandas._testing as tm |
| 9 | +from pandas.tests.copy_view.util import get_array |
5 | 10 |
|
6 | 11 | # -----------------------------------------------------------------------------
|
7 | 12 | # Copy/view behaviour for Series / DataFrame constructors
|
@@ -75,3 +80,83 @@ def test_series_from_series_with_reindex(using_copy_on_write):
|
75 | 80 | assert not np.shares_memory(ser.values, result.values)
|
76 | 81 | if using_copy_on_write:
|
77 | 82 | assert not result._mgr.blocks[0].refs.has_reference()
|
| 83 | + |
| 84 | + |
| 85 | +@pytest.mark.parametrize("dtype", [None, "int64", "Int64"]) |
| 86 | +@pytest.mark.parametrize("index", [None, [0, 1, 2]]) |
| 87 | +@pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]]) |
| 88 | +def test_dataframe_from_dict_of_series( |
| 89 | + request, using_copy_on_write, columns, index, dtype |
| 90 | +): |
| 91 | + # Case: constructing a DataFrame from Series objects with copy=False |
| 92 | + # has to do a lazy copy following CoW rules |
| 93 | + # (the default for DataFrame(dict) is still to copy to ensure consolidation) |
| 94 | + s1 = Series([1, 2, 3]) |
| 95 | + s2 = Series([4, 5, 6]) |
| 96 | + s1_orig = s1.copy() |
| 97 | + expected = DataFrame( |
| 98 | + {"a": [1, 2, 3], "b": [4, 5, 6]}, index=index, columns=columns, dtype=dtype |
| 99 | + ) |
| 100 | + |
| 101 | + result = DataFrame( |
| 102 | + {"a": s1, "b": s2}, index=index, columns=columns, dtype=dtype, copy=False |
| 103 | + ) |
| 104 | + |
| 105 | + # right after construction, column "a" of the result still shares memory with s1 |
| 106 | + assert np.shares_memory(get_array(result, "a"), get_array(s1)) |
| 107 | + |
| 108 | + # with CoW, mutating the new dataframe leaves s1 untouched; without CoW the write shows up in s1 |
| 109 | + result.iloc[0, 0] = 10 |
| 110 | + if using_copy_on_write: |
| 111 | + assert not np.shares_memory(get_array(result, "a"), get_array(s1)) |
| 112 | + tm.assert_series_equal(s1, s1_orig) |
| 113 | + else: |
| 114 | + assert s1.iloc[0] == 10 |
| 115 | + |
| 116 | + # the same holds in the other direction, when modifying the parent series |
| 117 | + s1 = Series([1, 2, 3]) |
| 118 | + s2 = Series([4, 5, 6]) |
| 119 | + result = DataFrame( |
| 120 | + {"a": s1, "b": s2}, index=index, columns=columns, dtype=dtype, copy=False |
| 121 | + ) |
| 122 | + s1.iloc[0] = 10 |
| 123 | + if using_copy_on_write: |
| 124 | + assert not np.shares_memory(get_array(result, "a"), get_array(s1)) |
| 125 | + tm.assert_frame_equal(result, expected) |
| 126 | + else: |
| 127 | + assert result.iloc[0, 0] == 10 |
| 128 | + |
| 129 | + |
| 130 | +@pytest.mark.parametrize("dtype", [None, "int64"]) |
| 131 | +def test_dataframe_from_dict_of_series_with_reindex(dtype): |
| 132 | + # Case: constructing a DataFrame from Series objects with copy=False |
| 133 | + # and passing an index that requires an actual (non-view) reindex -> need |
| 134 | + # to ensure the result doesn't have refs set up that would unnecessarily |
| 135 | + # trigger a copy on write |
| 136 | + s1 = Series([1, 2, 3]) |
| 137 | + s2 = Series([4, 5, 6]) |
| 138 | + df = DataFrame({"a": s1, "b": s2}, index=[1, 2, 3], dtype=dtype, copy=False) |
| 139 | + |
| 140 | + # df should own its memory, so the array for "a" must still be the same buffer after mutating |
| 141 | + arr_before = get_array(df, "a") |
| 142 | + assert not np.shares_memory(arr_before, get_array(s1)) |
| 143 | + df.iloc[0, 0] = 100 |
| 144 | + arr_after = get_array(df, "a") |
| 145 | + assert np.shares_memory(arr_before, arr_after) |
| 146 | + |
| 147 | + |
| 148 | +@pytest.mark.parametrize("index", [None, [0, 1, 2]]) |
| 149 | +def test_dataframe_from_dict_of_series_with_dtype(index): |
| 150 | + # Case: constructing a DataFrame from Series objects with copy=False while passing a dtype that causes a copy (float data in s1 cast to "int64") |
| 151 | + # -> need to ensure the result doesn't have refs set up that would unnecessarily |
| 152 | + # trigger a copy on write |
| 153 | + s1 = Series([1.0, 2.0, 3.0]) |
| 154 | + s2 = Series([4, 5, 6]) |
| 155 | + df = DataFrame({"a": s1, "b": s2}, index=index, dtype="int64", copy=False) |
| 156 | + |
| 157 | + # df should own its memory, so the array for "a" must still be the same buffer after mutating |
| 158 | + arr_before = get_array(df, "a") |
| 159 | + assert not np.shares_memory(arr_before, get_array(s1)) |
| 160 | + df.iloc[0, 0] = 100 |
| 161 | + arr_after = get_array(df, "a") |
| 162 | + assert np.shares_memory(arr_before, arr_after) |
0 commit comments