From f2049e82319c0528e333dd6d37c65afa96084806 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 11 Feb 2024 20:26:52 +0100 Subject: [PATCH] CoW: Remove more using_copy_on_write fixtures --- .../copy_view/index/test_datetimeindex.py | 27 +- pandas/tests/copy_view/index/test_index.py | 72 ++--- .../tests/copy_view/index/test_periodindex.py | 5 +- .../copy_view/index/test_timedeltaindex.py | 5 +- pandas/tests/copy_view/test_array.py | 125 ++++----- pandas/tests/copy_view/test_astype.py | 133 +++------ .../test_chained_assignment_deprecation.py | 27 +- pandas/tests/copy_view/test_clip.py | 68 ++--- pandas/tests/copy_view/test_constructors.py | 148 ++++------ .../copy_view/test_core_functionalities.py | 63 ++--- pandas/tests/copy_view/test_indexing.py | 256 +++++------------- pandas/tests/copy_view/test_internals.py | 22 +- pandas/tests/copy_view/test_interp_fillna.py | 224 +++++---------- pandas/tests/copy_view/test_replace.py | 252 ++++++----------- pandas/tests/copy_view/test_setitem.py | 41 +-- 15 files changed, 475 insertions(+), 993 deletions(-) diff --git a/pandas/tests/copy_view/index/test_datetimeindex.py b/pandas/tests/copy_view/index/test_datetimeindex.py index 5dd1f45a94ff3..6194ea8b122c9 100644 --- a/pandas/tests/copy_view/index/test_datetimeindex.py +++ b/pandas/tests/copy_view/index/test_datetimeindex.py @@ -14,50 +14,43 @@ @pytest.mark.parametrize("box", [lambda x: x, DatetimeIndex]) -def test_datetimeindex(using_copy_on_write, box): +def test_datetimeindex(box): dt = date_range("2019-12-31", periods=3, freq="D") ser = Series(dt) idx = box(DatetimeIndex(ser)) expected = idx.copy(deep=True) ser.iloc[0] = Timestamp("2020-12-31") - if using_copy_on_write: - tm.assert_index_equal(idx, expected) + tm.assert_index_equal(idx, expected) -def test_datetimeindex_tz_convert(using_copy_on_write): +def test_datetimeindex_tz_convert(): dt = date_range("2019-12-31", periods=3, freq="D", tz="Europe/Berlin") ser = Series(dt) idx = DatetimeIndex(ser).tz_convert("US/Eastern") expected = idx.copy(deep=True) ser.iloc[0] = Timestamp("2020-12-31", tz="Europe/Berlin") - if using_copy_on_write: - tm.assert_index_equal(idx, expected) + tm.assert_index_equal(idx, expected) -def test_datetimeindex_tz_localize(using_copy_on_write): +def test_datetimeindex_tz_localize(): dt = date_range("2019-12-31", periods=3, freq="D") ser = Series(dt) idx = DatetimeIndex(ser).tz_localize("Europe/Berlin") expected = idx.copy(deep=True) ser.iloc[0] = Timestamp("2020-12-31") - if using_copy_on_write: - tm.assert_index_equal(idx, expected) + tm.assert_index_equal(idx, expected) -def test_datetimeindex_isocalendar(using_copy_on_write): +def test_datetimeindex_isocalendar(): dt = date_range("2019-12-31", periods=3, freq="D") ser = Series(dt) df = DatetimeIndex(ser).isocalendar() expected = df.index.copy(deep=True) ser.iloc[0] = Timestamp("2020-12-31") - if using_copy_on_write: - tm.assert_index_equal(df.index, expected) + tm.assert_index_equal(df.index, expected) -def test_index_values(using_copy_on_write): +def test_index_values(): idx = date_range("2019-12-31", periods=3, freq="D") result = idx.values - if using_copy_on_write: - assert result.flags.writeable is False - else: - assert result.flags.writeable is True + assert result.flags.writeable is False diff --git a/pandas/tests/copy_view/index/test_index.py b/pandas/tests/copy_view/index/test_index.py index 9a788c5fd4193..e51f5658cf437 100644 --- a/pandas/tests/copy_view/index/test_index.py +++ b/pandas/tests/copy_view/index/test_index.py @@ -19,18 +19,15 @@ def index_view(index_data): return idx, view -def test_set_index_update_column(using_copy_on_write): +def test_set_index_update_column(): df = DataFrame({"a": [1, 2], "b": 1}) df = df.set_index("a", drop=False) expected = df.index.copy(deep=True) df.iloc[0, 0] = 100 - if using_copy_on_write: - tm.assert_index_equal(df.index, expected) - else: - tm.assert_index_equal(df.index, Index([100, 2], name="a")) + tm.assert_index_equal(df.index, expected) -def test_set_index_drop_update_column(using_copy_on_write): +def test_set_index_drop_update_column(): df = DataFrame({"a": [1, 2], "b": 1.5}) view = df[:] df = df.set_index("a", drop=True) @@ -39,31 +36,25 @@ def test_set_index_drop_update_column(using_copy_on_write): tm.assert_index_equal(df.index, expected) -def test_set_index_series(using_copy_on_write): +def test_set_index_series(): df = DataFrame({"a": [1, 2], "b": 1.5}) ser = Series([10, 11]) df = df.set_index(ser) expected = df.index.copy(deep=True) ser.iloc[0] = 100 - if using_copy_on_write: - tm.assert_index_equal(df.index, expected) - else: - tm.assert_index_equal(df.index, Index([100, 11])) + tm.assert_index_equal(df.index, expected) -def test_assign_index_as_series(using_copy_on_write): +def test_assign_index_as_series(): df = DataFrame({"a": [1, 2], "b": 1.5}) ser = Series([10, 11]) df.index = ser expected = df.index.copy(deep=True) ser.iloc[0] = 100 - if using_copy_on_write: - tm.assert_index_equal(df.index, expected) - else: - tm.assert_index_equal(df.index, Index([100, 11])) + tm.assert_index_equal(df.index, expected) -def test_assign_index_as_index(using_copy_on_write): +def test_assign_index_as_index(): df = DataFrame({"a": [1, 2], "b": 1.5}) ser = Series([10, 11]) rhs_index = Index(ser) @@ -71,24 +62,18 @@ def test_assign_index_as_index(using_copy_on_write): rhs_index = None # overwrite to clear reference expected = df.index.copy(deep=True) ser.iloc[0] = 100 - if using_copy_on_write: - tm.assert_index_equal(df.index, expected) - else: - tm.assert_index_equal(df.index, Index([100, 11])) + tm.assert_index_equal(df.index, expected) -def test_index_from_series(using_copy_on_write): +def test_index_from_series(): ser = Series([1, 2]) idx = Index(ser) expected = idx.copy(deep=True) ser.iloc[0] = 100 - if using_copy_on_write: - tm.assert_index_equal(idx, expected) - else: - tm.assert_index_equal(idx, Index([100, 2])) + tm.assert_index_equal(idx, expected) -def test_index_from_series_copy(using_copy_on_write): +def test_index_from_series_copy(): ser = Series([1, 2]) idx = Index(ser, copy=True) # noqa: F841 arr = get_array(ser) @@ -96,16 +81,13 @@ def test_index_from_series_copy(using_copy_on_write): assert np.shares_memory(get_array(ser), arr) -def test_index_from_index(using_copy_on_write): +def test_index_from_index(): ser = Series([1, 2]) idx = Index(ser) idx = Index(idx) expected = idx.copy(deep=True) ser.iloc[0] = 100 - if using_copy_on_write: - tm.assert_index_equal(idx, expected) - else: - tm.assert_index_equal(idx, Index([100, 2])) + tm.assert_index_equal(idx, expected) @pytest.mark.parametrize( @@ -135,44 +117,36 @@ def test_index_from_index(using_copy_on_write): "astype", ], ) -def test_index_ops(using_copy_on_write, func, request): +def test_index_ops(func, request): idx, view_ = index_view([1, 2]) expected = idx.copy(deep=True) if "astype" in request.node.callspec.id: expected = expected.astype("Int64") idx = func(idx) view_.iloc[0, 0] = 100 - if using_copy_on_write: - tm.assert_index_equal(idx, expected, check_names=False) + tm.assert_index_equal(idx, expected, check_names=False) -def test_infer_objects(using_copy_on_write): +def test_infer_objects(): idx, view_ = index_view(["a", "b"]) expected = idx.copy(deep=True) idx = idx.infer_objects(copy=False) view_.iloc[0, 0] = "aaaa" - if using_copy_on_write: - tm.assert_index_equal(idx, expected, check_names=False) + tm.assert_index_equal(idx, expected, check_names=False) -def test_index_to_frame(using_copy_on_write): +def test_index_to_frame(): idx = Index([1, 2, 3], name="a") expected = idx.copy(deep=True) df = idx.to_frame() - if using_copy_on_write: - assert np.shares_memory(get_array(df, "a"), idx._values) - assert not df._mgr._has_no_reference(0) - else: - assert not np.shares_memory(get_array(df, "a"), idx._values) + assert np.shares_memory(get_array(df, "a"), idx._values) + assert not df._mgr._has_no_reference(0) df.iloc[0, 0] = 100 tm.assert_index_equal(idx, expected) -def test_index_values(using_copy_on_write): +def test_index_values(): idx = Index([1, 2, 3]) result = idx.values - if using_copy_on_write: - assert result.flags.writeable is False - else: - assert result.flags.writeable is True + assert result.flags.writeable is False diff --git a/pandas/tests/copy_view/index/test_periodindex.py b/pandas/tests/copy_view/index/test_periodindex.py index 753304a1a8963..2887b191038d2 100644 --- a/pandas/tests/copy_view/index/test_periodindex.py +++ b/pandas/tests/copy_view/index/test_periodindex.py @@ -14,11 +14,10 @@ @pytest.mark.parametrize("box", [lambda x: x, PeriodIndex]) -def test_periodindex(using_copy_on_write, box): +def test_periodindex(box): dt = period_range("2019-12-31", periods=3, freq="D") ser = Series(dt) idx = box(PeriodIndex(ser)) expected = idx.copy(deep=True) ser.iloc[0] = Period("2020-12-31") - if using_copy_on_write: - tm.assert_index_equal(idx, expected) + tm.assert_index_equal(idx, expected) diff --git a/pandas/tests/copy_view/index/test_timedeltaindex.py b/pandas/tests/copy_view/index/test_timedeltaindex.py index 5b9832093fded..6984df86b00e3 100644 --- a/pandas/tests/copy_view/index/test_timedeltaindex.py +++ b/pandas/tests/copy_view/index/test_timedeltaindex.py @@ -20,11 +20,10 @@ lambda x: TimedeltaIndex(TimedeltaIndex(x)), ], ) -def test_timedeltaindex(using_copy_on_write, cons): +def test_timedeltaindex(cons): dt = timedelta_range("1 day", periods=3) ser = Series(dt) idx = cons(ser) expected = idx.copy(deep=True) ser.iloc[0] = Timedelta("5 days") - if using_copy_on_write: - tm.assert_index_equal(idx, expected) + tm.assert_index_equal(idx, expected) diff --git a/pandas/tests/copy_view/test_array.py b/pandas/tests/copy_view/test_array.py index 02941a2fc3481..bb238d08bd9bd 100644 --- a/pandas/tests/copy_view/test_array.py +++ b/pandas/tests/copy_view/test_array.py @@ -18,29 +18,24 @@ [lambda ser: ser.values, lambda ser: np.asarray(ser)], ids=["values", "asarray"], ) -def test_series_values(using_copy_on_write, method): +def test_series_values(method): ser = Series([1, 2, 3], name="name") ser_orig = ser.copy() arr = method(ser) - if using_copy_on_write: - # .values still gives a view but is read-only - assert np.shares_memory(arr, get_array(ser, "name")) - assert arr.flags.writeable is False - - # mutating series through arr therefore doesn't work - with pytest.raises(ValueError, match="read-only"): - arr[0] = 0 - tm.assert_series_equal(ser, ser_orig) - - # mutating the series itself still works - ser.iloc[0] = 0 - assert ser.values[0] == 0 - else: - assert arr.flags.writeable is True + # .values still gives a view but is read-only + assert np.shares_memory(arr, get_array(ser, "name")) + assert arr.flags.writeable is False + + # mutating series through arr therefore doesn't work + with pytest.raises(ValueError, match="read-only"): arr[0] = 0 - assert ser.iloc[0] == 0 + tm.assert_series_equal(ser, ser_orig) + + # mutating the series itself still works + ser.iloc[0] = 0 + assert ser.values[0] == 0 @pytest.mark.parametrize( @@ -48,54 +43,44 @@ def test_series_values(using_copy_on_write, method): [lambda df: df.values, lambda df: np.asarray(df)], ids=["values", "asarray"], ) -def test_dataframe_values(using_copy_on_write, method): +def test_dataframe_values(method): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) df_orig = df.copy() arr = method(df) - if using_copy_on_write: - # .values still gives a view but is read-only - assert np.shares_memory(arr, get_array(df, "a")) - assert arr.flags.writeable is False - - # mutating series through arr therefore doesn't work - with pytest.raises(ValueError, match="read-only"): - arr[0, 0] = 0 - tm.assert_frame_equal(df, df_orig) - - # mutating the series itself still works - df.iloc[0, 0] = 0 - assert df.values[0, 0] == 0 - else: - assert arr.flags.writeable is True + # .values still gives a view but is read-only + assert np.shares_memory(arr, get_array(df, "a")) + assert arr.flags.writeable is False + + # mutating series through arr therefore doesn't work + with pytest.raises(ValueError, match="read-only"): arr[0, 0] = 0 - assert df.iloc[0, 0] == 0 + tm.assert_frame_equal(df, df_orig) + # mutating the series itself still works + df.iloc[0, 0] = 0 + assert df.values[0, 0] == 0 -def test_series_to_numpy(using_copy_on_write): + +def test_series_to_numpy(): ser = Series([1, 2, 3], name="name") ser_orig = ser.copy() # default: copy=False, no dtype or NAs arr = ser.to_numpy() - if using_copy_on_write: - # to_numpy still gives a view but is read-only - assert np.shares_memory(arr, get_array(ser, "name")) - assert arr.flags.writeable is False - - # mutating series through arr therefore doesn't work - with pytest.raises(ValueError, match="read-only"): - arr[0] = 0 - tm.assert_series_equal(ser, ser_orig) - - # mutating the series itself still works - ser.iloc[0] = 0 - assert ser.values[0] == 0 - else: - assert arr.flags.writeable is True + # to_numpy still gives a view but is read-only + assert np.shares_memory(arr, get_array(ser, "name")) + assert arr.flags.writeable is False + + # mutating series through arr therefore doesn't work + with pytest.raises(ValueError, match="read-only"): arr[0] = 0 - assert ser.iloc[0] == 0 + tm.assert_series_equal(ser, ser_orig) + + # mutating the series itself still works + ser.iloc[0] = 0 + assert ser.values[0] == 0 # specify copy=False gives a writeable array ser = Series([1, 2, 3], name="name") @@ -110,48 +95,33 @@ def test_series_to_numpy(using_copy_on_write): assert arr.flags.writeable is True -def test_series_array_ea_dtypes(using_copy_on_write): +def test_series_array_ea_dtypes(): ser = Series([1, 2, 3], dtype="Int64") arr = np.asarray(ser, dtype="int64") assert np.shares_memory(arr, get_array(ser)) - if using_copy_on_write: - assert arr.flags.writeable is False - else: - assert arr.flags.writeable is True + assert arr.flags.writeable is False arr = np.asarray(ser) assert np.shares_memory(arr, get_array(ser)) - if using_copy_on_write: - assert arr.flags.writeable is False - else: - assert arr.flags.writeable is True + assert arr.flags.writeable is False -def test_dataframe_array_ea_dtypes(using_copy_on_write): +def test_dataframe_array_ea_dtypes(): df = DataFrame({"a": [1, 2, 3]}, dtype="Int64") arr = np.asarray(df, dtype="int64") assert np.shares_memory(arr, get_array(df, "a")) - if using_copy_on_write: - assert arr.flags.writeable is False - else: - assert arr.flags.writeable is True + assert arr.flags.writeable is False arr = np.asarray(df) assert np.shares_memory(arr, get_array(df, "a")) - if using_copy_on_write: - assert arr.flags.writeable is False - else: - assert arr.flags.writeable is True + assert arr.flags.writeable is False -def test_dataframe_array_string_dtype(using_copy_on_write): +def test_dataframe_array_string_dtype(): df = DataFrame({"a": ["a", "b"]}, dtype="string") arr = np.asarray(df) assert np.shares_memory(arr, get_array(df, "a")) - if using_copy_on_write: - assert arr.flags.writeable is False - else: - assert arr.flags.writeable is True + assert arr.flags.writeable is False def test_dataframe_multiple_numpy_dtypes(): @@ -161,13 +131,10 @@ def test_dataframe_multiple_numpy_dtypes(): assert arr.flags.writeable is True -def test_values_is_ea(using_copy_on_write): +def test_values_is_ea(): df = DataFrame({"a": date_range("2012-01-01", periods=3)}) arr = np.asarray(df) - if using_copy_on_write: - assert arr.flags.writeable is False - else: - assert arr.flags.writeable is True + assert arr.flags.writeable is False def test_empty_dataframe(): diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index f280e2143fee0..5a9b3463cf63f 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -17,22 +17,17 @@ from pandas.tests.copy_view.util import get_array -def test_astype_single_dtype(using_copy_on_write): +def test_astype_single_dtype(): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": 1.5}) df_orig = df.copy() df2 = df.astype("float64") - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) # mutating df2 triggers a copy-on-write for that column/block df2.iloc[0, 2] = 5.5 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) tm.assert_frame_equal(df, df_orig) # mutating parent also doesn't update result @@ -43,22 +38,17 @@ def test_astype_single_dtype(using_copy_on_write): @pytest.mark.parametrize("dtype", ["int64", "Int64"]) @pytest.mark.parametrize("new_dtype", ["int64", "Int64", "int64[pyarrow]"]) -def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype): +def test_astype_avoids_copy(dtype, new_dtype): if new_dtype == "int64[pyarrow]": pytest.importorskip("pyarrow") df = DataFrame({"a": [1, 2, 3]}, dtype=dtype) df_orig = df.copy() df2 = df.astype(new_dtype) - - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) # mutating df2 triggers a copy-on-write for that column/block df2.iloc[0, 0] = 10 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) # mutating parent also doesn't update result @@ -68,7 +58,7 @@ def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype): @pytest.mark.parametrize("dtype", ["float64", "int32", "Int32", "int32[pyarrow]"]) -def test_astype_different_target_dtype(using_copy_on_write, dtype): +def test_astype_different_target_dtype(dtype): if dtype == "int32[pyarrow]": pytest.importorskip("pyarrow") df = DataFrame({"a": [1, 2, 3]}) @@ -76,8 +66,7 @@ def test_astype_different_target_dtype(using_copy_on_write, dtype): df2 = df.astype(dtype) assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - if using_copy_on_write: - assert df2._mgr._has_no_reference(0) + assert df2._mgr._has_no_reference(0) df2.iloc[0, 0] = 5 tm.assert_frame_equal(df, df_orig) @@ -98,15 +87,11 @@ def test_astype_numpy_to_ea(): @pytest.mark.parametrize( "dtype, new_dtype", [("object", "string"), ("string", "object")] ) -def test_astype_string_and_object(using_copy_on_write, dtype, new_dtype): +def test_astype_string_and_object(dtype, new_dtype): df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype) df_orig = df.copy() df2 = df.astype(new_dtype) - - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df2.iloc[0, 0] = "x" tm.assert_frame_equal(df, df_orig) @@ -115,17 +100,11 @@ def test_astype_string_and_object(using_copy_on_write, dtype, new_dtype): @pytest.mark.parametrize( "dtype, new_dtype", [("object", "string"), ("string", "object")] ) -def test_astype_string_and_object_update_original( - using_copy_on_write, dtype, new_dtype -): +def test_astype_string_and_object_update_original(dtype, new_dtype): df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype) df2 = df.astype(new_dtype) df_orig = df2.copy() - - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df.iloc[0, 0] = "x" tm.assert_frame_equal(df2, df_orig) @@ -151,63 +130,53 @@ def test_astype_string_read_only_on_pickle_roundrip(): tm.assert_series_equal(base, base_copy) -def test_astype_dict_dtypes(using_copy_on_write): +def test_astype_dict_dtypes(): df = DataFrame( {"a": [1, 2, 3], "b": [4, 5, 6], "c": Series([1.5, 1.5, 1.5], dtype="float64")} ) df_orig = df.copy() df2 = df.astype({"a": "float64", "c": "float64"}) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) # mutating df2 triggers a copy-on-write for that column/block df2.iloc[0, 2] = 5.5 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) df2.iloc[0, 1] = 10 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) tm.assert_frame_equal(df, df_orig) -def test_astype_different_datetime_resos(using_copy_on_write): +def test_astype_different_datetime_resos(): df = DataFrame({"a": date_range("2019-12-31", periods=2, freq="D")}) result = df.astype("datetime64[ms]") assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) - if using_copy_on_write: - assert result._mgr._has_no_reference(0) + assert result._mgr._has_no_reference(0) -def test_astype_different_timezones(using_copy_on_write): +def test_astype_different_timezones(): df = DataFrame( {"a": date_range("2019-12-31", periods=5, freq="D", tz="US/Pacific")} ) result = df.astype("datetime64[ns, Europe/Berlin]") - if using_copy_on_write: - assert not result._mgr._has_no_reference(0) - assert np.shares_memory(get_array(df, "a"), get_array(result, "a")) + assert not result._mgr._has_no_reference(0) + assert np.shares_memory(get_array(df, "a"), get_array(result, "a")) -def test_astype_different_timezones_different_reso(using_copy_on_write): +def test_astype_different_timezones_different_reso(): df = DataFrame( {"a": date_range("2019-12-31", periods=5, freq="D", tz="US/Pacific")} ) result = df.astype("datetime64[ms, Europe/Berlin]") - if using_copy_on_write: - assert result._mgr._has_no_reference(0) - assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) + assert result._mgr._has_no_reference(0) + assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) -def test_astype_arrow_timestamp(using_copy_on_write): +def test_astype_arrow_timestamp(): pytest.importorskip("pyarrow") df = DataFrame( { @@ -219,19 +188,16 @@ def test_astype_arrow_timestamp(using_copy_on_write): dtype="M8[ns]", ) result = df.astype("timestamp[ns][pyarrow]") - if using_copy_on_write: - assert not result._mgr._has_no_reference(0) - if pa_version_under12p0: - assert not np.shares_memory( - get_array(df, "a"), get_array(result, "a")._pa_array - ) - else: - assert np.shares_memory( - get_array(df, "a"), get_array(result, "a")._pa_array - ) - - -def test_convert_dtypes_infer_objects(using_copy_on_write): + assert not result._mgr._has_no_reference(0) + if pa_version_under12p0: + assert not np.shares_memory( + get_array(df, "a"), get_array(result, "a")._pa_array + ) + else: + assert np.shares_memory(get_array(df, "a"), get_array(result, "a")._pa_array) + + +def test_convert_dtypes_infer_objects(): ser = Series(["a", "b", "c"]) ser_orig = ser.copy() result = ser.convert_dtypes( @@ -241,30 +207,19 @@ def test_convert_dtypes_infer_objects(using_copy_on_write): convert_string=False, ) - if using_copy_on_write: - assert np.shares_memory(get_array(ser), get_array(result)) - else: - assert not np.shares_memory(get_array(ser), get_array(result)) - + assert np.shares_memory(get_array(ser), get_array(result)) result.iloc[0] = "x" tm.assert_series_equal(ser, ser_orig) -def test_convert_dtypes(using_copy_on_write): +def test_convert_dtypes(): df = DataFrame({"a": ["a", "b"], "b": [1, 2], "c": [1.5, 2.5], "d": [True, False]}) df_orig = df.copy() df2 = df.convert_dtypes() - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - assert np.shares_memory(get_array(df2, "d"), get_array(df, "d")) - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - assert not np.shares_memory(get_array(df2, "d"), get_array(df, "d")) - + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "d"), get_array(df, "d")) + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) df2.iloc[0, 0] = "x" tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index e1a76e66c107f..a54ecce4ffbec 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -7,30 +7,11 @@ import pandas._testing as tm -def test_methods_iloc_warn(using_copy_on_write): - if not using_copy_on_write: - df = DataFrame({"a": [1, 2, 3], "b": 1}) - with tm.assert_cow_warning(match="A value"): - df.iloc[:, 0].replace(1, 5, inplace=True) - - with tm.assert_cow_warning(match="A value"): - df.iloc[:, 0].fillna(1, inplace=True) - - with tm.assert_cow_warning(match="A value"): - df.iloc[:, 0].interpolate(inplace=True) - - with tm.assert_cow_warning(match="A value"): - df.iloc[:, 0].ffill(inplace=True) - - with tm.assert_cow_warning(match="A value"): - df.iloc[:, 0].bfill(inplace=True) - - # TODO(CoW-warn) expand the cases @pytest.mark.parametrize( "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])] ) -def test_series_setitem(indexer, using_copy_on_write): +def test_series_setitem(indexer): # ensure we only get a single warning for those typical cases of chained # assignment df = DataFrame({"a": [1, 2, 3], "b": 1}) @@ -40,11 +21,7 @@ def test_series_setitem(indexer, using_copy_on_write): with pytest.warns() as record: df["a"][indexer] = 0 assert len(record) == 1 - if using_copy_on_write: - assert record[0].category == ChainedAssignmentError - else: - assert record[0].category == FutureWarning - assert "ChainedAssignmentError" in record[0].message.args[0] + assert record[0].category == ChainedAssignmentError @pytest.mark.parametrize( diff --git a/pandas/tests/copy_view/test_clip.py b/pandas/tests/copy_view/test_clip.py index c18a2e1e65d26..56df33db6d416 100644 --- a/pandas/tests/copy_view/test_clip.py +++ b/pandas/tests/copy_view/test_clip.py @@ -5,23 +5,20 @@ from pandas.tests.copy_view.util import get_array -def test_clip_inplace_reference(using_copy_on_write): +def test_clip_inplace_reference(): df = DataFrame({"a": [1.5, 2, 3]}) df_copy = df.copy() arr_a = get_array(df, "a") view = df[:] df.clip(lower=2, inplace=True) - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "a"), arr_a) - assert df._mgr._has_no_reference(0) - assert view._mgr._has_no_reference(0) - tm.assert_frame_equal(df_copy, view) - else: - assert np.shares_memory(get_array(df, "a"), arr_a) + assert not np.shares_memory(get_array(df, "a"), arr_a) + assert df._mgr._has_no_reference(0) + assert view._mgr._has_no_reference(0) + tm.assert_frame_equal(df_copy, view) -def test_clip_inplace_reference_no_op(using_copy_on_write): +def test_clip_inplace_reference_no_op(): df = DataFrame({"a": [1.5, 2, 3]}) df_copy = df.copy() arr_a = get_array(df, "a") @@ -30,63 +27,46 @@ def test_clip_inplace_reference_no_op(using_copy_on_write): assert np.shares_memory(get_array(df, "a"), arr_a) - if using_copy_on_write: - assert not df._mgr._has_no_reference(0) - assert not view._mgr._has_no_reference(0) - tm.assert_frame_equal(df_copy, view) + assert not df._mgr._has_no_reference(0) + assert not view._mgr._has_no_reference(0) + tm.assert_frame_equal(df_copy, view) -def test_clip_inplace(using_copy_on_write): +def test_clip_inplace(): df = DataFrame({"a": [1.5, 2, 3]}) arr_a = get_array(df, "a") df.clip(lower=2, inplace=True) assert np.shares_memory(get_array(df, "a"), arr_a) + assert df._mgr._has_no_reference(0) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) - -def test_clip(using_copy_on_write): +def test_clip(): df = DataFrame({"a": [1.5, 2, 3]}) df_orig = df.copy() df2 = df.clip(lower=2) assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) tm.assert_frame_equal(df_orig, df) -def test_clip_no_op(using_copy_on_write): +def test_clip_no_op(): df = DataFrame({"a": [1.5, 2, 3]}) df2 = df.clip(lower=0) - if using_copy_on_write: - assert not df._mgr._has_no_reference(0) - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not df._mgr._has_no_reference(0) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) -def test_clip_chained_inplace(using_copy_on_write): +def test_clip_chained_inplace(): df = DataFrame({"a": [1, 4, 2], "b": 1}) df_orig = df.copy() - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["a"].clip(1, 2, inplace=True) - tm.assert_frame_equal(df, df_orig) - - with tm.raises_chained_assignment_error(): - df[["a"]].clip(1, 2, inplace=True) - tm.assert_frame_equal(df, df_orig) - else: - with tm.assert_produces_warning(FutureWarning, match="inplace method"): - df["a"].clip(1, 2, inplace=True) - - with tm.assert_produces_warning(None): - df[["a"]].clip(1, 2, inplace=True) - - with tm.assert_produces_warning(None): - df[df["a"] > 1].clip(1, 2, inplace=True) + with tm.raises_chained_assignment_error(): + df["a"].clip(1, 2, inplace=True) + tm.assert_frame_equal(df, df_orig) + + with tm.raises_chained_assignment_error(): + df[["a"]].clip(1, 2, inplace=True) + tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index 77c07d11e3381..f7e78146c86eb 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -21,7 +21,7 @@ @pytest.mark.parametrize("dtype", [None, "int64"]) -def test_series_from_series(dtype, using_copy_on_write): +def test_series_from_series(dtype): # Case: constructing a Series from another Series object follows CoW rules: # a new object is returned and thus mutations are not propagated ser = Series([1, 2, 3], name="name") @@ -32,36 +32,23 @@ def test_series_from_series(dtype, using_copy_on_write): # the shallow copy still shares memory assert np.shares_memory(get_array(ser), get_array(result)) - if using_copy_on_write: - assert result._mgr.blocks[0].refs.has_reference() + assert result._mgr.blocks[0].refs.has_reference() - if using_copy_on_write: - # mutating new series copy doesn't mutate original - result.iloc[0] = 0 - assert ser.iloc[0] == 1 - # mutating triggered a copy-on-write -> no longer shares memory - assert not np.shares_memory(get_array(ser), get_array(result)) - else: - # mutating shallow copy does mutate original - result.iloc[0] = 0 - assert ser.iloc[0] == 0 - # and still shares memory - assert np.shares_memory(get_array(ser), get_array(result)) + # mutating new series copy doesn't mutate original + result.iloc[0] = 0 + assert ser.iloc[0] == 1 + # mutating triggered a copy-on-write -> no longer shares memory + assert not np.shares_memory(get_array(ser), get_array(result)) # the same when modifying the parent result = Series(ser, dtype=dtype) - if using_copy_on_write: - # mutating original doesn't mutate new series - ser.iloc[0] = 0 - assert result.iloc[0] == 1 - else: - # mutating original does mutate shallow copy - ser.iloc[0] = 0 - assert result.iloc[0] == 0 + # mutating original doesn't mutate new series + ser.iloc[0] = 0 + assert result.iloc[0] == 1 -def test_series_from_series_with_reindex(using_copy_on_write): +def test_series_from_series_with_reindex(): # Case: constructing a Series from another Series with specifying an index # that potentially requires a reindex of the values ser = Series([1, 2, 3], name="name") @@ -77,17 +64,13 @@ def test_series_from_series_with_reindex(using_copy_on_write): result = Series(ser, index=index) assert np.shares_memory(ser.values, result.values) result.iloc[0] = 0 - if using_copy_on_write: - assert ser.iloc[0] == 1 - else: - assert ser.iloc[0] == 0 + assert ser.iloc[0] == 1 # ensure that if an actual reindex is needed, we don't have any refs # (mutating the result wouldn't trigger CoW) result = Series(ser, index=[0, 1, 2, 3]) assert not np.shares_memory(ser.values, result.values) - if using_copy_on_write: - assert not result._mgr.blocks[0].refs.has_reference() + assert not result._mgr.blocks[0].refs.has_reference() @pytest.mark.parametrize("dtype", [None, "int64"]) @@ -95,25 +78,18 @@ def test_series_from_series_with_reindex(using_copy_on_write): @pytest.mark.parametrize( "arr", [np.array([1, 2, 3], dtype="int64"), pd.array([1, 2, 3], dtype="Int64")] ) -def test_series_from_array(using_copy_on_write, idx, dtype, arr): +def test_series_from_array(idx, dtype, arr): ser = Series(arr, dtype=dtype, index=idx) ser_orig = ser.copy() data = getattr(arr, "_data", arr) - if using_copy_on_write: - assert not np.shares_memory(get_array(ser), data) - else: - assert np.shares_memory(get_array(ser), data) + assert not np.shares_memory(get_array(ser), data) arr[0] = 100 - if using_copy_on_write: - tm.assert_series_equal(ser, ser_orig) - else: - expected = Series([100, 2, 3], dtype=dtype if dtype is not None else arr.dtype) - tm.assert_series_equal(ser, expected) + tm.assert_series_equal(ser, ser_orig) @pytest.mark.parametrize("copy", [True, False, None]) -def test_series_from_array_different_dtype(using_copy_on_write, copy): +def test_series_from_array_different_dtype(copy): arr = np.array([1, 2, 3], dtype="int64") ser = Series(arr, dtype="int32", copy=copy) assert not np.shares_memory(get_array(ser), arr) @@ -128,39 +104,34 @@ def test_series_from_array_different_dtype(using_copy_on_write, copy): TimedeltaIndex([Timedelta("1 days"), Timedelta("2 days")]), ], ) -def test_series_from_index(using_copy_on_write, idx): +def test_series_from_index(idx): ser = Series(idx) expected = idx.copy(deep=True) - if using_copy_on_write: - assert np.shares_memory(get_array(ser), get_array(idx)) - assert not ser._mgr._has_no_reference(0) - else: - assert not np.shares_memory(get_array(ser), get_array(idx)) + assert np.shares_memory(get_array(ser), get_array(idx)) + assert not ser._mgr._has_no_reference(0) ser.iloc[0] = ser.iloc[1] tm.assert_index_equal(idx, expected) -def test_series_from_index_different_dtypes(using_copy_on_write): +def test_series_from_index_different_dtypes(): idx = Index([1, 2, 3], dtype="int64") ser = Series(idx, dtype="int32") assert not np.shares_memory(get_array(ser), get_array(idx)) - if using_copy_on_write: - assert ser._mgr._has_no_reference(0) + assert ser._mgr._has_no_reference(0) -def test_series_from_block_manager_different_dtype(using_copy_on_write): +def test_series_from_block_manager_different_dtype(): ser = Series([1, 2, 3], dtype="int64") msg = "Passing a SingleBlockManager to Series" with tm.assert_produces_warning(DeprecationWarning, match=msg): ser2 = Series(ser._mgr, dtype="int32") assert not np.shares_memory(get_array(ser), get_array(ser2)) - if using_copy_on_write: - assert ser2._mgr._has_no_reference(0) + assert ser2._mgr._has_no_reference(0) @pytest.mark.parametrize("use_mgr", [True, False]) @pytest.mark.parametrize("columns", [None, ["a"]]) -def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr): +def test_dataframe_constructor_mgr_or_df(columns, use_mgr): df = DataFrame({"a": [1, 2, 3]}) df_orig = df.copy() @@ -177,18 +148,14 @@ def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr): assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) new_df.iloc[0] = 100 - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) - tm.assert_frame_equal(df, df_orig) - else: - assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) - tm.assert_frame_equal(df, new_df) + assert not np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("dtype", [None, "int64", "Int64"]) @pytest.mark.parametrize("index", [None, [0, 1, 2]]) @pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]]) -def test_dataframe_from_dict_of_series(using_copy_on_write, columns, index, dtype): +def test_dataframe_from_dict_of_series(columns, index, dtype): # Case: constructing a DataFrame from Series objects with copy=False # has to do a lazy following CoW rules # (the default for DataFrame(dict) is still to copy to ensure consolidation) @@ -208,11 +175,8 @@ def test_dataframe_from_dict_of_series(using_copy_on_write, columns, index, dtyp # mutating the new dataframe doesn't mutate original result.iloc[0, 0] = 10 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(s1)) - tm.assert_series_equal(s1, s1_orig) - else: - assert s1.iloc[0] == 10 + assert not np.shares_memory(get_array(result, "a"), get_array(s1)) + tm.assert_series_equal(s1, s1_orig) # the same when modifying the parent series s1 = Series([1, 2, 3]) @@ -221,11 +185,8 @@ def test_dataframe_from_dict_of_series(using_copy_on_write, columns, index, dtyp {"a": s1, "b": s2}, index=index, columns=columns, dtype=dtype, copy=False ) s1.iloc[0] = 10 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(s1)) - tm.assert_frame_equal(result, expected) - else: - assert result.iloc[0, 0] == 10 + assert not np.shares_memory(get_array(result, "a"), get_array(s1)) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("dtype", [None, "int64"]) @@ -249,38 +210,30 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype): @pytest.mark.parametrize( "data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)] ) -def test_dataframe_from_series_or_index( - using_copy_on_write, data, dtype, index_or_series -): +def test_dataframe_from_series_or_index(data, dtype, index_or_series): obj = index_or_series(data, dtype=dtype) obj_orig = obj.copy() df = DataFrame(obj, dtype=dtype) assert np.shares_memory(get_array(obj), get_array(df, 0)) - if using_copy_on_write: - assert not df._mgr._has_no_reference(0) + assert not df._mgr._has_no_reference(0) df.iloc[0, 0] = data[-1] - if using_copy_on_write: - tm.assert_equal(obj, obj_orig) + tm.assert_equal(obj, obj_orig) -def test_dataframe_from_series_or_index_different_dtype( - using_copy_on_write, index_or_series -): +def test_dataframe_from_series_or_index_different_dtype(index_or_series): obj = index_or_series([1, 2], dtype="int64") df = DataFrame(obj, dtype="int32") assert not np.shares_memory(get_array(obj), get_array(df, 0)) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) -def test_dataframe_from_series_infer_datetime(using_copy_on_write): +def test_dataframe_from_series_infer_datetime(): ser = Series([Timestamp("2019-12-31"), Timestamp("2020-12-31")], dtype=object) with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): df = DataFrame(ser) assert not np.shares_memory(get_array(ser), get_array(df, 0)) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) @pytest.mark.parametrize("index", [None, [0, 1, 2]]) @@ -301,38 +254,33 @@ def test_dataframe_from_dict_of_series_with_dtype(index): @pytest.mark.parametrize("copy", [False, None, True]) -def test_frame_from_numpy_array(using_copy_on_write, copy): +def test_frame_from_numpy_array(copy): arr = np.array([[1, 2], [3, 4]]) df = DataFrame(arr, copy=copy) - if using_copy_on_write and copy is not False or copy is True: + if copy is not False or copy is True: assert not np.shares_memory(get_array(df, 0), arr) else: assert np.shares_memory(get_array(df, 0), arr) -def test_dataframe_from_records_with_dataframe(using_copy_on_write): +def test_dataframe_from_records_with_dataframe(): df = DataFrame({"a": [1, 2, 3]}) df_orig = df.copy() with tm.assert_produces_warning(FutureWarning): df2 = DataFrame.from_records(df) - if using_copy_on_write: - assert not df._mgr._has_no_reference(0) + assert not df._mgr._has_no_reference(0) assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) df2.iloc[0, 0] = 100 - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) - else: - tm.assert_frame_equal(df, df2) + tm.assert_frame_equal(df, df_orig) -def test_frame_from_dict_of_index(using_copy_on_write): +def test_frame_from_dict_of_index(): idx = Index([1, 2, 3]) expected = idx.copy(deep=True) df = DataFrame({"a": idx}, copy=False) assert np.shares_memory(get_array(df, "a"), idx._values) - if using_copy_on_write: - assert not df._mgr._has_no_reference(0) + assert not df._mgr._has_no_reference(0) - df.iloc[0, 0] = 100 - tm.assert_index_equal(idx, expected) + df.iloc[0, 0] = 100 + tm.assert_index_equal(idx, expected) diff --git a/pandas/tests/copy_view/test_core_functionalities.py b/pandas/tests/copy_view/test_core_functionalities.py index b37e1a3718ac1..70d7112ddbd89 100644 --- a/pandas/tests/copy_view/test_core_functionalities.py +++ b/pandas/tests/copy_view/test_core_functionalities.py @@ -6,18 +6,17 @@ from pandas.tests.copy_view.util import get_array -def test_assigning_to_same_variable_removes_references(using_copy_on_write): +def test_assigning_to_same_variable_removes_references(): df = DataFrame({"a": [1, 2, 3]}) df = df.reset_index() - if using_copy_on_write: - assert df._mgr._has_no_reference(1) + assert df._mgr._has_no_reference(1) arr = get_array(df, "a") df.iloc[0, 1] = 100 # Write into a assert np.shares_memory(arr, get_array(df, "a")) -def test_setitem_dont_track_unnecessary_references(using_copy_on_write): +def test_setitem_dont_track_unnecessary_references(): df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1}) df["b"] = 100 @@ -28,7 +27,7 @@ def test_setitem_dont_track_unnecessary_references(using_copy_on_write): assert np.shares_memory(arr, get_array(df, "a")) -def test_setitem_with_view_copies(using_copy_on_write): +def test_setitem_with_view_copies(): df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1}) view = df[:] expected = df.copy() @@ -36,12 +35,11 @@ def test_setitem_with_view_copies(using_copy_on_write): df["b"] = 100 arr = get_array(df, "a") df.iloc[0, 0] = 100 # Check that we correctly track reference - if using_copy_on_write: - assert not np.shares_memory(arr, get_array(df, "a")) - tm.assert_frame_equal(view, expected) + assert not np.shares_memory(arr, get_array(df, "a")) + tm.assert_frame_equal(view, expected) -def test_setitem_with_view_invalidated_does_not_copy(using_copy_on_write, request): +def test_setitem_with_view_invalidated_does_not_copy(request): df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1}) view = df[:] @@ -51,19 +49,16 @@ def test_setitem_with_view_invalidated_does_not_copy(using_copy_on_write, reques # TODO(CoW-warn) false positive? -> block gets split because of `df["b"] = 100` # which introduces additional refs, even when those of `view` go out of scopes df.iloc[0, 0] = 100 - if using_copy_on_write: - # Setitem split the block. Since the old block shared data with view - # all the new blocks are referencing view and each other. When view - # goes out of scope, they don't share data with any other block, - # so we should not trigger a copy - mark = pytest.mark.xfail( - reason="blk.delete does not track references correctly" - ) - request.applymarker(mark) - assert np.shares_memory(arr, get_array(df, "a")) - - -def test_out_of_scope(using_copy_on_write): + # Setitem split the block. Since the old block shared data with view + # all the new blocks are referencing view and each other. When view + # goes out of scope, they don't share data with any other block, + # so we should not trigger a copy + mark = pytest.mark.xfail(reason="blk.delete does not track references correctly") + request.applymarker(mark) + assert np.shares_memory(arr, get_array(df, "a")) + + +def test_out_of_scope(): def func(): df = DataFrame({"a": [1, 2], "b": 1.5, "c": 1}) # create some subset @@ -71,32 +66,28 @@ def func(): return result result = func() - if using_copy_on_write: - assert not result._mgr.blocks[0].refs.has_reference() - assert not result._mgr.blocks[1].refs.has_reference() + assert not result._mgr.blocks[0].refs.has_reference() + assert not result._mgr.blocks[1].refs.has_reference() -def test_delete(using_copy_on_write): +def test_delete(): df = DataFrame( np.random.default_rng(2).standard_normal((4, 3)), columns=["a", "b", "c"] ) del df["b"] - if using_copy_on_write: - assert not df._mgr.blocks[0].refs.has_reference() - assert not df._mgr.blocks[1].refs.has_reference() + assert not df._mgr.blocks[0].refs.has_reference() + assert not df._mgr.blocks[1].refs.has_reference() df = df[["a"]] - if using_copy_on_write: - assert not df._mgr.blocks[0].refs.has_reference() + assert not df._mgr.blocks[0].refs.has_reference() -def test_delete_reference(using_copy_on_write): +def test_delete_reference(): df = DataFrame( np.random.default_rng(2).standard_normal((4, 3)), columns=["a", "b", "c"] ) x = df[:] del df["b"] - if using_copy_on_write: - assert df._mgr.blocks[0].refs.has_reference() - assert df._mgr.blocks[1].refs.has_reference() - assert x._mgr.blocks[0].refs.has_reference() + assert df._mgr.blocks[0].refs.has_reference() + assert df._mgr.blocks[1].refs.has_reference() + assert x._mgr.blocks[0].refs.has_reference() diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index da72e89b23ca0..09d13677eef62 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -48,7 +48,7 @@ def make_series(*args, **kwargs): # Indexing operations taking subset + modifying the subset/parent -def test_subset_column_selection(backend, using_copy_on_write): +def test_subset_column_selection(backend): # Case: taking a subset of the columns of a DataFrame # + afterwards modifying the subset _, DataFrame, _ = backend @@ -69,7 +69,7 @@ def test_subset_column_selection(backend, using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_subset_column_selection_modify_parent(backend, using_copy_on_write): +def test_subset_column_selection_modify_parent(backend): # Case: taking a subset of the columns of a DataFrame # + afterwards modifying the parent _, DataFrame, _ = backend @@ -77,22 +77,20 @@ def test_subset_column_selection_modify_parent(backend, using_copy_on_write): subset = df[["a", "c"]] - if using_copy_on_write: - # the subset shares memory ... - assert np.shares_memory(get_array(subset, "a"), get_array(df, "a")) - # ... but parent uses CoW parent when it is modified + # the subset shares memory ... + assert np.shares_memory(get_array(subset, "a"), get_array(df, "a")) + # ... but parent uses CoW parent when it is modified df.iloc[0, 0] = 0 assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a")) - if using_copy_on_write: - # different column/block still shares memory - assert np.shares_memory(get_array(subset, "c"), get_array(df, "c")) + # different column/block still shares memory + assert np.shares_memory(get_array(subset, "c"), get_array(df, "c")) expected = DataFrame({"a": [1, 2, 3], "c": [0.1, 0.2, 0.3]}) tm.assert_frame_equal(subset, expected) -def test_subset_row_slice(backend, using_copy_on_write): +def test_subset_row_slice(backend): # Case: taking a subset of the rows of a DataFrame using a slice # + afterwards modifying the subset _, DataFrame, _ = backend @@ -160,7 +158,6 @@ def test_subset_loc_rows_columns( dtype, row_indexer, column_indexer, - using_copy_on_write, ): # Case: taking a subset of the rows+columns of a DataFrame using .loc # + afterwards modifying the subset @@ -176,14 +173,6 @@ def test_subset_loc_rows_columns( subset = df.loc[row_indexer, column_indexer] - # a few corner cases _do_ actually modify the parent (with both row and column - # slice, and in case of BlockManager with single block) - mutate_parent = ( - isinstance(row_indexer, slice) - and isinstance(column_indexer, slice) - and (dtype == "int64" and dtype_backend == "numpy" and not using_copy_on_write) - ) - # modifying the subset never modifies the parent subset.iloc[0, 0] = 0 @@ -191,8 +180,6 @@ def test_subset_loc_rows_columns( {"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3) ) tm.assert_frame_equal(subset, expected) - if mutate_parent: - df_orig.iloc[1, 1] = 0 tm.assert_frame_equal(df, df_orig) @@ -214,7 +201,6 @@ def test_subset_iloc_rows_columns( dtype, row_indexer, column_indexer, - using_copy_on_write, ): # Case: taking a subset of the rows+columns of a DataFrame using .iloc # + afterwards modifying the subset @@ -230,14 +216,6 @@ def test_subset_iloc_rows_columns( subset = df.iloc[row_indexer, column_indexer] - # a few corner cases _do_ actually modify the parent (with both row and column - # slice, and in case of BlockManager with single block) - mutate_parent = ( - isinstance(row_indexer, slice) - and isinstance(column_indexer, slice) - and (dtype == "int64" and dtype_backend == "numpy" and not using_copy_on_write) - ) - # modifying the subset never modifies the parent subset.iloc[0, 0] = 0 @@ -245,8 +223,6 @@ def test_subset_iloc_rows_columns( {"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3) ) tm.assert_frame_equal(subset, expected) - if mutate_parent: - df_orig.iloc[1, 1] = 0 tm.assert_frame_equal(df, df_orig) @@ -322,7 +298,7 @@ def test_subset_set_column(backend): @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_subset_set_column_with_loc(backend, using_copy_on_write, dtype): +def test_subset_set_column_with_loc(backend, dtype): # Case: setting a single column with loc on a viewing subset # -> subset.loc[:, col] = value _, DataFrame, _ = backend @@ -332,12 +308,7 @@ def test_subset_set_column_with_loc(backend, using_copy_on_write, dtype): df_orig = df.copy() subset = df[1:3] - if using_copy_on_write: - subset.loc[:, "a"] = np.array([10, 11], dtype="int64") - else: - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(None): - subset.loc[:, "a"] = np.array([10, 11], dtype="int64") + subset.loc[:, "a"] = np.array([10, 11], dtype="int64") subset._mgr._verify_integrity() expected = DataFrame( @@ -345,16 +316,11 @@ def test_subset_set_column_with_loc(backend, using_copy_on_write, dtype): index=range(1, 3), ) tm.assert_frame_equal(subset, expected) - if using_copy_on_write: - # original parent dataframe is not modified (CoW) - tm.assert_frame_equal(df, df_orig) - else: - # original parent dataframe is actually updated - df_orig.loc[1:3, "a"] = np.array([10, 11], dtype="int64") - tm.assert_frame_equal(df, df_orig) + # original parent dataframe is not modified (CoW) + tm.assert_frame_equal(df, df_orig) -def test_subset_set_column_with_loc2(backend, using_copy_on_write): +def test_subset_set_column_with_loc2(backend): # Case: setting a single column with loc on a viewing subset # -> subset.loc[:, col] = value # separate test for case of DataFrame of a single column -> takes a separate @@ -364,29 +330,19 @@ def test_subset_set_column_with_loc2(backend, using_copy_on_write): df_orig = df.copy() subset = df[1:3] - if using_copy_on_write: - subset.loc[:, "a"] = 0 - else: - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(None): - subset.loc[:, "a"] = 0 + subset.loc[:, "a"] = 0 subset._mgr._verify_integrity() expected = DataFrame({"a": [0, 0]}, index=range(1, 3)) tm.assert_frame_equal(subset, expected) - if using_copy_on_write: - # original parent dataframe is not modified (CoW) - tm.assert_frame_equal(df, df_orig) - else: - # original parent dataframe is actually updated - df_orig.loc[1:3, "a"] = 0 - tm.assert_frame_equal(df, df_orig) + # original parent dataframe is not modified (CoW) + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_subset_set_columns(backend, using_copy_on_write, dtype): +def test_subset_set_columns(backend, dtype): # Case: setting multiple columns on a viewing subset # -> subset[[col1, col2]] = value dtype_backend, DataFrame, _ = backend @@ -417,7 +373,7 @@ def test_subset_set_columns(backend, using_copy_on_write, dtype): [slice("a", "b"), np.array([True, True, False]), ["a", "b"]], ids=["slice", "mask", "array"], ) -def test_subset_set_with_column_indexer(backend, indexer, using_copy_on_write): +def test_subset_set_with_column_indexer(backend, indexer): # Case: setting multiple columns with a column indexer on a viewing subset # -> subset.loc[:, [col1, col2]] = value _, DataFrame, _ = backend @@ -425,25 +381,12 @@ def test_subset_set_with_column_indexer(backend, indexer, using_copy_on_write): df_orig = df.copy() subset = df[1:3] - if using_copy_on_write: - subset.loc[:, indexer] = 0 - else: - with pd.option_context("chained_assignment", "warn"): - # As of 2.0, this setitem attempts (successfully) to set values - # inplace, so the assignment is not chained. - subset.loc[:, indexer] = 0 + subset.loc[:, indexer] = 0 subset._mgr._verify_integrity() expected = DataFrame({"a": [0, 0], "b": [0.0, 0.0], "c": [5, 6]}, index=range(1, 3)) tm.assert_frame_equal(subset, expected) - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) - else: - # pre-2.0, in the mixed case with BlockManager, only column "a" - # would be mutated in the parent frame. this changed with the - # enforcement of GH#45333 - df_orig.loc[1:2, ["a", "b"]] = 0 - tm.assert_frame_equal(df, df_orig) + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize( @@ -473,7 +416,6 @@ def test_subset_chained_getitem( backend, method, dtype, - using_copy_on_write, ): # Case: creating a subset using multiple, chained getitem calls using views # still needs to guarantee proper CoW behaviour @@ -483,31 +425,17 @@ def test_subset_chained_getitem( ) df_orig = df.copy() - # when not using CoW, it depends on whether we have a single block or not - # and whether we are slicing the columns -> in that case we have a view - test_callspec = request.node.callspec.id - subset_is_view = test_callspec in ( - "numpy-single-block-column-iloc-slice", - "numpy-single-block-column-loc-slice", - ) - # modify subset -> don't modify parent subset = method(df) subset.iloc[0, 0] = 0 - if using_copy_on_write or (not subset_is_view): - tm.assert_frame_equal(df, df_orig) - else: - assert df.iloc[0, 0] == 0 + tm.assert_frame_equal(df, df_orig) # modify parent -> don't modify subset subset = method(df) df.iloc[0, 0] = 0 expected = DataFrame({"a": [1, 2], "b": [4, 5]}) - if using_copy_on_write or not subset_is_view: - tm.assert_frame_equal(subset, expected) - else: - assert subset.iloc[0, 0] == 0 + tm.assert_frame_equal(subset, expected) @pytest.mark.parametrize( @@ -548,7 +476,7 @@ def test_subset_chained_getitem_column(backend, dtype): ], ids=["getitem", "iloc", "loc", "long-chain"], ) -def test_subset_chained_getitem_series(backend, method, using_copy_on_write): +def test_subset_chained_getitem_series(backend, method): # Case: creating a subset using multiple, chained getitem calls using views # still needs to guarantee proper CoW behaviour _, _, Series = backend @@ -558,22 +486,16 @@ def test_subset_chained_getitem_series(backend, method, using_copy_on_write): # modify subset -> don't modify parent subset = method(s) subset.iloc[0] = 0 - if using_copy_on_write: - tm.assert_series_equal(s, s_orig) - else: - assert s.iloc[0] == 0 + tm.assert_series_equal(s, s_orig) # modify parent -> don't modify subset subset = s.iloc[0:3].iloc[0:2] s.iloc[0] = 0 expected = Series([1, 2], index=["a", "b"]) - if using_copy_on_write: - tm.assert_series_equal(subset, expected) - else: - assert subset.iloc[0] == 0 + tm.assert_series_equal(subset, expected) -def test_subset_chained_single_block_row(using_copy_on_write): +def test_subset_chained_single_block_row(): # not parametrizing this for dtype backend, since this explicitly tests single block df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) df_orig = df.copy() @@ -581,19 +503,13 @@ def test_subset_chained_single_block_row(using_copy_on_write): # modify subset -> don't modify parent subset = df[:].iloc[0].iloc[0:2] subset.iloc[0] = 0 - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) - else: - assert df.iloc[0, 0] == 0 + tm.assert_frame_equal(df, df_orig) # modify parent -> don't modify subset subset = df[:].iloc[0].iloc[0:2] df.iloc[0, 0] = 0 expected = Series([1, 4], index=["a", "b"], name=0) - if using_copy_on_write: - tm.assert_series_equal(subset, expected) - else: - assert subset.iloc[0] == 0 + tm.assert_series_equal(subset, expected) @pytest.mark.parametrize( @@ -607,7 +523,7 @@ def test_subset_chained_single_block_row(using_copy_on_write): ], ids=["getitem", "loc", "loc-rows", "iloc", "iloc-rows"], ) -def test_null_slice(backend, method, using_copy_on_write): +def test_null_slice(backend, method): # Case: also all variants of indexing with a null slice (:) should return # new objects to ensure we correctly use CoW for the results dtype_backend, DataFrame, _ = backend @@ -621,10 +537,7 @@ def test_null_slice(backend, method, using_copy_on_write): # and those trigger CoW when mutated df2.iloc[0, 0] = 0 - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) - else: - assert df.iloc[0, 0] == 0 + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize( @@ -636,7 +549,7 @@ def test_null_slice(backend, method, using_copy_on_write): ], ids=["getitem", "loc", "iloc"], ) -def test_null_slice_series(backend, method, using_copy_on_write): +def test_null_slice_series(backend, method): _, _, Series = backend s = Series([1, 2, 3], index=["a", "b", "c"]) s_orig = s.copy() @@ -648,10 +561,7 @@ def test_null_slice_series(backend, method, using_copy_on_write): # and those trigger CoW when mutated s2.iloc[0] = 0 - if using_copy_on_write: - tm.assert_series_equal(s, s_orig) - else: - assert s.iloc[0] == 0 + tm.assert_series_equal(s, s_orig) # TODO add more tests modifying the parent @@ -661,7 +571,7 @@ def test_null_slice_series(backend, method, using_copy_on_write): # Series -- Indexing operations taking subset + modifying the subset/parent -def test_series_getitem_slice(backend, using_copy_on_write): +def test_series_getitem_slice(backend): # Case: taking a slice of a Series + afterwards modifying the subset _, _, Series = backend s = Series([1, 2, 3], index=["a", "b", "c"]) @@ -672,21 +582,16 @@ def test_series_getitem_slice(backend, using_copy_on_write): subset.iloc[0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(subset), get_array(s)) + assert not np.shares_memory(get_array(subset), get_array(s)) expected = Series([0, 2, 3], index=["a", "b", "c"]) tm.assert_series_equal(subset, expected) - if using_copy_on_write: - # original parent series is not modified (CoW) - tm.assert_series_equal(s, s_orig) - else: - # original parent series is actually updated - assert s.iloc[0] == 0 + # original parent series is not modified (CoW) + tm.assert_series_equal(s, s_orig) -def test_series_getitem_ellipsis(using_copy_on_write): +def test_series_getitem_ellipsis(): # Case: taking a view of a Series using Ellipsis + afterwards modifying the subset s = Series([1, 2, 3]) s_orig = s.copy() @@ -696,18 +601,13 @@ def test_series_getitem_ellipsis(using_copy_on_write): subset.iloc[0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(subset), get_array(s)) + assert not np.shares_memory(get_array(subset), get_array(s)) expected = Series([0, 2, 3]) tm.assert_series_equal(subset, expected) - if using_copy_on_write: - # original parent series is not modified (CoW) - tm.assert_series_equal(s, s_orig) - else: - # original parent series is actually updated - assert s.iloc[0] == 0 + # original parent series is not modified (CoW) + tm.assert_series_equal(s, s_orig) @pytest.mark.parametrize( @@ -715,9 +615,7 @@ def test_series_getitem_ellipsis(using_copy_on_write): [slice(0, 2), np.array([True, True, False]), np.array([0, 1])], ids=["slice", "mask", "array"], ) -def test_series_subset_set_with_indexer( - backend, indexer_si, indexer, using_copy_on_write -): +def test_series_subset_set_with_indexer(backend, indexer_si, indexer): # Case: setting values in a viewing Series with an indexer _, _, Series = backend s = Series([1, 2, 3], index=["a", "b", "c"]) @@ -737,17 +635,14 @@ def test_series_subset_set_with_indexer( expected = Series([0, 0, 3], index=["a", "b", "c"]) tm.assert_series_equal(subset, expected) - if using_copy_on_write: - tm.assert_series_equal(s, s_orig) - else: - tm.assert_series_equal(s, expected) + tm.assert_series_equal(s, s_orig) # ----------------------------------------------------------------------------- # del operator -def test_del_frame(backend, using_copy_on_write): +def test_del_frame(backend): # Case: deleting a column with `del` on a viewing child dataframe should # not modify parent + update the references dtype_backend, DataFrame, _ = backend @@ -769,11 +664,8 @@ def test_del_frame(backend, using_copy_on_write): df_orig = df.copy() df2.loc[0, "a"] = 100 - if using_copy_on_write: - # modifying child after deleting a column still doesn't update parent - tm.assert_frame_equal(df, df_orig) - else: - assert df.loc[0, "a"] == 100 + # modifying child after deleting a column still doesn't update parent + tm.assert_frame_equal(df, df_orig) def test_del_series(backend): @@ -873,7 +765,7 @@ def test_column_as_series_no_item_cache(request, backend, method): # TODO add tests for other indexing methods on the Series -def test_dataframe_add_column_from_series(backend, using_copy_on_write): +def test_dataframe_add_column_from_series(backend): # Case: adding a new column to a DataFrame from an existing column/series # -> delays copy under CoW _, DataFrame, Series = backend @@ -881,10 +773,7 @@ def test_dataframe_add_column_from_series(backend, using_copy_on_write): s = Series([10, 11, 12]) df["new"] = s - if using_copy_on_write: - assert np.shares_memory(get_array(df, "new"), get_array(s)) - else: - assert not np.shares_memory(get_array(df, "new"), get_array(s)) + assert np.shares_memory(get_array(df, "new"), get_array(s)) # editing series -> doesn't modify column in frame s[0] = 0 @@ -907,9 +796,7 @@ def test_dataframe_add_column_from_series(backend, using_copy_on_write): @pytest.mark.parametrize( "col", [[0.1, 0.2, 0.3], [7, 8, 9]], ids=["mixed-block", "single-block"] ) -def test_set_value_copy_only_necessary_column( - using_copy_on_write, indexer_func, indexer, val, col -): +def test_set_value_copy_only_necessary_column(indexer_func, indexer, val, col): # When setting inplace, only copy column that is modified instead of the whole # block (by splitting the block) df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": col}) @@ -924,31 +811,18 @@ def test_set_value_copy_only_necessary_column( indexer_func(df)[indexer] = val - if using_copy_on_write: - assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) - assert not np.shares_memory(get_array(df, "a"), get_array(view, "a")) - tm.assert_frame_equal(view, df_orig) - else: - assert np.shares_memory(get_array(df, "c"), get_array(view, "c")) - if val == "a": - assert not np.shares_memory(get_array(df, "a"), get_array(view, "a")) - else: - assert np.shares_memory(get_array(df, "a"), get_array(view, "a")) + assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) + assert not np.shares_memory(get_array(df, "a"), get_array(view, "a")) + tm.assert_frame_equal(view, df_orig) -def test_series_midx_slice(using_copy_on_write): +def test_series_midx_slice(): ser = Series([1, 2, 3], index=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 5]])) ser_orig = ser.copy() result = ser[1] assert np.shares_memory(get_array(ser), get_array(result)) result.iloc[0] = 100 - if using_copy_on_write: - tm.assert_series_equal(ser, ser_orig) - else: - expected = Series( - [100, 2, 3], index=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 5]]) - ) - tm.assert_series_equal(ser, expected) + tm.assert_series_equal(ser, ser_orig) def test_getitem_midx_slice(): @@ -963,7 +837,7 @@ def test_getitem_midx_slice(): tm.assert_frame_equal(df_orig, df) -def test_series_midx_tuples_slice(using_copy_on_write): +def test_series_midx_tuples_slice(): ser = Series( [1, 2, 3], index=pd.MultiIndex.from_tuples([((1, 2), 3), ((1, 2), 4), ((2, 3), 4)]), @@ -971,12 +845,11 @@ def test_series_midx_tuples_slice(using_copy_on_write): result = ser[(1, 2)] assert np.shares_memory(get_array(ser), get_array(result)) result.iloc[0] = 100 - if using_copy_on_write: - expected = Series( - [1, 2, 3], - index=pd.MultiIndex.from_tuples([((1, 2), 3), ((1, 2), 4), ((2, 3), 4)]), - ) - tm.assert_series_equal(ser, expected) + expected = Series( + [1, 2, 3], + index=pd.MultiIndex.from_tuples([((1, 2), 3), ((1, 2), 4), ((2, 3), 4)]), + ) + tm.assert_series_equal(ser, expected) def test_midx_read_only_bool_indexer(): @@ -1000,17 +873,14 @@ def mklbl(prefix, n): tm.assert_series_equal(mask, expected_mask) -def test_loc_enlarging_with_dataframe(using_copy_on_write): +def test_loc_enlarging_with_dataframe(): df = DataFrame({"a": [1, 2, 3]}) rhs = DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]}) rhs_orig = rhs.copy() df.loc[:, ["b", "c"]] = rhs - if using_copy_on_write: - assert np.shares_memory(get_array(df, "b"), get_array(rhs, "b")) - assert np.shares_memory(get_array(df, "c"), get_array(rhs, "c")) - assert not df._mgr._has_no_reference(1) - else: - assert not np.shares_memory(get_array(df, "b"), get_array(rhs, "b")) + assert np.shares_memory(get_array(df, "b"), get_array(rhs, "b")) + assert np.shares_memory(get_array(df, "c"), get_array(rhs, "c")) + assert not df._mgr._has_no_reference(1) df.iloc[0, 1] = 100 tm.assert_frame_equal(rhs, rhs_orig) diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py index f1a4decce623f..07447ab827cec 100644 --- a/pandas/tests/copy_view/test_internals.py +++ b/pandas/tests/copy_view/test_internals.py @@ -6,7 +6,7 @@ from pandas.tests.copy_view.util import get_array -def test_consolidate(using_copy_on_write): +def test_consolidate(): # create unconsolidated DataFrame df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) df["c"] = [4, 5, 6] @@ -35,10 +35,9 @@ def test_consolidate(using_copy_on_write): assert not df._mgr.blocks[2].refs.has_reference() # and modifying subset still doesn't modify parent - if using_copy_on_write: - subset.iloc[0, 1] = 0.0 - assert not df._mgr.blocks[1].refs.has_reference() - assert df.loc[0, "b"] == 0.1 + subset.iloc[0, 1] = 0.0 + assert not df._mgr.blocks[1].refs.has_reference() + assert df.loc[0, "b"] == 0.1 @pytest.mark.parametrize("dtype", [np.intp, np.int8]) @@ -55,7 +54,7 @@ def test_consolidate(using_copy_on_write): ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T), ], ) -def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr, dtype): +def test_iset_splits_blocks_inplace(locs, arr, dtype): # Nothing currently calls iset with # more than 1 loc with inplace=True (only happens with inplace=False) # but ensure that it works @@ -75,14 +74,9 @@ def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr, dtype): df2._mgr.iset(locs, arr, inplace=True) tm.assert_frame_equal(df, df_orig) - - if using_copy_on_write: - for i, col in enumerate(df.columns): - if i not in locs: - assert np.shares_memory(get_array(df, col), get_array(df2, col)) - else: - for col in df.columns: - assert not np.shares_memory(get_array(df, col), get_array(df2, col)) + for i, col in enumerate(df.columns): + if i not in locs: + assert np.shares_memory(get_array(df, col), get_array(df2, col)) def test_exponential_backoff(): diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index d72600956a6d6..733c6ddb9bd8a 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -3,7 +3,6 @@ from pandas import ( NA, - ArrowDtype, DataFrame, Interval, NaT, @@ -16,7 +15,7 @@ @pytest.mark.parametrize("method", ["pad", "nearest", "linear"]) -def test_interpolate_no_op(using_copy_on_write, method): +def test_interpolate_no_op(method): df = DataFrame({"a": [1, 2]}) df_orig = df.copy() @@ -27,35 +26,26 @@ def test_interpolate_no_op(using_copy_on_write, method): with tm.assert_produces_warning(warn, match=msg): result = df.interpolate(method=method) - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) result.iloc[0, 0] = 100 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("func", ["ffill", "bfill"]) -def test_interp_fill_functions(using_copy_on_write, func): +def test_interp_fill_functions(func): # Check that these takes the same code paths as interpolate df = DataFrame({"a": [1, 2]}) df_orig = df.copy() result = getattr(df, func)() - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) - + assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) result.iloc[0, 0] = 100 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) @@ -63,54 +53,48 @@ def test_interp_fill_functions(using_copy_on_write, func): @pytest.mark.parametrize( "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]] ) -def test_interpolate_triggers_copy(using_copy_on_write, vals, func): +def test_interpolate_triggers_copy(vals, func): df = DataFrame({"a": vals}) result = getattr(df, func)() assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) - if using_copy_on_write: - # Check that we don't have references when triggering a copy - assert result._mgr._has_no_reference(0) + # Check that we don't have references when triggering a copy + assert result._mgr._has_no_reference(0) @pytest.mark.parametrize( "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]] ) -def test_interpolate_inplace_no_reference_no_copy(using_copy_on_write, vals): +def test_interpolate_inplace_no_reference_no_copy(vals): df = DataFrame({"a": vals}) arr = get_array(df, "a") df.interpolate(method="linear", inplace=True) assert np.shares_memory(arr, get_array(df, "a")) - if using_copy_on_write: - # Check that we don't have references when triggering a copy - assert df._mgr._has_no_reference(0) + # Check that we don't have references when triggering a copy + assert df._mgr._has_no_reference(0) @pytest.mark.parametrize( "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]] ) -def test_interpolate_inplace_with_refs(using_copy_on_write, vals): +def test_interpolate_inplace_with_refs(vals): df = DataFrame({"a": [1, np.nan, 2]}) df_orig = df.copy() arr = get_array(df, "a") view = df[:] df.interpolate(method="linear", inplace=True) - - if using_copy_on_write: - # Check that copy was triggered in interpolate and that we don't - # have any references left - assert not np.shares_memory(arr, get_array(df, "a")) - tm.assert_frame_equal(df_orig, view) - assert df._mgr._has_no_reference(0) - assert view._mgr._has_no_reference(0) - else: - assert np.shares_memory(arr, get_array(df, "a")) + # Check that copy was triggered in interpolate and that we don't + # have any references left + assert not np.shares_memory(arr, get_array(df, "a")) + tm.assert_frame_equal(df_orig, view) + assert df._mgr._has_no_reference(0) + assert view._mgr._has_no_reference(0) @pytest.mark.parametrize("func", ["ffill", "bfill"]) @pytest.mark.parametrize("dtype", ["float64", "Float64"]) -def test_interp_fill_functions_inplace(using_copy_on_write, func, dtype): +def test_interp_fill_functions_inplace(func, dtype): # Check that these takes the same code paths as interpolate df = DataFrame({"a": [1, np.nan, 2]}, dtype=dtype) df_orig = df.copy() @@ -119,18 +103,15 @@ def test_interp_fill_functions_inplace(using_copy_on_write, func, dtype): getattr(df, func)(inplace=True) - if using_copy_on_write: - # Check that copy was triggered in interpolate and that we don't - # have any references left - assert not np.shares_memory(arr, get_array(df, "a")) - tm.assert_frame_equal(df_orig, view) - assert df._mgr._has_no_reference(0) - assert view._mgr._has_no_reference(0) - else: - assert np.shares_memory(arr, get_array(df, "a")) is (dtype == "float64") + # Check that copy was triggered in interpolate and that we don't + # have any references left + assert not np.shares_memory(arr, get_array(df, "a")) + tm.assert_frame_equal(df_orig, view) + assert df._mgr._has_no_reference(0) + assert view._mgr._has_no_reference(0) -def test_interpolate_cleaned_fill_method(using_copy_on_write): +def test_interpolate_cleaned_fill_method(): # Check that "method is set to None" case works correctly df = DataFrame({"a": ["a", np.nan, "c"], "b": 1}) df_orig = df.copy() @@ -139,19 +120,14 @@ def test_interpolate_cleaned_fill_method(using_copy_on_write): with tm.assert_produces_warning(FutureWarning, match=msg): result = df.interpolate(method="linear") - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) - + assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) result.iloc[0, 0] = Timestamp("2021-12-31") - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) -def test_interpolate_object_convert_no_op(using_copy_on_write): +def test_interpolate_object_convert_no_op(): df = DataFrame({"a": ["a", "b", "c"], "b": 1}) arr_a = get_array(df, "a") msg = "DataFrame.interpolate with method=pad is deprecated" @@ -159,36 +135,33 @@ def test_interpolate_object_convert_no_op(using_copy_on_write): df.interpolate(method="pad", inplace=True) # Now CoW makes a copy, it should not! - if using_copy_on_write: - assert df._mgr._has_no_reference(0) - assert np.shares_memory(arr_a, get_array(df, "a")) + assert df._mgr._has_no_reference(0) + assert np.shares_memory(arr_a, get_array(df, "a")) -def test_interpolate_object_convert_copies(using_copy_on_write): +def test_interpolate_object_convert_copies(): df = DataFrame({"a": Series([1, 2], dtype=object), "b": 1}) arr_a = get_array(df, "a") msg = "DataFrame.interpolate with method=pad is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): df.interpolate(method="pad", inplace=True) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) - assert not np.shares_memory(arr_a, get_array(df, "a")) + assert df._mgr._has_no_reference(0) + assert not np.shares_memory(arr_a, get_array(df, "a")) -def test_interpolate_downcast(using_copy_on_write): +def test_interpolate_downcast(): df = DataFrame({"a": [1, np.nan, 2.5], "b": 1}) arr_a = get_array(df, "a") msg = "DataFrame.interpolate with method=pad is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): df.interpolate(method="pad", inplace=True, downcast="infer") - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) assert np.shares_memory(arr_a, get_array(df, "a")) -def test_interpolate_downcast_reference_triggers_copy(using_copy_on_write): +def test_interpolate_downcast_reference_triggers_copy(): df = DataFrame({"a": [1, np.nan, 2.5], "b": 1}) df_orig = df.copy() arr_a = get_array(df, "a") @@ -197,45 +170,35 @@ def test_interpolate_downcast_reference_triggers_copy(using_copy_on_write): with tm.assert_produces_warning(FutureWarning, match=msg): df.interpolate(method="pad", inplace=True, downcast="infer") - if using_copy_on_write: - assert df._mgr._has_no_reference(0) - assert not np.shares_memory(arr_a, get_array(df, "a")) - tm.assert_frame_equal(df_orig, view) - else: - tm.assert_frame_equal(df, view) + assert df._mgr._has_no_reference(0) + assert not np.shares_memory(arr_a, get_array(df, "a")) + tm.assert_frame_equal(df_orig, view) -def test_fillna(using_copy_on_write): +def test_fillna(): df = DataFrame({"a": [1.5, np.nan], "b": 1}) df_orig = df.copy() df2 = df.fillna(5.5) - if using_copy_on_write: - assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - else: - assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) df2.iloc[0, 1] = 100 tm.assert_frame_equal(df_orig, df) -def test_fillna_dict(using_copy_on_write): +def test_fillna_dict(): df = DataFrame({"a": [1.5, np.nan], "b": 1}) df_orig = df.copy() df2 = df.fillna({"a": 100.5}) - if using_copy_on_write: - assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - else: - assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) df2.iloc[0, 1] = 100 tm.assert_frame_equal(df_orig, df) @pytest.mark.parametrize("downcast", [None, False]) -def test_fillna_inplace(using_copy_on_write, downcast): +def test_fillna_inplace(downcast): df = DataFrame({"a": [1.5, np.nan], "b": 1}) arr_a = get_array(df, "a") arr_b = get_array(df, "b") @@ -245,12 +208,11 @@ def test_fillna_inplace(using_copy_on_write, downcast): df.fillna(5.5, inplace=True, downcast=downcast) assert np.shares_memory(get_array(df, "a"), arr_a) assert np.shares_memory(get_array(df, "b"), arr_b) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) - assert df._mgr._has_no_reference(1) + assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(1) -def test_fillna_inplace_reference(using_copy_on_write): +def test_fillna_inplace_reference(): df = DataFrame({"a": [1.5, np.nan], "b": 1}) df_orig = df.copy() arr_a = get_array(df, "a") @@ -258,20 +220,16 @@ def test_fillna_inplace_reference(using_copy_on_write): view = df[:] df.fillna(5.5, inplace=True) - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "a"), arr_a) - assert np.shares_memory(get_array(df, "b"), arr_b) - assert view._mgr._has_no_reference(0) - assert df._mgr._has_no_reference(0) - tm.assert_frame_equal(view, df_orig) - else: - assert np.shares_memory(get_array(df, "a"), arr_a) - assert np.shares_memory(get_array(df, "b"), arr_b) + assert not np.shares_memory(get_array(df, "a"), arr_a) + assert np.shares_memory(get_array(df, "b"), arr_b) + assert view._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) + tm.assert_frame_equal(view, df_orig) expected = DataFrame({"a": [1.5, 5.5], "b": 1}) tm.assert_frame_equal(df, expected) -def test_fillna_interval_inplace_reference(using_copy_on_write): +def test_fillna_interval_inplace_reference(): # Set dtype explicitly to avoid implicit cast when setting nan ser = Series( interval_range(start=0, end=5), name="a", dtype="interval[float64, right]" @@ -282,94 +240,62 @@ def test_fillna_interval_inplace_reference(using_copy_on_write): view = ser[:] ser.fillna(value=Interval(left=0, right=5), inplace=True) - if using_copy_on_write: - assert not np.shares_memory( - get_array(ser, "a").left.values, get_array(view, "a").left.values - ) - tm.assert_series_equal(view, ser_orig) - else: - assert np.shares_memory( - get_array(ser, "a").left.values, get_array(view, "a").left.values - ) + assert not np.shares_memory( + get_array(ser, "a").left.values, get_array(view, "a").left.values + ) + tm.assert_series_equal(view, ser_orig) -def test_fillna_series_empty_arg(using_copy_on_write): +def test_fillna_series_empty_arg(): ser = Series([1, np.nan, 2]) ser_orig = ser.copy() result = ser.fillna({}) - - if using_copy_on_write: - assert np.shares_memory(get_array(ser), get_array(result)) - else: - assert not np.shares_memory(get_array(ser), get_array(result)) + assert np.shares_memory(get_array(ser), get_array(result)) ser.iloc[0] = 100.5 tm.assert_series_equal(ser_orig, result) -def test_fillna_series_empty_arg_inplace(using_copy_on_write): +def test_fillna_series_empty_arg_inplace(): ser = Series([1, np.nan, 2]) arr = get_array(ser) ser.fillna({}, inplace=True) assert np.shares_memory(get_array(ser), arr) - if using_copy_on_write: - assert ser._mgr._has_no_reference(0) + assert ser._mgr._has_no_reference(0) -def test_fillna_ea_noop_shares_memory( - using_copy_on_write, any_numeric_ea_and_arrow_dtype -): +def test_fillna_ea_noop_shares_memory(any_numeric_ea_and_arrow_dtype): df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype) df_orig = df.copy() df2 = df.fillna(100) assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - if using_copy_on_write: - assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - assert not df2._mgr._has_no_reference(1) - elif isinstance(df.dtypes.iloc[0], ArrowDtype): - # arrow is immutable, so no-ops do not need to copy underlying array - assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - else: - assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert not df2._mgr._has_no_reference(1) tm.assert_frame_equal(df_orig, df) df2.iloc[0, 1] = 100 - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - assert df2._mgr._has_no_reference(1) - assert df._mgr._has_no_reference(1) + assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert df2._mgr._has_no_reference(1) + assert df._mgr._has_no_reference(1) tm.assert_frame_equal(df_orig, df) -def test_fillna_inplace_ea_noop_shares_memory( - using_copy_on_write, any_numeric_ea_and_arrow_dtype -): +def test_fillna_inplace_ea_noop_shares_memory(any_numeric_ea_and_arrow_dtype): df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype) df_orig = df.copy() view = df[:] df.fillna(100, inplace=True) - - if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: - assert not np.shares_memory(get_array(df, "a"), get_array(view, "a")) - else: - # MaskedArray can actually respect inplace=True - assert np.shares_memory(get_array(df, "a"), get_array(view, "a")) + assert not np.shares_memory(get_array(df, "a"), get_array(view, "a")) assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) - if using_copy_on_write: - assert not df._mgr._has_no_reference(1) - assert not view._mgr._has_no_reference(1) + assert not df._mgr._has_no_reference(1) + assert not view._mgr._has_no_reference(1) df.iloc[0, 1] = 100 - if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: - tm.assert_frame_equal(df_orig, view) - else: - # we actually have a view - tm.assert_frame_equal(df, view) + tm.assert_frame_equal(df_orig, view) def test_fillna_chained_assignment(): diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index f2ee26c0b9009..63254f1244a2e 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -24,21 +24,19 @@ # 1 ], ) -def test_replace(using_copy_on_write, replace_kwargs): +def test_replace(replace_kwargs): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": ["foo", "bar", "baz"]}) df_orig = df.copy() df_replaced = df.replace(**replace_kwargs) - if using_copy_on_write: - if (df_replaced["b"] == df["b"]).all(): - assert np.shares_memory(get_array(df_replaced, "b"), get_array(df, "b")) - assert np.shares_memory(get_array(df_replaced, "c"), get_array(df, "c")) + if (df_replaced["b"] == df["b"]).all(): + assert np.shares_memory(get_array(df_replaced, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(df_replaced, "c"), get_array(df, "c")) # mutating squeezed df triggers a copy-on-write for that column/block df_replaced.loc[0, "c"] = -1 - if using_copy_on_write: - assert not np.shares_memory(get_array(df_replaced, "c"), get_array(df, "c")) + assert not np.shares_memory(get_array(df_replaced, "c"), get_array(df, "c")) if "a" in replace_kwargs["to_replace"]: arr = get_array(df_replaced, "a") @@ -47,26 +45,22 @@ def test_replace(using_copy_on_write, replace_kwargs): tm.assert_frame_equal(df, df_orig) -def test_replace_regex_inplace_refs(using_copy_on_write): +def test_replace_regex_inplace_refs(): df = DataFrame({"a": ["aaa", "bbb"]}) df_orig = df.copy() view = df[:] arr = get_array(df, "a") df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True) - if using_copy_on_write: - assert not np.shares_memory(arr, get_array(df, "a")) - assert df._mgr._has_no_reference(0) - tm.assert_frame_equal(view, df_orig) - else: - assert np.shares_memory(arr, get_array(df, "a")) + assert not np.shares_memory(arr, get_array(df, "a")) + assert df._mgr._has_no_reference(0) + tm.assert_frame_equal(view, df_orig) -def test_replace_regex_inplace(using_copy_on_write): +def test_replace_regex_inplace(): df = DataFrame({"a": ["aaa", "bbb"]}) arr = get_array(df, "a") df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) assert np.shares_memory(arr, get_array(df, "a")) df_orig = df.copy() @@ -75,89 +69,64 @@ def test_replace_regex_inplace(using_copy_on_write): assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) -def test_replace_regex_inplace_no_op(using_copy_on_write): +def test_replace_regex_inplace_no_op(): df = DataFrame({"a": [1, 2]}) arr = get_array(df, "a") df.replace(to_replace=r"^a.$", value="new", inplace=True, regex=True) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) assert np.shares_memory(arr, get_array(df, "a")) df_orig = df.copy() df2 = df.replace(to_replace=r"^x.$", value="new", regex=True) tm.assert_frame_equal(df_orig, df) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) -def test_replace_mask_all_false_second_block(using_copy_on_write): +def test_replace_mask_all_false_second_block(): df = DataFrame({"a": [1.5, 2, 3], "b": 100.5, "c": 1, "d": 2}) df_orig = df.copy() df2 = df.replace(to_replace=1.5, value=55.5) - if using_copy_on_write: - # TODO: Block splitting would allow us to avoid copying b - assert np.shares_memory(get_array(df, "c"), get_array(df2, "c")) - assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - - else: - assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c")) - assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) + # TODO: Block splitting would allow us to avoid copying b + assert np.shares_memory(get_array(df, "c"), get_array(df2, "c")) + assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) df2.loc[0, "c"] = 1 tm.assert_frame_equal(df, df_orig) # Original is unchanged - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c")) - # TODO: This should split and not copy the whole block - # assert np.shares_memory(get_array(df, "d"), get_array(df2, "d")) + assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c")) + assert np.shares_memory(get_array(df, "d"), get_array(df2, "d")) -def test_replace_coerce_single_column(using_copy_on_write): +def test_replace_coerce_single_column(): df = DataFrame({"a": [1.5, 2, 3], "b": 100.5}) df_orig = df.copy() df2 = df.replace(to_replace=1.5, value="a") + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - if using_copy_on_write: - assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - - else: - assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - - if using_copy_on_write: - df2.loc[0, "b"] = 0.5 - tm.assert_frame_equal(df, df_orig) # Original is unchanged - assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + df2.loc[0, "b"] = 0.5 + tm.assert_frame_equal(df, df_orig) # Original is unchanged + assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) -def test_replace_to_replace_wrong_dtype(using_copy_on_write): +def test_replace_to_replace_wrong_dtype(): df = DataFrame({"a": [1.5, 2, 3], "b": 100.5}) df_orig = df.copy() df2 = df.replace(to_replace="xxx", value=1.5) - if using_copy_on_write: - assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - - else: - assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) df2.loc[0, "b"] = 0.5 tm.assert_frame_equal(df, df_orig) # Original is unchanged - - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) -def test_replace_list_categorical(using_copy_on_write): +def test_replace_list_categorical(): df = DataFrame({"a": ["a", "b", "c"]}, dtype="category") arr = get_array(df, "a") msg = ( @@ -167,8 +136,7 @@ def test_replace_list_categorical(using_copy_on_write): with tm.assert_produces_warning(FutureWarning, match=msg): df.replace(["c"], value="a", inplace=True) assert np.shares_memory(arr.codes, get_array(df, "a").codes) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) df_orig = df.copy() with tm.assert_produces_warning(FutureWarning, match=msg): @@ -178,7 +146,7 @@ def test_replace_list_categorical(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_replace_list_inplace_refs_categorical(using_copy_on_write): +def test_replace_list_inplace_refs_categorical(): df = DataFrame({"a": ["a", "b", "c"]}, dtype="category") view = df[:] df_orig = df.copy() @@ -188,60 +156,47 @@ def test_replace_list_inplace_refs_categorical(using_copy_on_write): ) with tm.assert_produces_warning(FutureWarning, match=msg): df.replace(["c"], value="a", inplace=True) - if using_copy_on_write: - assert not np.shares_memory( - get_array(view, "a").codes, get_array(df, "a").codes - ) - tm.assert_frame_equal(df_orig, view) - else: - # This could be inplace - assert not np.shares_memory( - get_array(view, "a").codes, get_array(df, "a").codes - ) + assert not np.shares_memory(get_array(view, "a").codes, get_array(df, "a").codes) + tm.assert_frame_equal(df_orig, view) @pytest.mark.parametrize("to_replace", [1.5, [1.5], []]) -def test_replace_inplace(using_copy_on_write, to_replace): +def test_replace_inplace(to_replace): df = DataFrame({"a": [1.5, 2, 3]}) arr_a = get_array(df, "a") df.replace(to_replace=1.5, value=15.5, inplace=True) assert np.shares_memory(get_array(df, "a"), arr_a) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) @pytest.mark.parametrize("to_replace", [1.5, [1.5]]) -def test_replace_inplace_reference(using_copy_on_write, to_replace): +def test_replace_inplace_reference(to_replace): df = DataFrame({"a": [1.5, 2, 3]}) arr_a = get_array(df, "a") view = df[:] df.replace(to_replace=to_replace, value=15.5, inplace=True) - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "a"), arr_a) - assert df._mgr._has_no_reference(0) - assert view._mgr._has_no_reference(0) - else: - assert np.shares_memory(get_array(df, "a"), arr_a) + assert not np.shares_memory(get_array(df, "a"), arr_a) + assert df._mgr._has_no_reference(0) + assert view._mgr._has_no_reference(0) @pytest.mark.parametrize("to_replace", ["a", 100.5]) -def test_replace_inplace_reference_no_op(using_copy_on_write, to_replace): +def test_replace_inplace_reference_no_op(to_replace): df = DataFrame({"a": [1.5, 2, 3]}) arr_a = get_array(df, "a") view = df[:] df.replace(to_replace=to_replace, value=15.5, inplace=True) assert np.shares_memory(get_array(df, "a"), arr_a) - if using_copy_on_write: - assert not df._mgr._has_no_reference(0) - assert not view._mgr._has_no_reference(0) + assert not df._mgr._has_no_reference(0) + assert not view._mgr._has_no_reference(0) @pytest.mark.parametrize("to_replace", [1, [1]]) @pytest.mark.parametrize("val", [1, 1.5]) -def test_replace_categorical_inplace_reference(using_copy_on_write, val, to_replace): +def test_replace_categorical_inplace_reference(val, to_replace): df = DataFrame({"a": Categorical([1, 2, 3])}) df_orig = df.copy() arr_a = get_array(df, "a") @@ -254,17 +209,14 @@ def test_replace_categorical_inplace_reference(using_copy_on_write, val, to_repl with tm.assert_produces_warning(warn, match=msg): df.replace(to_replace=to_replace, value=val, inplace=True) - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "a").codes, arr_a.codes) - assert df._mgr._has_no_reference(0) - assert view._mgr._has_no_reference(0) - tm.assert_frame_equal(view, df_orig) - else: - assert np.shares_memory(get_array(df, "a").codes, arr_a.codes) + assert not np.shares_memory(get_array(df, "a").codes, arr_a.codes) + assert df._mgr._has_no_reference(0) + assert view._mgr._has_no_reference(0) + tm.assert_frame_equal(view, df_orig) @pytest.mark.parametrize("val", [1, 1.5]) -def test_replace_categorical_inplace(using_copy_on_write, val): +def test_replace_categorical_inplace(val): df = DataFrame({"a": Categorical([1, 2, 3])}) arr_a = get_array(df, "a") msg = ( @@ -276,15 +228,14 @@ def test_replace_categorical_inplace(using_copy_on_write, val): df.replace(to_replace=1, value=val, inplace=True) assert np.shares_memory(get_array(df, "a").codes, arr_a.codes) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) expected = DataFrame({"a": Categorical([val, 2, 3])}) tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("val", [1, 1.5]) -def test_replace_categorical(using_copy_on_write, val): +def test_replace_categorical(val): df = DataFrame({"a": Categorical([1, 2, 3])}) df_orig = df.copy() msg = ( @@ -295,9 +246,8 @@ def test_replace_categorical(using_copy_on_write, val): with tm.assert_produces_warning(warn, match=msg): df2 = df.replace(to_replace=1, value=val) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) - assert df2._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) + assert df2._mgr._has_no_reference(0) assert not np.shares_memory(get_array(df, "a").codes, get_array(df2, "a").codes) tm.assert_frame_equal(df, df_orig) @@ -307,7 +257,7 @@ def test_replace_categorical(using_copy_on_write, val): @pytest.mark.parametrize("method", ["where", "mask"]) -def test_masking_inplace(using_copy_on_write, method): +def test_masking_inplace(method): df = DataFrame({"a": [1.5, 2, 3]}) df_orig = df.copy() arr_a = get_array(df, "a") @@ -316,59 +266,43 @@ def test_masking_inplace(using_copy_on_write, method): method = getattr(df, method) method(df["a"] > 1.6, -1, inplace=True) - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "a"), arr_a) - assert df._mgr._has_no_reference(0) - assert view._mgr._has_no_reference(0) - tm.assert_frame_equal(view, df_orig) - else: - assert np.shares_memory(get_array(df, "a"), arr_a) + assert not np.shares_memory(get_array(df, "a"), arr_a) + assert df._mgr._has_no_reference(0) + assert view._mgr._has_no_reference(0) + tm.assert_frame_equal(view, df_orig) -def test_replace_empty_list(using_copy_on_write): +def test_replace_empty_list(): df = DataFrame({"a": [1, 2]}) df2 = df.replace([], []) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - assert not df._mgr._has_no_reference(0) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not df._mgr._has_no_reference(0) arr_a = get_array(df, "a") df.replace([], []) - if using_copy_on_write: - assert np.shares_memory(get_array(df, "a"), arr_a) - assert not df._mgr._has_no_reference(0) - assert not df2._mgr._has_no_reference(0) + assert np.shares_memory(get_array(df, "a"), arr_a) + assert not df._mgr._has_no_reference(0) + assert not df2._mgr._has_no_reference(0) @pytest.mark.parametrize("value", ["d", None]) -def test_replace_object_list_inplace(using_copy_on_write, value): +def test_replace_object_list_inplace(value): df = DataFrame({"a": ["a", "b", "c"]}) arr = get_array(df, "a") df.replace(["c"], value, inplace=True) - if using_copy_on_write or value is None: - assert np.shares_memory(arr, get_array(df, "a")) - else: - # This could be inplace - assert not np.shares_memory(arr, get_array(df, "a")) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert np.shares_memory(arr, get_array(df, "a")) + assert df._mgr._has_no_reference(0) -def test_replace_list_multiple_elements_inplace(using_copy_on_write): +def test_replace_list_multiple_elements_inplace(): df = DataFrame({"a": [1, 2, 3]}) arr = get_array(df, "a") df.replace([1, 2], 4, inplace=True) - if using_copy_on_write: - assert np.shares_memory(arr, get_array(df, "a")) - assert df._mgr._has_no_reference(0) - else: - assert np.shares_memory(arr, get_array(df, "a")) + assert np.shares_memory(arr, get_array(df, "a")) + assert df._mgr._has_no_reference(0) -def test_replace_list_none(using_copy_on_write): +def test_replace_list_none(): df = DataFrame({"a": ["a", "b", "c"]}) df_orig = df.copy() @@ -378,37 +312,32 @@ def test_replace_list_none(using_copy_on_write): assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) -def test_replace_list_none_inplace_refs(using_copy_on_write): +def test_replace_list_none_inplace_refs(): df = DataFrame({"a": ["a", "b", "c"]}) arr = get_array(df, "a") df_orig = df.copy() view = df[:] df.replace(["a"], value=None, inplace=True) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) - assert not np.shares_memory(arr, get_array(df, "a")) - tm.assert_frame_equal(df_orig, view) - else: - assert np.shares_memory(arr, get_array(df, "a")) + assert df._mgr._has_no_reference(0) + assert not np.shares_memory(arr, get_array(df, "a")) + tm.assert_frame_equal(df_orig, view) -def test_replace_columnwise_no_op_inplace(using_copy_on_write): +def test_replace_columnwise_no_op_inplace(): df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) view = df[:] df_orig = df.copy() df.replace({"a": 10}, 100, inplace=True) - if using_copy_on_write: - assert np.shares_memory(get_array(view, "a"), get_array(df, "a")) - df.iloc[0, 0] = 100 - tm.assert_frame_equal(view, df_orig) + assert np.shares_memory(get_array(view, "a"), get_array(df, "a")) + df.iloc[0, 0] = 100 + tm.assert_frame_equal(view, df_orig) -def test_replace_columnwise_no_op(using_copy_on_write): +def test_replace_columnwise_no_op(): df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) df_orig = df.copy() df2 = df.replace({"a": 10}, 100) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df2.iloc[0, 0] = 100 tm.assert_frame_equal(df, df_orig) @@ -425,15 +354,12 @@ def test_replace_chained_assignment(): tm.assert_frame_equal(df, df_orig) -def test_replace_listlike(using_copy_on_write): +def test_replace_listlike(): df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) df_orig = df.copy() result = df.replace([200, 201], [11, 11]) - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) result.iloc[0, 0] = 100 tm.assert_frame_equal(df, df) @@ -443,7 +369,7 @@ def test_replace_listlike(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_replace_listlike_inplace(using_copy_on_write): +def test_replace_listlike_inplace(): df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) arr = get_array(df, "a") df.replace([200, 2], [10, 11], inplace=True) @@ -452,9 +378,5 @@ def test_replace_listlike_inplace(using_copy_on_write): view = df[:] df_orig = df.copy() df.replace([200, 3], [10, 11], inplace=True) - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "a"), arr) - tm.assert_frame_equal(view, df_orig) - else: - assert np.shares_memory(get_array(df, "a"), arr) - tm.assert_frame_equal(df, view) + assert not np.shares_memory(get_array(df, "a"), arr) + tm.assert_frame_equal(view, df_orig) diff --git a/pandas/tests/copy_view/test_setitem.py b/pandas/tests/copy_view/test_setitem.py index 6104699cbc51b..2f28e9826c7a1 100644 --- a/pandas/tests/copy_view/test_setitem.py +++ b/pandas/tests/copy_view/test_setitem.py @@ -28,7 +28,7 @@ def test_set_column_with_array(): tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c")) -def test_set_column_with_series(using_copy_on_write): +def test_set_column_with_series(): # Case: setting a series as a new column (df[col] = s) copies that data # (with delayed copy with CoW) df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) @@ -36,11 +36,7 @@ def test_set_column_with_series(using_copy_on_write): df["c"] = ser - if using_copy_on_write: - assert np.shares_memory(get_array(df, "c"), get_array(ser)) - else: - # the series data is copied - assert not np.shares_memory(get_array(df, "c"), get_array(ser)) + assert np.shares_memory(get_array(df, "c"), get_array(ser)) # and modifying the series does not modify the DataFrame ser.iloc[0] = 0 @@ -48,7 +44,7 @@ def test_set_column_with_series(using_copy_on_write): tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c")) -def test_set_column_with_index(using_copy_on_write): +def test_set_column_with_index(): # Case: setting an index as a new column (df[col] = idx) copies that data df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) idx = Index([1, 2, 3]) @@ -66,7 +62,7 @@ def test_set_column_with_index(using_copy_on_write): assert not np.shares_memory(get_array(df, "d"), arr) -def test_set_columns_with_dataframe(using_copy_on_write): +def test_set_columns_with_dataframe(): # Case: setting a DataFrame as new columns copies that data # (with delayed copy with CoW) df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) @@ -74,18 +70,13 @@ def test_set_columns_with_dataframe(using_copy_on_write): df[["c", "d"]] = df2 - if using_copy_on_write: - assert np.shares_memory(get_array(df, "c"), get_array(df2, "c")) - else: - # the data is copied - assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c")) - + assert np.shares_memory(get_array(df, "c"), get_array(df2, "c")) # and modifying the set DataFrame does not modify the original DataFrame df2.iloc[0, 0] = 0 tm.assert_series_equal(df["c"], Series([7, 8, 9], name="c")) -def test_setitem_series_no_copy(using_copy_on_write): +def test_setitem_series_no_copy(): # Case: setting a Series as column into a DataFrame can delay copying that data df = DataFrame({"a": [1, 2, 3]}) rhs = Series([4, 5, 6]) @@ -93,42 +84,39 @@ def test_setitem_series_no_copy(using_copy_on_write): # adding a new column df["b"] = rhs - if using_copy_on_write: - assert np.shares_memory(get_array(rhs), get_array(df, "b")) + assert np.shares_memory(get_array(rhs), get_array(df, "b")) df.iloc[0, 1] = 100 tm.assert_series_equal(rhs, rhs_orig) -def test_setitem_series_no_copy_single_block(using_copy_on_write): +def test_setitem_series_no_copy_single_block(): # Overwriting an existing column that is a single block df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) rhs = Series([4, 5, 6]) rhs_orig = rhs.copy() df["a"] = rhs - if using_copy_on_write: - assert np.shares_memory(get_array(rhs), get_array(df, "a")) + assert np.shares_memory(get_array(rhs), get_array(df, "a")) df.iloc[0, 0] = 100 tm.assert_series_equal(rhs, rhs_orig) -def test_setitem_series_no_copy_split_block(using_copy_on_write): +def test_setitem_series_no_copy_split_block(): # Overwriting an existing column that is part of a larger block df = DataFrame({"a": [1, 2, 3], "b": 1}) rhs = Series([4, 5, 6]) rhs_orig = rhs.copy() df["b"] = rhs - if using_copy_on_write: - assert np.shares_memory(get_array(rhs), get_array(df, "b")) + assert np.shares_memory(get_array(rhs), get_array(df, "b")) df.iloc[0, 1] = 100 tm.assert_series_equal(rhs, rhs_orig) -def test_setitem_series_column_midx_broadcasting(using_copy_on_write): +def test_setitem_series_column_midx_broadcasting(): # Setting a Series to multiple columns will repeat the data # (currently copying the data eagerly) df = DataFrame( @@ -138,11 +126,10 @@ def test_setitem_series_column_midx_broadcasting(using_copy_on_write): rhs = Series([10, 11]) df["a"] = rhs assert not np.shares_memory(get_array(rhs), df._get_column_array(0)) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) -def test_set_column_with_inplace_operator(using_copy_on_write): +def test_set_column_with_inplace_operator(): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) # this should not raise any warning