From 07b4c5adcd60302076f6fdf7709d1c256f8d8e20 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 5 Jan 2023 17:00:45 -0800 Subject: [PATCH 1/3] TST: Test CoW with DataFrame.items() --- pandas/tests/copy_view/test_methods.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 0ff359061fd67..4f6d109ae673c 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -634,3 +634,21 @@ def test_droplevel(using_copy_on_write): assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) tm.assert_frame_equal(df, df_orig) + + +def test_items(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + df_orig = df.copy() + for name, vals in df.items(): + ser = Series(vals) + + if using_copy_on_write: + assert np.shares_memory(ser.values, get_array(df, name)) + else: + assert not np.shares_memory(ser.values, get_array(df, name)) + + # mutating df2 triggers a copy-on-write for that column / block + ser.iloc[0] = 0 + + assert not np.shares_memory(ser.values, get_array(df, name)) + tm.assert_frame_equal(df, df_orig) From 00eb76efe02b46000d36beca127265f2765d5113 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sat, 7 Jan 2023 11:42:40 -0800 Subject: [PATCH 2/3] Fix tests --- pandas/tests/copy_view/test_methods.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 4f6d109ae673c..6de91156a4475 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -642,13 +642,14 @@ def test_items(using_copy_on_write): for name, vals in df.items(): ser = Series(vals) - if using_copy_on_write: - assert np.shares_memory(ser.values, get_array(df, name)) - else: - assert not 
np.shares_memory(ser.values, get_array(df, name)) + assert np.shares_memory(ser.values, get_array(df, name)) # mutating df2 triggers a copy-on-write for that column / block ser.iloc[0] = 0 - assert not np.shares_memory(ser.values, get_array(df, name)) - tm.assert_frame_equal(df, df_orig) + if using_copy_on_write: + assert not np.shares_memory(ser.values, get_array(df, name)) + tm.assert_frame_equal(df, df_orig) + else: + # Original frame will be modified + assert df.loc[0, name] == 0 From 6566da4866ccd2beba141e33c0ea9ddec1fa4f15 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Fri, 20 Jan 2023 09:17:49 -0800 Subject: [PATCH 3/3] update --- pandas/tests/copy_view/test_methods.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index a5b43dd5428ca..3479da3475336 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -994,20 +994,23 @@ def test_squeeze(using_copy_on_write): def test_items(using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) df_orig = df.copy() - for name, vals in df.items(): - ser = Series(vals) - assert np.shares_memory(ser.values, get_array(df, name)) + # Test this twice, since the second time, the item cache will be + # triggered, and we want to make sure it still works then. 
+ for i in range(2): + for name, ser in df.items(): - # mutating df2 triggers a copy-on-write for that column / block - ser.iloc[0] = 0 + assert np.shares_memory(get_array(ser, name), get_array(df, name)) - if using_copy_on_write: - assert not np.shares_memory(ser.values, get_array(df, name)) - tm.assert_frame_equal(df, df_orig) - else: - # Original frame will be modified - assert df.loc[0, name] == 0 + # mutating ser triggers a copy-on-write for that column / block + ser.iloc[0] = 0 + + if using_copy_on_write: + assert not np.shares_memory(get_array(ser, name), get_array(df, name)) + tm.assert_frame_equal(df, df_orig) + else: + # Original frame will be modified + assert df.loc[0, name] == 0 @pytest.mark.parametrize(