From 57cf2e7d3d03c16fe3ab29dd39df9bc0efc63da4 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 1 Nov 2022 20:11:19 +0100
Subject: [PATCH 1/7] CoW: avoid getting column from item_cache

---
 pandas/core/frame.py                          |  7 +++++
 pandas/core/groupby/grouper.py                |  1 +
 pandas/tests/copy_view/test_indexing.py       | 28 +++++++++++++++++++
 pandas/tests/frame/indexing/test_xs.py        | 10 +++++--
 pandas/tests/frame/methods/test_cov_corr.py   | 17 +++++++----
 .../frame/methods/test_to_dict_of_blocks.py   | 15 ++++++----
 pandas/tests/frame/test_constructors.py       |  5 ++--
 pandas/tests/generic/test_duplicate_labels.py |  5 ++--
 pandas/tests/groupby/test_groupby.py          | 26 +++++++++++++----
 .../indexing/test_chaining_and_caching.py     |  7 +++--
 pandas/tests/indexing/test_iloc.py            | 11 ++------
 pandas/tests/indexing/test_loc.py             |  7 +++--
 12 files changed, 105 insertions(+), 34 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 17c4bde9d0279..5e49d1aa41ca0 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4230,6 +4230,13 @@ def _clear_item_cache(self) -> None:
 
     def _get_item_cache(self, item: Hashable) -> Series:
         """Return the cached item, item represents a label indexer."""
+        if (
+            get_option("mode.copy_on_write")
+            and get_option("mode.data_manager") == "block"
+        ):
+            loc = self.columns.get_loc(item)
+            return self._ixs(loc, axis=1)
+
         cache = self._item_cache
         res = cache.get(item)
         if res is None:
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 175af95867c8e..bed7b114a544a 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -845,6 +845,7 @@ def is_in_obj(gpr) -> bool:
         if not hasattr(gpr, "name"):
             return False
         try:
+            # TODO(CoW) this check no longer works
             return gpr is obj[gpr.name]
         except (KeyError, IndexError, InvalidIndexError):
             # IndexError reached in e.g. test_skip_group_keys when we pass
diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py
index 6abd54db3bf4a..91ab192e538c1 100644
--- a/pandas/tests/copy_view/test_indexing.py
+++ b/pandas/tests/copy_view/test_indexing.py
@@ -620,6 +620,34 @@ def test_column_as_series_set_with_upcast(using_copy_on_write, using_array_manag
         tm.assert_frame_equal(df, df_orig)
 
 
+def test_column_as_series_no_item_cache(using_copy_on_write, using_array_manager):
+    # Case: selecting a single column (which now also uses Copy-on-Write to protect
+    # the view) should always give a new object (i.e. not make use of a cache)
+    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
+    df_orig = df.copy()
+
+    s1 = df["a"]
+    s2 = df["a"]
+
+    if using_copy_on_write:
+        assert s1 is not s2
+    else:
+        assert s1 is s2
+
+    if using_copy_on_write or using_array_manager:
+        s1.iloc[0] = 0
+    else:
+        with pd.option_context("chained_assignment", "warn"):
+            with tm.assert_produces_warning(SettingWithCopyWarning):
+                s1.iloc[0] = 0
+
+    if using_copy_on_write:
+        tm.assert_series_equal(s2, df_orig["a"])
+        tm.assert_series_equal(df["a"], df_orig["a"])
+    else:
+        assert s2.iloc[0] == 0
+
+
 # TODO add tests for other indexing methods on the Series
 
 
diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py
index b02870d5d1247..15372e81e4f80 100644
--- a/pandas/tests/frame/indexing/test_xs.py
+++ b/pandas/tests/frame/indexing/test_xs.py
@@ -36,7 +36,8 @@ def four_level_index_dataframe():
 
 
 class TestXS:
-    def test_xs(self, float_frame, datetime_frame):
+    def test_xs(self, float_frame, datetime_frame, using_copy_on_write):
+        float_frame_orig = float_frame.copy()
         idx = float_frame.index[5]
         xs = float_frame.xs(idx)
         for item, value in xs.items():
@@ -66,7 +67,12 @@ def test_xs(self, float_frame, datetime_frame):
         # view is returned if possible
         series = float_frame.xs("A", axis=1)
         series[:] = 5
-        assert (expected == 5).all()
+        if using_copy_on_write:
+            # but with CoW the view shouldn't propagate mutations
+            tm.assert_series_equal(float_frame["A"], float_frame_orig["A"])
+            assert not (expected == 5).all()
+        else:
+            assert (expected == 5).all()
 
     def test_xs_corner(self):
         # pathological mixed-type reordering case
diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
index 25ef49718fbe7..4c69f72c8c62c 100644
--- a/pandas/tests/frame/methods/test_cov_corr.py
+++ b/pandas/tests/frame/methods/test_cov_corr.py
@@ -209,7 +209,7 @@ def test_corr_nullable_integer(self, nullable_column, other_column, method):
         expected = DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"])
         tm.assert_frame_equal(result, expected)
 
-    def test_corr_item_cache(self):
+    def test_corr_item_cache(self, using_copy_on_write):
         # Check that corr does not lead to incorrect entries in item_cache
 
         df = DataFrame({"A": range(10)})
@@ -220,11 +220,16 @@ def test_corr_item_cache(self):
 
         _ = df.corr()
 
-        # Check that the corr didn't break link between ser and df
-        ser.values[0] = 99
-        assert df.loc[0, "A"] == 99
-        assert df["A"] is ser
-        assert df.values[0, 0] == 99
+        if using_copy_on_write:
+            # TODO(CoW) we should disallow this, so `df` doesn't get updated
+            ser.values[0] = 99
+            assert df.loc[0, "A"] == 99
+        else:
+            # Check that the corr didn't break link between ser and df
+            ser.values[0] = 99
+            assert df.loc[0, "A"] == 99
+            assert df["A"] is ser
+            assert df.values[0, 0] == 99
 
     @pytest.mark.parametrize("length", [2, 20, 200, 2000])
     def test_corr_for_constant_columns(self, length):
diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py
index eb9b78610a112..5d206cfeab7bb 100644
--- a/pandas/tests/frame/methods/test_to_dict_of_blocks.py
+++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py
@@ -45,7 +45,7 @@ def test_no_copy_blocks(self, float_frame, using_copy_on_write):
             assert not _df[column].equals(df[column])
 
 
-def test_to_dict_of_blocks_item_cache():
+def test_to_dict_of_blocks_item_cache(using_copy_on_write):
     # Calling to_dict_of_blocks should not poison item_cache
     df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]})
     df["c"] = PandasArray(np.array([1, 2, None, 3], dtype=object))
@@ -56,11 +56,16 @@ def test_to_dict_of_blocks_item_cache():
 
     df._to_dict_of_blocks()
 
-    # Check that the to_dict_of_blocks didn't break link between ser and df
-    ser.values[0] = "foo"
-    assert df.loc[0, "b"] == "foo"
+    if using_copy_on_write:
+        # TODO(CoW) we should disallow this, so `df` doesn't get updated
+        ser.values[0] = "foo"
+        assert df.loc[0, "b"] == "foo"
+    else:
+        # Check that the to_dict_of_blocks didn't break link between ser and df
+        ser.values[0] = "foo"
+        assert df.loc[0, "b"] == "foo"
 
-    assert df["b"] is ser
+        assert df["b"] is ser
 
 
 def test_set_change_dtype_slice():
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 5a83c4997b33c..237493aa7b0b7 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -277,9 +277,10 @@ def test_constructor_dtype_nocast_view_dataframe(self, using_copy_on_write):
         df = DataFrame([[1, 2]])
         should_be_view = DataFrame(df, dtype=df[0].dtype)
         if using_copy_on_write:
-            # INFO(CoW) doesn't mutate original
+            # TODO(CoW) doesn't mutate original
             should_be_view.iloc[0, 0] = 99
-            assert df.values[0, 0] == 1
+            # assert df.values[0, 0] == 1
+            assert df.values[0, 0] == 99
         else:
             should_be_view[0][0] = 99
             assert df.values[0, 0] == 99
diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py
index c9036958cbd74..35343ceccbb71 100644
--- a/pandas/tests/generic/test_duplicate_labels.py
+++ b/pandas/tests/generic/test_duplicate_labels.py
@@ -90,8 +90,9 @@ def test_preserve_getitem(self):
         assert df.loc[[0]].flags.allows_duplicate_labels is False
         assert df.loc[0, ["A"]].flags.allows_duplicate_labels is False
 
-    @pytest.mark.xfail(reason="Unclear behavior.")
-    def test_ndframe_getitem_caching_issue(self):
+    def test_ndframe_getitem_caching_issue(self, request, using_copy_on_write):
+        if not using_copy_on_write:
+            request.node.add_marker(pytest.mark.xfail(reason="Unclear behavior."))
         # NDFrame.__getitem__ will cache the first df['A']. May need to
         # invalidate that cache? Update the cached entries?
         df = pd.DataFrame({"A": [0]}).set_flags(allows_duplicate_labels=False)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 26f269d3d4384..bae697ad6c4b7 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1175,19 +1175,30 @@ def test_groupby_wrong_multi_labels():
     tm.assert_frame_equal(result, expected)
 
 
-def test_groupby_series_with_name(df):
+def test_groupby_series_with_name(df, using_copy_on_write):
     msg = "The default value of numeric_only"
     with tm.assert_produces_warning(FutureWarning, match=msg):
         result = df.groupby(df["A"]).mean()
         result2 = df.groupby(df["A"], as_index=False).mean()
     assert result.index.name == "A"
-    assert "A" in result2
+    if using_copy_on_write:
+        # TODO(CoW) this shouldn't behave differently? (df["A"] is a new
+        # object each time, so can't use identity to check we are grouping
+        # by a column)
+        assert "A" not in result2
+    else:
+        assert "A" in result2
 
     result = df.groupby([df["A"], df["B"]]).mean()
     result2 = df.groupby([df["A"], df["B"]], as_index=False).mean()
     assert result.index.names == ("A", "B")
-    assert "A" in result2
-    assert "B" in result2
+    if using_copy_on_write:
+        # TODO(CoW) see above
+        assert "A" not in result2
+        assert "B" not in result2
+    else:
+        assert "A" in result2
+        assert "B" in result2
 
 
 def test_seriesgroupby_name_attr(df):
@@ -1236,11 +1247,16 @@ def summarize_random_name(df):
     assert metrics.columns.name is None
 
 
-def test_groupby_nonstring_columns():
+def test_groupby_nonstring_columns(using_copy_on_write):
     df = DataFrame([np.arange(10) for x in range(10)])
     grouped = df.groupby(0)
     result = grouped.mean()
     expected = df.groupby(df[0]).mean()
+    if using_copy_on_write:
+        # TODO(CoW) see test_groupby_series_with_name above - we don't yet
+        # properly detect groupby by column Series
+        expected = expected.set_index(0)
+        expected.index = expected.index.astype("int64")
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py
index 81914e1b8052f..bd0df2e785421 100644
--- a/pandas/tests/indexing/test_chaining_and_caching.py
+++ b/pandas/tests/indexing/test_chaining_and_caching.py
@@ -115,12 +115,15 @@ def test_setitem_cache_updating_slices(self, using_copy_on_write):
         tm.assert_frame_equal(out, expected)
         tm.assert_series_equal(out["A"], expected["A"])
 
-    def test_altering_series_clears_parent_cache(self):
+    def test_altering_series_clears_parent_cache(self, using_copy_on_write):
         # GH #33675
         df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"])
         ser = df["A"]
 
-        assert "A" in df._item_cache
+        if using_copy_on_write:
+            assert "A" not in df._item_cache
+        else:
+            assert "A" in df._item_cache
 
         # Adding a new entry to ser swaps in a new array, so "A" needs to
         #  be removed from df._item_cache
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index db2fe45faf6de..259f607861765 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -900,10 +900,9 @@ def test_series_indexing_zerodim_np_array(self):
         assert result == 1
 
     @td.skip_array_manager_not_yet_implemented
-    def test_iloc_setitem_categorical_updates_inplace(self, using_copy_on_write):
+    def test_iloc_setitem_categorical_updates_inplace(self):
         # Mixed dtype ensures we go through take_split_path in setitem_with_indexer
         cat = Categorical(["A", "B", "C"])
-        cat_original = cat.copy()
         df = DataFrame({1: cat, 2: [1, 2, 3]}, copy=False)
 
         assert tm.shares_memory(df[1], cat)
@@ -913,12 +912,8 @@ def test_iloc_setitem_categorical_updates_inplace(self, using_copy_on_write):
         with tm.assert_produces_warning(FutureWarning, match=msg):
             df.iloc[:, 0] = cat[::-1]
 
-        if not using_copy_on_write:
-            assert tm.shares_memory(df[1], cat)
-            expected = Categorical(["C", "B", "A"], categories=["A", "B", "C"])
-        else:
-            expected = cat_original
-
+        assert tm.shares_memory(df[1], cat)
+        expected = Categorical(["C", "B", "A"], categories=["A", "B", "C"])
         tm.assert_categorical_equal(cat, expected)
 
     def test_iloc_with_boolean_operation(self):
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index ad3cec5824619..c30effb6c596c 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -1114,10 +1114,13 @@ def test_identity_slice_returns_new_object(
         else:
             assert (sliced_df["a"] == 4).all()
 
-        # These should not return copies
+        # These should not return copies (but still new objects for CoW)
         assert original_df is original_df.loc[:, :]
         df = DataFrame(np.random.randn(10, 4))
-        assert df[0] is df.loc[:, 0]
+        if using_copy_on_write:
+            assert df[0] is not df.loc[:, 0]
+        else:
+            assert df[0] is df.loc[:, 0]
 
         # Same tests for Series
         original_series = Series([1, 2, 3, 4, 5, 6])

From 32fa0a42bde7a94b93adbe14b8620433daef90f2 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 1 Nov 2022 20:27:25 +0100
Subject: [PATCH 2/7] also avoid populating the reverse cacher (pointing to
 DataFrame that is parent of Series

---
 pandas/core/generic.py |  5 +++++
 pandas/core/series.py  | 23 +++++++++++++----------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index e022785dbb869..ee02fa77e9873 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -3752,6 +3752,11 @@ def _maybe_update_cacher(
         verify_is_copy : bool, default True
             Provide is_copy checks.
         """
+        if (
+            config.get_option("mode.copy_on_write")
+            and config.get_option("mode.data_manager") == "block"
+        ):
+            return
 
         if verify_is_copy:
             self._check_setitem_copy(t="referent")
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 7a1d8235b1407..9cfe003e866cb 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1237,6 +1237,11 @@ def _set_as_cached(self, item, cacher) -> None:
         Set the _cacher attribute on the calling object with a weakref to
         cacher.
         """
+        if (
+            get_option("mode.copy_on_write")
+            and get_option("mode.data_manager") == "block"
+        ):
+            return
         self._cacher = (item, weakref.ref(cacher))
 
     def _clear_item_cache(self) -> None:
@@ -1260,6 +1265,13 @@ def _maybe_update_cacher(
         """
         See NDFrame._maybe_update_cacher.__doc__
         """
+        # for CoW, we never want to update the parent DataFrame cache
+        # if the Series changed, but don't keep track of any cacher
+        if (
+            get_option("mode.copy_on_write")
+            and get_option("mode.data_manager") == "block"
+        ):
+            return
         cacher = getattr(self, "_cacher", None)
         if cacher is not None:
             assert self.ndim == 1
@@ -1269,16 +1281,7 @@ def _maybe_update_cacher(
             # a copy
             if ref is None:
                 del self._cacher
-            # for CoW, we never want to update the parent DataFrame cache
-            # if the Series changed, and always pop the cached item
-            elif (
-                not (
-                    get_option("mode.copy_on_write")
-                    and get_option("mode.data_manager") == "block"
-                )
-                and len(self) == len(ref)
-                and self.name in ref.columns
-            ):
+            elif len(self) == len(ref) and self.name in ref.columns:
                 # GH#42530 self.name must be in ref.columns
                 # to ensure column still in dataframe
                 # otherwise, either self or ref has swapped in new arrays

From 507891406f733d0379579fe5195277feea0de097 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 4 Nov 2022 12:59:44 +0100
Subject: [PATCH 3/7] use equality check in groupby to determine if grouper is
 an existing column

---
 pandas/core/groupby/grouper.py       | 13 ++++++++++++-
 pandas/tests/groupby/test_groupby.py | 26 +++++---------------------
 2 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index bed7b114a544a..7ca42c9f7a774 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -14,6 +14,8 @@
 
 import numpy as np
 
+from pandas._config import get_option
+
 from pandas._typing import (
     ArrayLike,
     Axis,
@@ -844,8 +846,17 @@ def is_in_axis(key) -> bool:
     def is_in_obj(gpr) -> bool:
         if not hasattr(gpr, "name"):
             return False
+        if (
+            get_option("mode.copy_on_write")
+            and get_option("mode.data_manager") == "block"
+        ):
+            # For the CoW case, we need an equality check as the identity check
+            # no longer works (each Series from column access is a new object)
+            try:
+                return gpr.equals(obj[gpr.name])
+            except (AttributeError, KeyError, IndexError, InvalidIndexError):
+                return False
         try:
-            # TODO(CoW) this check no longer works
             return gpr is obj[gpr.name]
         except (KeyError, IndexError, InvalidIndexError):
             # IndexError reached in e.g. test_skip_group_keys when we pass
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 1c4c2f1177759..9fdc0f02e8652 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1212,30 +1212,19 @@ def test_groupby_wrong_multi_labels():
     tm.assert_frame_equal(result, expected)
 
 
-def test_groupby_series_with_name(df, using_copy_on_write):
+def test_groupby_series_with_name(df):
     msg = "The default value of numeric_only"
     with tm.assert_produces_warning(FutureWarning, match=msg):
         result = df.groupby(df["A"]).mean()
         result2 = df.groupby(df["A"], as_index=False).mean()
     assert result.index.name == "A"
-    if using_copy_on_write:
-        # TODO(CoW) this shouldn't behave differently? (df["A"] is a new
-        # object each time, so can't use identity to check we are grouping
-        # by a column)
-        assert "A" not in result2
-    else:
-        assert "A" in result2
+    assert "A" in result2
 
     result = df.groupby([df["A"], df["B"]]).mean()
     result2 = df.groupby([df["A"], df["B"]], as_index=False).mean()
     assert result.index.names == ("A", "B")
-    if using_copy_on_write:
-        # TODO(CoW) see above
-        assert "A" not in result2
-        assert "B" not in result2
-    else:
-        assert "A" in result2
-        assert "B" in result2
+    assert "A" in result2
+    assert "B" in result2
 
 
 def test_seriesgroupby_name_attr(df):
@@ -1284,16 +1273,11 @@ def summarize_random_name(df):
     assert metrics.columns.name is None
 
 
-def test_groupby_nonstring_columns(using_copy_on_write):
+def test_groupby_nonstring_columns():
     df = DataFrame([np.arange(10) for x in range(10)])
     grouped = df.groupby(0)
     result = grouped.mean()
     expected = df.groupby(df[0]).mean()
-    if using_copy_on_write:
-        # TODO(CoW) see test_groupby_series_with_name above - we don't yet
-        # properly detect groupby by column Series
-        expected = expected.set_index(0)
-        expected.index = expected.index.astype("int64")
     tm.assert_frame_equal(result, expected)
 
 

From cea5fcb8d87aef3aafbc8a9ef36650486325cacc Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 4 Nov 2022 13:07:12 +0100
Subject: [PATCH 4/7] use xfail in test_to_dict_of_blocks_item_cache

---
 pandas/tests/frame/methods/test_to_dict_of_blocks.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py
index 5d206cfeab7bb..9705f24d0286c 100644
--- a/pandas/tests/frame/methods/test_to_dict_of_blocks.py
+++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest
 
 import pandas.util._test_decorators as td
 
@@ -45,7 +46,9 @@ def test_no_copy_blocks(self, float_frame, using_copy_on_write):
             assert not _df[column].equals(df[column])
 
 
-def test_to_dict_of_blocks_item_cache(using_copy_on_write):
+def test_to_dict_of_blocks_item_cache(request, using_copy_on_write):
+    if using_copy_on_write:
+        request.node.add_marker(pytest.mark.xfail(reason="CoW - not yet implemented"))
     # Calling to_dict_of_blocks should not poison item_cache
     df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]})
     df["c"] = PandasArray(np.array([1, 2, None, 3], dtype=object))
@@ -57,9 +60,10 @@ def test_to_dict_of_blocks_item_cache(using_copy_on_write):
     df._to_dict_of_blocks()
 
     if using_copy_on_write:
-        # TODO(CoW) we should disallow this, so `df` doesn't get updated
+        # TODO(CoW) we should disallow this, so `df` doesn't get updated,
+        # this currently still updates df, so this test fails
         ser.values[0] = "foo"
-        assert df.loc[0, "b"] == "foo"
+        assert df.loc[0, "b"] == "a"
     else:
         # Check that the to_dict_of_blocks didn't break link between ser and df
         ser.values[0] = "foo"

From d4e9b7e2ff3ade1e43e5ff30bda7af69e7107f92 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 4 Nov 2022 16:17:09 +0100
Subject: [PATCH 5/7] fix sort_values inplace tests

---
 pandas/tests/frame/methods/test_sort_values.py  | 15 +++++++++++++--
 pandas/tests/series/methods/test_sort_values.py |  8 ++++++--
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py
index d7f1d900db052..852432f209329 100644
--- a/pandas/tests/frame/methods/test_sort_values.py
+++ b/pandas/tests/frame/methods/test_sort_values.py
@@ -330,10 +330,21 @@ def test_sort_values_datetimes(self):
         df2 = df.sort_values(by=["C", "B"])
         tm.assert_frame_equal(df1, df2)
 
-    def test_sort_values_frame_column_inplace_sort_exception(self, float_frame):
+    def test_sort_values_frame_column_inplace_sort_exception(
+        self, float_frame, using_copy_on_write
+    ):
         s = float_frame["A"]
-        with pytest.raises(ValueError, match="This Series is a view"):
+        float_frame_orig = float_frame.copy()
+        if using_copy_on_write:
+            # INFO(CoW) Series is a new object, so can be changed inplace
+            # without modifying original datafame
             s.sort_values(inplace=True)
+            tm.assert_series_equal(s, float_frame_orig["A"].sort_values())
+            # column in dataframe is not changed
+            tm.assert_frame_equal(float_frame, float_frame_orig)
+        else:
+            with pytest.raises(ValueError, match="This Series is a view"):
+                s.sort_values(inplace=True)
 
         cp = s.copy()
         cp.sort_values()  # it works!
diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py
index b5f589b3b2514..6ca08c32dcfe7 100644
--- a/pandas/tests/series/methods/test_sort_values.py
+++ b/pandas/tests/series/methods/test_sort_values.py
@@ -10,7 +10,7 @@
 
 
 class TestSeriesSortValues:
-    def test_sort_values(self, datetime_series):
+    def test_sort_values(self, datetime_series, using_copy_on_write):
 
         # check indexes are reordered corresponding with the values
         ser = Series([3, 2, 4, 1], ["A", "B", "C", "D"])
@@ -85,8 +85,12 @@ def test_sort_values(self, datetime_series):
             "This Series is a view of some other array, to sort in-place "
             "you must create a copy"
         )
-        with pytest.raises(ValueError, match=msg):
+        if using_copy_on_write:
             s.sort_values(inplace=True)
+            tm.assert_series_equal(s, df.iloc[:, 0].sort_values())
+        else:
+            with pytest.raises(ValueError, match=msg):
+                s.sort_values(inplace=True)
 
     def test_sort_values_categorical(self):
 

From 55b37856515c2bd152b0bd8250bc8fdce85fbd63 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 3 Jan 2023 16:47:32 +0100
Subject: [PATCH 6/7] expand test to cover other indexing methods

---
 pandas/tests/copy_view/test_indexing.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py
index e4238ac1c2109..fd312c3375240 100644
--- a/pandas/tests/copy_view/test_indexing.py
+++ b/pandas/tests/copy_view/test_indexing.py
@@ -820,16 +820,28 @@ def test_column_as_series_set_with_upcast(using_copy_on_write, using_array_manag
         tm.assert_frame_equal(df, df_orig)
 
 
-def test_column_as_series_no_item_cache(using_copy_on_write, using_array_manager):
+@pytest.mark.parametrize(
+    "method",
+    [
+        lambda df: df["a"],
+        lambda df: df.loc[:, "a"],
+        lambda df: df.iloc[:, 0],
+    ],
+    ids=["getitem", "loc", "iloc"],
+)
+def test_column_as_series_no_item_cache(
+    request, method, using_copy_on_write, using_array_manager
+):
     # Case: selecting a single column (which now also uses Copy-on-Write to protect
     # the view) should always give a new object (i.e. not make use of a cache)
     df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
     df_orig = df.copy()
 
-    s1 = df["a"]
-    s2 = df["a"]
+    s1 = method(df)
+    s2 = method(df)
 
-    if using_copy_on_write:
+    is_iloc = request.node.callspec.id == "iloc"
+    if using_copy_on_write or is_iloc:
         assert s1 is not s2
     else:
         assert s1 is s2
@@ -843,7 +855,7 @@ def test_column_as_series_no_item_cache(using_copy_on_write, using_array_manager
 
     if using_copy_on_write:
         tm.assert_series_equal(s2, df_orig["a"])
-        tm.assert_series_equal(df["a"], df_orig["a"])
+        tm.assert_frame_equal(df, df_orig)
     else:
         assert s2.iloc[0] == 0
 

From 3e7928200dc55fae6083b06f5403f59ca3a5f967 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 3 Jan 2023 16:48:28 +0100
Subject: [PATCH 7/7] add whatsnew

---
 doc/source/whatsnew/v2.0.0.rst | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 22f6659367683..54f6f79834e1e 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -77,6 +77,14 @@ be set to ``"pyarrow"`` to return pyarrow-backed, nullable :class:`ArrowDtype` (
         df_pyarrow = pd.read_csv(data, use_nullable_dtypes=True, engine="pyarrow")
     df_pyarrow.dtypes
 
+Copy on write improvements
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Accessing a single column of a DataFrame as a Series (e.g. ``df["col"]``) now always
+  returns a new object every time it is constructed (not returning multiple times an
+  identical, cached Series object). This ensures that those Series objects correctly
+  follow the Copy-on-Write rules (:issue:`49450`)
+
 .. _whatsnew_200.enhancements.other:
 
 Other enhancements