From b855f09e720b45fac3ac7edfd3cb78d173399f3d Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Fri, 6 Jan 2023 21:25:15 -0800
Subject: [PATCH 01/15] WIP: ENH: CoW support for replace

---
 pandas/core/frame.py                   |  2 +-
 pandas/core/generic.py                 |  4 +++-
 pandas/core/internals/managers.py      | 32 ++++++++++++++++++++++++--
 pandas/tests/copy_view/test_methods.py | 30 ++++++++++++++++++++++++
 4 files changed, 64 insertions(+), 4 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6491081c54592..de363d3616e73 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5522,7 +5522,7 @@ def _replace_columnwise(
         DataFrame or None
         """
         # Operate column-wise
-        res = self if inplace else self.copy()
+        res = self if inplace else self.copy(deep=None)
         ax = self.columns
 
         for i, ax_value in enumerate(ax):
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 561422c868e91..a64eb9e337f98 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7045,6 +7045,7 @@ def replace(
                 to_replace = [to_replace]
 
             if isinstance(to_replace, (tuple, list)):
+                # TODO: Consider copy-on-write for non-replaced columns's here
                 if isinstance(self, ABCDataFrame):
                     from pandas import Series
 
@@ -7104,7 +7105,8 @@ def replace(
             if not self.size:
                 if inplace:
                     return None
-                return self.copy()
+                # TODO: Is it even worth doing the CoW here?
+                return self.copy(deep=None)
 
             if is_dict_like(to_replace):
                 if is_dict_like(value):  # {'A' : NA} -> {'A' : 0}
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index c90787ee24277..b6cffac874aa4 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1211,8 +1211,12 @@ def value_getitem(placement):
                 return value[placement.indexer]
 
         # Accessing public blknos ensures the public versions are initialized
+        # print(f"Loc is {loc}")
         blknos = self.blknos[loc]
+        # print(f"Blknos is {blknos}")
         blklocs = self.blklocs[loc].copy()
+        # print(f"Blklocs is {blklocs}")
+        # print(f"Blocks is {self.blocks}")
 
         unfit_mgr_locs = []
         unfit_val_locs = []
@@ -1223,8 +1227,32 @@ def value_getitem(placement):
             if inplace and blk.should_store(value):
                 # Updating inplace -> check if we need to do Copy-on-Write
                 if using_copy_on_write() and not self._has_no_reference_block(blkno_l):
-                    blk.set_inplace(blk_locs, value_getitem(val_locs), copy=True)
-                    self._clear_reference_block(blkno_l)
+
+                    # print("Hit here")
+                    # print(f"Blk locs is {blk_locs}")
+                    # print(f"Blkno_l is {blkno_l}")
+                    prev_refs = self.refs[blkno_l]
+                    leftover_blocks = blk.delete(blk_locs)
+                    num_extra_blocks = len(leftover_blocks)
+                    # Preserve refs for unchanged blocks
+                    extra_refs = [prev_refs] * num_extra_blocks
+                    self.refs = self.refs[:blkno_l] + extra_refs + self.refs[blkno_l:]
+
+                    # TODO: Are we sure we want 2D?
+                    nb = new_block_2d(value, placement=blk._mgr_locs)
+                    old_blocks = self.blocks
+                    new_blocks = (
+                        old_blocks[:blkno_l]
+                        + tuple(leftover_blocks[:blkno_l])
+                        + (nb,)
+                        + tuple(leftover_blocks[blkno_l:])
+                        + old_blocks[blkno_l + num_extra_blocks :]
+                    )
+                    self.blocks = new_blocks
+
+                    self._rebuild_blknos_and_blklocs()
+                    # blk.set_inplace(blk_locs, value_getitem(val_locs), copy=True)
+                    # self._clear_reference_block(blkno_l)
                 else:
                     blk.set_inplace(blk_locs, value_getitem(val_locs))
             else:
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index af900fd5268ef..a95a8c41e1d9b 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -721,3 +721,33 @@ def test_squeeze(using_copy_on_write):
         # Without CoW the original will be modified
         assert np.shares_memory(series.values, get_array(df, "a"))
         assert df.loc[0, "a"] == 0
+
+
+@pytest.mark.parametrize(
+    "to_replace",
+    [
+        {"a": 1, "b": 4},
+        # Test CoW splits blocks to avoid copying unchanged columns
+        {"a": 1},
+        # TODO: broken, fix by fixing Block.replace()
+        # 1
+    ],
+)
+def test_replace(using_copy_on_write, to_replace):
+    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": ["foo", "bar", "baz"]})
+    df_orig = df.copy()
+
+    df_replaced = df.replace(to_replace=to_replace, value=-1)
+
+    # Should share memory regardless of CoW since squeeze is just an iloc
+    if using_copy_on_write:
+        if (df_replaced["b"] == df["b"]).all():
+            assert np.shares_memory(get_array(df_replaced, "b"), get_array(df, "b"))
+        assert np.shares_memory(get_array(df_replaced, "c"), get_array(df, "c"))
+
+    # mutating squeezed df triggers a copy-on-write for that column/block
+    df_replaced.loc[0, "c"] = -1
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(df_replaced, "c"), get_array(df, "a"))
+
+    tm.assert_frame_equal(df, df_orig)

From e27d7886c2f9e852b6869e17d05df9d1c9c91cb7 Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Sat, 7 Jan 2023 11:36:12 -0800
Subject: [PATCH 02/15] Fix bugs, should work for 1-D

---
 pandas/core/internals/managers.py      | 38 ++++++++++++++------------
 pandas/tests/copy_view/test_methods.py | 14 ++++++----
 2 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index b6cffac874aa4..2b2416b65d16b 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1211,12 +1211,8 @@ def value_getitem(placement):
                 return value[placement.indexer]
 
         # Accessing public blknos ensures the public versions are initialized
-        # print(f"Loc is {loc}")
         blknos = self.blknos[loc]
-        # print(f"Blknos is {blknos}")
         blklocs = self.blklocs[loc].copy()
-        # print(f"Blklocs is {blklocs}")
-        # print(f"Blocks is {self.blocks}")
 
         unfit_mgr_locs = []
         unfit_val_locs = []
@@ -1227,30 +1223,38 @@ def value_getitem(placement):
             if inplace and blk.should_store(value):
                 # Updating inplace -> check if we need to do Copy-on-Write
                 if using_copy_on_write() and not self._has_no_reference_block(blkno_l):
-
-                    # print("Hit here")
-                    # print(f"Blk locs is {blk_locs}")
-                    # print(f"Blkno_l is {blkno_l}")
                     prev_refs = self.refs[blkno_l]
-                    leftover_blocks = blk.delete(blk_locs)
-                    num_extra_blocks = len(leftover_blocks)
+                    leftover_blocks = tuple(blk.delete(blk_locs))
                     # Preserve refs for unchanged blocks
-                    extra_refs = [prev_refs] * num_extra_blocks
-                    self.refs = self.refs[:blkno_l] + extra_refs + self.refs[blkno_l:]
+                    # Add new block where old block was and remaining blocks at
+                    # the end to avoid updating all block numbers
+                    extra_refs = [prev_refs] * len(leftover_blocks)
+                    self.refs = (
+                        self.refs[:blkno_l]
+                        + [None]
+                        + self.refs[blkno_l + 1 :]
+                        + extra_refs
+                    )
 
                     # TODO: Are we sure we want 2D?
-                    nb = new_block_2d(value, placement=blk._mgr_locs)
+                    # Also generalize this to bigger than 1-D locs
+                    nb = new_block_2d(
+                        value,
+                        placement=BlockPlacement(slice(blk_locs[0], blk_locs[0] + 1)),
+                    )
                     old_blocks = self.blocks
                     new_blocks = (
                         old_blocks[:blkno_l]
-                        + tuple(leftover_blocks[:blkno_l])
                         + (nb,)
-                        + tuple(leftover_blocks[blkno_l:])
-                        + old_blocks[blkno_l + num_extra_blocks :]
+                        + old_blocks[blkno_l + 1 :]
+                        + leftover_blocks
                     )
+                    self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb))
+                    for i, nb in enumerate(leftover_blocks):
+                        self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb))
+                        self._blknos[nb.mgr_locs.indexer] = i + len(old_blocks)
                     self.blocks = new_blocks
 
-                    self._rebuild_blknos_and_blklocs()
                     # blk.set_inplace(blk_locs, value_getitem(val_locs), copy=True)
                     # self._clear_reference_block(blkno_l)
                 else:
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index a95a8c41e1d9b..da7fb688f1ca0 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -724,22 +724,24 @@ def test_squeeze(using_copy_on_write):
 
 
 @pytest.mark.parametrize(
-    "to_replace",
+    "replace_kwargs",
     [
-        {"a": 1, "b": 4},
+        {"to_replace": {"a": 1, "b": 4}, "value": -1},
         # Test CoW splits blocks to avoid copying unchanged columns
-        {"a": 1},
+        {"to_replace": {"a": 1}, "value": -1},
+        {"to_replace": {"b": 4}, "value": -1},
+        # TODO: broken, fix by fixing Block.replace_list()
+        # {"to_replace": {"b": 1}},
         # TODO: broken, fix by fixing Block.replace()
         # 1
     ],
 )
-def test_replace(using_copy_on_write, to_replace):
+def test_replace(using_copy_on_write, replace_kwargs):
     df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": ["foo", "bar", "baz"]})
     df_orig = df.copy()
 
-    df_replaced = df.replace(to_replace=to_replace, value=-1)
+    df_replaced = df.replace(**replace_kwargs)
 
-    # Should share memory regardless of CoW since squeeze is just an iloc
     if using_copy_on_write:
         if (df_replaced["b"] == df["b"]).all():
             assert np.shares_memory(get_array(df_replaced, "b"), get_array(df, "b"))

From f638abdde3654463903f01d39f268f59c5a75eff Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Mon, 9 Jan 2023 12:23:25 -0800
Subject: [PATCH 03/15] Refactor to support more than one loc

---
 pandas/core/internals/managers.py | 55 +++++++++++++++++++++++++++----
 1 file changed, 49 insertions(+), 6 deletions(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 2b2416b65d16b..b270767a52df4 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1235,19 +1235,62 @@ def value_getitem(placement):
                         + self.refs[blkno_l + 1 :]
                         + extra_refs
                     )
+                    if len(blk_locs) > 1:
+                        # Add the refs for the other deleted blocks
+                        self.refs += [None] * (len(blk_locs) - 1)
+
+                    # Fill before the first leftover block
+                    nbs = []
+                    leftover_start_i = leftover_blocks[0].mgr_locs.as_slice.start
+
+                    if leftover_start_i != 0:
+                        nbs.append(
+                            new_block_2d(
+                                np.tile(value, (leftover_start_i, 1)),
+                                placement=BlockPlacement(slice(0, leftover_start_i)),
+                            )
+                        )
+
+                    # Every hole in between leftover blocks is where we need to insert
+                    # a new block
+                    for i in range(len(leftover_blocks) - 1):
+                        curr_block = leftover_blocks[i]
+                        next_block = leftover_blocks[i + 1]
+
+                        curr_end = curr_block.as_slice.end
+                        next_start = next_block.as_slice.start
+
+                        num_to_fill = next_start - (curr_end - 1)
+                        nb = new_block_2d(
+                            np.tile(value, (num_to_fill, 1)),
+                            placement=BlockPlacement(
+                                slice(curr_end, curr_end + num_to_fill)
+                            ),
+                        )
+                        nbs.append(nb)
+
+                    # Fill after the last leftover block
+                    last_del_loc = blk_locs[-1] + 1
+                    last_leftover_loc = leftover_blocks[-1].mgr_locs.as_slice.stop
+                    if last_del_loc > last_leftover_loc:
+                        diff = last_del_loc - last_leftover_loc
+                        nbs.append(
+                            new_block_2d(
+                                np.tile(value, (diff, 1)),
+                                placement=BlockPlacement(
+                                    slice(last_leftover_loc, last_del_loc)
+                                ),
+                            )
+                        )
 
-                    # TODO: Are we sure we want 2D?
-                    # Also generalize this to bigger than 1-D locs
-                    nb = new_block_2d(
-                        value,
-                        placement=BlockPlacement(slice(blk_locs[0], blk_locs[0] + 1)),
-                    )
                     old_blocks = self.blocks
+                    nb = nbs[0]
                     new_blocks = (
                         old_blocks[:blkno_l]
                         + (nb,)
                         + old_blocks[blkno_l + 1 :]
                         + leftover_blocks
+                        + tuple(nbs)
                     )
                     self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb))
                     for i, nb in enumerate(leftover_blocks):

From bc3af5c28df1c4de73cfe9e5897e825addfab49e Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Tue, 10 Jan 2023 11:09:40 -0800
Subject: [PATCH 04/15] Don't access refs if None

---
 pandas/core/internals/managers.py | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index b270767a52df4..7dff217492144 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1223,21 +1223,22 @@ def value_getitem(placement):
             if inplace and blk.should_store(value):
                 # Updating inplace -> check if we need to do Copy-on-Write
                 if using_copy_on_write() and not self._has_no_reference_block(blkno_l):
-                    prev_refs = self.refs[blkno_l]
                     leftover_blocks = tuple(blk.delete(blk_locs))
-                    # Preserve refs for unchanged blocks
-                    # Add new block where old block was and remaining blocks at
-                    # the end to avoid updating all block numbers
-                    extra_refs = [prev_refs] * len(leftover_blocks)
-                    self.refs = (
-                        self.refs[:blkno_l]
-                        + [None]
-                        + self.refs[blkno_l + 1 :]
-                        + extra_refs
-                    )
-                    if len(blk_locs) > 1:
-                        # Add the refs for the other deleted blocks
-                        self.refs += [None] * (len(blk_locs) - 1)
+                    if self.refs is not None:
+                        prev_refs = self.refs[blkno_l]
+                        # Preserve refs for unchanged blocks
+                        # Add new block where old block was and remaining blocks at
+                        # the end to avoid updating all block numbers
+                        extra_refs = [prev_refs] * len(leftover_blocks)
+                        self.refs = (
+                            self.refs[:blkno_l]
+                            + [None]
+                            + self.refs[blkno_l + 1 :]
+                            + extra_refs
+                        )
+                        if len(blk_locs) > 1:
+                            # Add the refs for the other deleted blocks
+                            self.refs += [None] * (len(blk_locs) - 1)
 
                     # Fill before the first leftover block
                     nbs = []

From 66b957f4a735df2392daddc7a01fd3e231ed5f1d Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Thu, 12 Jan 2023 14:01:59 -0800
Subject: [PATCH 05/15] Flesh out tests, fix issues

---
 pandas/core/generic.py                   |  1 -
 pandas/core/internals/managers.py        | 35 +++++++-----------
 pandas/tests/copy_view/test_internals.py | 46 ++++++++++++++++++++++++
 pandas/tests/copy_view/test_methods.py   |  6 ++--
 4 files changed, 63 insertions(+), 25 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index a64eb9e337f98..f11bcdcdec18f 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7105,7 +7105,6 @@ def replace(
             if not self.size:
                 if inplace:
                     return None
-                # TODO: Is it even worth doing the CoW here?
                 return self.copy(deep=None)
 
             if is_dict_like(to_replace):
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 7dff217492144..ac3177f4be45a 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1224,21 +1224,6 @@ def value_getitem(placement):
                 # Updating inplace -> check if we need to do Copy-on-Write
                 if using_copy_on_write() and not self._has_no_reference_block(blkno_l):
                     leftover_blocks = tuple(blk.delete(blk_locs))
-                    if self.refs is not None:
-                        prev_refs = self.refs[blkno_l]
-                        # Preserve refs for unchanged blocks
-                        # Add new block where old block was and remaining blocks at
-                        # the end to avoid updating all block numbers
-                        extra_refs = [prev_refs] * len(leftover_blocks)
-                        self.refs = (
-                            self.refs[:blkno_l]
-                            + [None]
-                            + self.refs[blkno_l + 1 :]
-                            + extra_refs
-                        )
-                        if len(blk_locs) > 1:
-                            # Add the refs for the other deleted blocks
-                            self.refs += [None] * (len(blk_locs) - 1)
 
                     # Fill before the first leftover block
                     nbs = []
@@ -1258,8 +1243,8 @@ def value_getitem(placement):
                         curr_block = leftover_blocks[i]
                         next_block = leftover_blocks[i + 1]
 
-                        curr_end = curr_block.as_slice.end
-                        next_start = next_block.as_slice.start
+                        curr_end = curr_block.mgr_locs.as_slice.stop
+                        next_start = next_block.mgr_locs.as_slice.start
 
                         num_to_fill = next_start - (curr_end - 1)
                         nb = new_block_2d(
@@ -1283,7 +1268,8 @@ def value_getitem(placement):
                                 ),
                             )
                         )
-
+                    # Add new block where old block was and remaining blocks at
+                    # the end to avoid updating all block numbers
                     old_blocks = self.blocks
                     nb = nbs[0]
                     new_blocks = (
@@ -1291,16 +1277,21 @@ def value_getitem(placement):
                         + (nb,)
                         + old_blocks[blkno_l + 1 :]
                         + leftover_blocks
-                        + tuple(nbs)
+                        + tuple(nbs[1:])
                     )
+                    if self.refs is not None:
+                        prev_refs = self.refs[blkno_l]
+                        extra_refs = [prev_refs] * len(leftover_blocks)
+                        self.refs[blkno_l] = None
+                        self.refs += extra_refs
+                        if len(blk_locs) > 1:
+                            # Add the refs for the other deleted blocks
+                            self.refs += [None] * (len(blk_locs) - 1)
                     self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb))
                     for i, nb in enumerate(leftover_blocks):
                         self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb))
                         self._blknos[nb.mgr_locs.indexer] = i + len(old_blocks)
                     self.blocks = new_blocks
-
-                    # blk.set_inplace(blk_locs, value_getitem(val_locs), copy=True)
-                    # self._clear_reference_block(blkno_l)
                 else:
                     blk.set_inplace(blk_locs, value_getitem(val_locs))
             else:
diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py
index 7a2965f2e1c61..648e027ec2228 100644
--- a/pandas/tests/copy_view/test_internals.py
+++ b/pandas/tests/copy_view/test_internals.py
@@ -5,6 +5,7 @@
 
 import pandas as pd
 from pandas import DataFrame
+import pandas._testing as tm
 from pandas.tests.copy_view.util import get_array
 
 
@@ -92,3 +93,48 @@ def test_switch_options():
         subset.iloc[0, 0] = 0
         # df updated with CoW disabled
         assert df.iloc[0, 0] == 0
+
+
+@td.skip_array_manager_invalid_test
+@pytest.mark.parametrize(
+    "locs",
+    [
+        [0],
+        [1],
+        [3],
+        [0, 1],
+        [0, 2],
+        [0, 1, 2],
+        [1, 2],
+        [1, 3],
+        [0, 5],
+    ],
+)
+def test_iset_splits_blocks_inplace(using_copy_on_write, locs):
+    # Nothing currently calls iset with
+    # more than 1 loc with inplace=True (only happens with inplace=False)
+    # but ensure that it works
+    df = DataFrame(
+        {
+            "a": [1, 2, 3],
+            "b": [4, 5, 6],
+            "c": [7, 8, 9],
+            "d": [10, 11, 12],
+            "e": [13, 14, 15],
+            "f": ["foo", "bar", "baz"],
+        }
+    )
+    df_orig = df.copy()
+    df2 = df.copy(deep=None)  # Trigger a CoW (if enabled, otherwise makes copy)
+    arr = np.array([-1, -2, -3])
+    df2._mgr.iset(locs, arr, inplace=True)
+
+    tm.assert_frame_equal(df, df_orig)
+
+    if using_copy_on_write:
+        for i, col in enumerate(df.columns):
+            if i not in locs:
+                assert np.shares_memory(get_array(df, col), get_array(df2, col))
+    else:
+        for col in df.columns:
+            assert not np.shares_memory(get_array(df, col), get_array(df2, col))
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index da7fb688f1ca0..e919c43a85bfe 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -730,9 +730,11 @@ def test_squeeze(using_copy_on_write):
         # Test CoW splits blocks to avoid copying unchanged columns
         {"to_replace": {"a": 1}, "value": -1},
         {"to_replace": {"b": 4}, "value": -1},
-        # TODO: broken, fix by fixing Block.replace_list()
+        {"to_replace": {"b": {4: 1}}},
+        # TODO: Add these in a further optimization
+        # We would need to see which columns got replaced in the mask
+        # which could be expensive
         # {"to_replace": {"b": 1}},
-        # TODO: broken, fix by fixing Block.replace()
         # 1
     ],
 )

From 2266ab8786f8c8ce7559e2675ad0152458430d90 Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Thu, 12 Jan 2023 14:50:52 -0800
Subject: [PATCH 06/15] fix tests

---
 pandas/tests/copy_view/test_internals.py | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py
index 648e027ec2228..fa8e73ce7ebb3 100644
--- a/pandas/tests/copy_view/test_internals.py
+++ b/pandas/tests/copy_view/test_internals.py
@@ -97,20 +97,19 @@ def test_switch_options():
 
 @td.skip_array_manager_invalid_test
 @pytest.mark.parametrize(
-    "locs",
+    "locs, arr",
     [
-        [0],
-        [1],
-        [3],
-        [0, 1],
-        [0, 2],
-        [0, 1, 2],
-        [1, 2],
-        [1, 3],
-        [0, 5],
+        ([0], np.array([-1, -2, -3])),
+        ([1], np.array([-1, -2, -3])),
+        ([5], np.array([-1, -2, -3])),
+        ([0, 1], np.array([-1, -2, -3])),
+        ([0, 2], np.array([-1, -2, -3])),
+        ([0, 1, 2], np.array([-1, -2, -3])),
+        ([1, 2], np.array([-1, -2, -3])),
+        ([1, 3], np.array([-1, -2, -3])),
     ],
 )
-def test_iset_splits_blocks_inplace(using_copy_on_write, locs):
+def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr):
     # Nothing currently calls iset with
     # more than 1 loc with inplace=True (only happens with inplace=False)
     # but ensure that it works
@@ -126,7 +125,6 @@ def test_iset_splits_blocks_inplace(using_copy_on_write, locs):
     )
     df_orig = df.copy()
     df2 = df.copy(deep=None)  # Trigger a CoW (if enabled, otherwise makes copy)
-    arr = np.array([-1, -2, -3])
     df2._mgr.iset(locs, arr, inplace=True)
 
     tm.assert_frame_equal(df, df_orig)

From 7aee8476bb334632d4d3e9d79185e87037a6fed3 Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Fri, 13 Jan 2023 19:51:22 -0800
Subject: [PATCH 07/15] fixes

---
 pandas/core/internals/managers.py        | 16 +++++++++++-----
 pandas/tests/copy_view/test_internals.py |  1 +
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index f2858cde5c403..aff0d7db134c5 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1224,6 +1224,9 @@ def value_getitem(placement):
                 # Updating inplace -> check if we need to do Copy-on-Write
                 if using_copy_on_write() and not self._has_no_reference_block(blkno_l):
                     leftover_blocks = tuple(blk.delete(blk_locs))
+                    vals = np.broadcast_to(
+                        value_getitem(val_locs), blk.values[blk_locs].shape
+                    )
 
                     # Fill before the first leftover block
                     nbs = []
@@ -1232,13 +1235,14 @@ def value_getitem(placement):
                     if leftover_start_i != 0:
                         nbs.append(
                             new_block_2d(
-                                np.tile(value, (leftover_start_i, 1)),
+                                vals[:leftover_start_i],
                                 placement=BlockPlacement(slice(0, leftover_start_i)),
                             )
                         )
 
                     # Every hole in between leftover blocks is where we need to insert
                     # a new block
+                    curr_idx = leftover_start_i
                     for i in range(len(leftover_blocks) - 1):
                         curr_block = leftover_blocks[i]
                         next_block = leftover_blocks[i + 1]
@@ -1246,14 +1250,15 @@ def value_getitem(placement):
                         curr_end = curr_block.mgr_locs.as_slice.stop
                         next_start = next_block.mgr_locs.as_slice.start
 
-                        num_to_fill = next_start - (curr_end - 1)
+                        num_to_fill = next_start - curr_end
                         nb = new_block_2d(
-                            np.tile(value, (num_to_fill, 1)),
+                            vals[curr_idx : curr_idx + num_to_fill],
                             placement=BlockPlacement(
                                 slice(curr_end, curr_end + num_to_fill)
                             ),
                         )
                         nbs.append(nb)
+                        curr_idx += num_to_fill
 
                     # Fill after the last leftover block
                     last_del_loc = blk_locs[-1] + 1
@@ -1262,7 +1267,7 @@ def value_getitem(placement):
                         diff = last_del_loc - last_leftover_loc
                         nbs.append(
                             new_block_2d(
-                                np.tile(value, (diff, 1)),
+                                vals[curr_idx : curr_idx + diff],
                                 placement=BlockPlacement(
                                     slice(last_leftover_loc, last_del_loc)
                                 ),
@@ -1271,6 +1276,7 @@ def value_getitem(placement):
                     # Add new block where old block was and remaining blocks at
                     # the end to avoid updating all block numbers
                     old_blocks = self.blocks
+                    nbs = tuple(nbs)
                     nb = nbs[0]
                     new_blocks = (
                         old_blocks[:blkno_l]
@@ -1288,7 +1294,7 @@ def value_getitem(placement):
                             # Add the refs for the other deleted blocks
                             self.refs += [None] * (len(blk_locs) - 1)
                     self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb))
-                    for i, nb in enumerate(leftover_blocks):
+                    for i, nb in enumerate(leftover_blocks + nbs[1:]):
                         self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb))
                         self._blknos[nb.mgr_locs.indexer] = i + len(old_blocks)
                     self.blocks = new_blocks
diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py
index fa8e73ce7ebb3..d9df1f0040769 100644
--- a/pandas/tests/copy_view/test_internals.py
+++ b/pandas/tests/copy_view/test_internals.py
@@ -107,6 +107,7 @@ def test_switch_options():
         ([0, 1, 2], np.array([-1, -2, -3])),
         ([1, 2], np.array([-1, -2, -3])),
         ([1, 3], np.array([-1, -2, -3])),
+        ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
     ],
 )
 def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr):

From 6dc05e185e8d5a350d465d39c9bf397131983840 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 14 Jan 2023 20:10:17 +0100
Subject: [PATCH 08/15] Simplify block splitting logic

---
 pandas/core/internals/managers.py | 103 ++++++------------------------
 1 file changed, 18 insertions(+), 85 deletions(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 0d3ccf94b822b..0ed59bbde408d 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1220,83 +1220,15 @@ def value_getitem(placement):
             if inplace and blk.should_store(value):
                 # Updating inplace -> check if we need to do Copy-on-Write
                 if using_copy_on_write() and not self._has_no_reference_block(blkno_l):
-                    leftover_blocks = tuple(blk.delete(blk_locs))
-                    vals = np.broadcast_to(
-                        value_getitem(val_locs), blk.values[blk_locs].shape
-                    )
-
-                    # Fill before the first leftover block
-                    nbs = []
-                    leftover_start_i = leftover_blocks[0].mgr_locs.as_slice.start
-
-                    if leftover_start_i != 0:
-                        nbs.append(
-                            new_block_2d(
-                                vals[:leftover_start_i],
-                                placement=BlockPlacement(slice(0, leftover_start_i)),
-                            )
-                        )
-
-                    # Every hole in between leftover blocks is where we need to insert
-                    # a new block
-                    curr_idx = leftover_start_i
-                    for i in range(len(leftover_blocks) - 1):
-                        curr_block = leftover_blocks[i]
-                        next_block = leftover_blocks[i + 1]
-
-                        curr_end = curr_block.mgr_locs.as_slice.stop
-                        next_start = next_block.mgr_locs.as_slice.start
-
-                        num_to_fill = next_start - curr_end
-                        nb = new_block_2d(
-                            vals[curr_idx : curr_idx + num_to_fill],
-                            placement=BlockPlacement(
-                                slice(curr_end, curr_end + num_to_fill)
-                            ),
-                        )
-                        nbs.append(nb)
-                        curr_idx += num_to_fill
-
-                    # Fill after the last leftover block
-                    last_del_loc = blk_locs[-1] + 1
-                    last_leftover_loc = leftover_blocks[-1].mgr_locs.as_slice.stop
-                    if last_del_loc > last_leftover_loc:
-                        diff = last_del_loc - last_leftover_loc
-                        nbs.append(
-                            new_block_2d(
-                                vals[curr_idx : curr_idx + diff],
-                                placement=BlockPlacement(
-                                    slice(last_leftover_loc, last_del_loc)
-                                ),
-                            )
-                        )
-                    # Add new block where old block was and remaining blocks at
-                    # the end to avoid updating all block numbers
-                    old_blocks = self.blocks
-                    nbs = tuple(nbs)
-                    nb = nbs[0]
-                    new_blocks = (
-                        old_blocks[:blkno_l]
-                        + (nb,)
-                        + old_blocks[blkno_l + 1 :]
-                        + leftover_blocks
-                        + tuple(nbs[1:])
+                    nbs_tup = tuple(blk.delete(blk_locs))
+                    first_nb = new_block_2d(
+                        value_getitem(val_locs), BlockPlacement(blk.mgr_locs[blk_locs])
                     )
                     if self.refs is not None:
-                        prev_refs = self.refs[blkno_l]
-                        extra_refs = [prev_refs] * len(leftover_blocks)
-                        self.refs[blkno_l] = None
-                        self.refs += extra_refs
-                        if len(blk_locs) > 1:
-                            # Add the refs for the other deleted blocks
-                            self.refs += [None] * (len(blk_locs) - 1)
-                    self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb))
-                    for i, nb in enumerate(leftover_blocks + nbs[1:]):
-                        self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb))
-                        self._blknos[nb.mgr_locs.indexer] = i + len(old_blocks)
-                    self.blocks = new_blocks
+                        self.refs.extend([self.refs[blkno_l]] * len(nbs_tup))
                 else:
                     blk.set_inplace(blk_locs, value_getitem(val_locs))
+                    continue
             else:
                 unfit_mgr_locs.append(blk.mgr_locs.as_array[blk_locs])
                 unfit_val_locs.append(val_locs)
@@ -1304,25 +1236,26 @@ def value_getitem(placement):
                 # If all block items are unfit, schedule the block for removal.
                 if len(val_locs) == len(blk.mgr_locs):
                     removed_blknos.append(blkno_l)
+                    continue
                 else:
                     nbs = blk.delete(blk_locs)
                     # Add first block where old block was and remaining blocks at
                     # the end to avoid updating all block numbers
                     first_nb = nbs[0]
                     nbs_tup = tuple(nbs[1:])
-                    nr_blocks = len(self.blocks)
-                    blocks_tup = (
-                        self.blocks[:blkno_l]
-                        + (first_nb,)
-                        + self.blocks[blkno_l + 1 :]
-                        + nbs_tup
-                    )
-                    self.blocks = blocks_tup
-                    self._blklocs[first_nb.mgr_locs.indexer] = np.arange(len(first_nb))
+            nr_blocks = len(self.blocks)
+            blocks_tup = (
+                self.blocks[:blkno_l]
+                + (first_nb,)
+                + self.blocks[blkno_l + 1 :]
+                + nbs_tup
+            )
+            self.blocks = blocks_tup
+            self._blklocs[first_nb.mgr_locs.indexer] = np.arange(len(first_nb))
 
-                    for i, nb in enumerate(nbs_tup):
-                        self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb))
-                        self._blknos[nb.mgr_locs.indexer] = i + nr_blocks
+            for i, nb in enumerate(nbs_tup):
+                self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb))
+                self._blknos[nb.mgr_locs.indexer] = i + nr_blocks
 
         if len(removed_blknos):
             # Remove blocks & update blknos and refs accordingly

From 6514131fe77bfb1664eec1a6742cdbd3145c8802 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 14 Jan 2023 20:37:59 +0100
Subject: [PATCH 09/15] Clear ref

---
 pandas/core/internals/managers.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 0ed59bbde408d..07c8194227283 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1226,6 +1226,7 @@ def value_getitem(placement):
                     )
                     if self.refs is not None:
                         self.refs.extend([self.refs[blkno_l]] * len(nbs_tup))
+                    self._clear_reference_block(blkno)
                 else:
                     blk.set_inplace(blk_locs, value_getitem(val_locs))
                     continue

From 384984223fa6f7861ff5618dbc427ae3f36f7eda Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 14 Jan 2023 20:42:02 +0100
Subject: [PATCH 10/15] Adjust test

---
 pandas/tests/copy_view/test_methods.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index b1b286b813d04..5a7efbb4bae8d 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -934,6 +934,10 @@ def test_replace(using_copy_on_write, replace_kwargs):
     if using_copy_on_write:
         assert not np.shares_memory(get_array(df_replaced, "c"), get_array(df, "a"))
 
+    if "a" in replace_kwargs:
+        arr = get_array(df_replaced, "a")
+        df_replaced.loc[0, "a"] = 100
+        assert np.shares_memory(get_array(df_replaced, "a"), arr)
     tm.assert_frame_equal(df, df_orig)
 
 

From f2b8a97c79ee1cbb06bcff9af09dfa85ebbb4fbc Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 14 Jan 2023 20:45:12 +0100
Subject: [PATCH 11/15] Fix test

---
 pandas/tests/copy_view/test_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index 5a7efbb4bae8d..f5987cebea9c2 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -934,7 +934,7 @@ def test_replace(using_copy_on_write, replace_kwargs):
     if using_copy_on_write:
         assert not np.shares_memory(get_array(df_replaced, "c"), get_array(df, "a"))
 
-    if "a" in replace_kwargs:
+    if "a" in replace_kwargs["to_replace"]:
         arr = get_array(df_replaced, "a")
         df_replaced.loc[0, "a"] = 100
         assert np.shares_memory(get_array(df_replaced, "a"), arr)

From c1d90ca3d0c03ddff143de4312b37d0171ee3b52 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 14 Jan 2023 21:30:57 +0100
Subject: [PATCH 12/15] Small fix

---
 pandas/core/internals/managers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 07c8194227283..1a0aba0778da5 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1226,7 +1226,7 @@ def value_getitem(placement):
                     )
                     if self.refs is not None:
                         self.refs.extend([self.refs[blkno_l]] * len(nbs_tup))
-                    self._clear_reference_block(blkno)
+                    self._clear_reference_block(blkno_l)
                 else:
                     blk.set_inplace(blk_locs, value_getitem(val_locs))
                     continue

From e8e1ff243e2c27db158fc9194a98f88d8850b08e Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 14 Jan 2023 21:50:19 +0100
Subject: [PATCH 13/15] Fix 32 bit test

---
 pandas/tests/copy_view/test_internals.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py
index bd2dca5c80cfa..f16a262b2ea16 100644
--- a/pandas/tests/copy_view/test_internals.py
+++ b/pandas/tests/copy_view/test_internals.py
@@ -100,15 +100,15 @@ def test_switch_options():
 @pytest.mark.parametrize(
     "locs, arr",
     [
-        ([0], np.array([-1, -2, -3])),
-        ([1], np.array([-1, -2, -3])),
-        ([5], np.array([-1, -2, -3])),
-        ([0, 1], np.array([-1, -2, -3])),
-        ([0, 2], np.array([-1, -2, -3])),
-        ([0, 1, 2], np.array([-1, -2, -3])),
-        ([1, 2], np.array([-1, -2, -3])),
-        ([1, 3], np.array([-1, -2, -3])),
-        ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
+        ([0], np.array([-1, -2, -3], dtype=np.intp)),
+        ([1], np.array([-1, -2, -3], dtype=np.intp)),
+        ([5], np.array([-1, -2, -3], dtype=np.intp)),
+        ([0, 1], np.array([-1, -2, -3], dtype=np.intp)),
+        ([0, 2], np.array([-1, -2, -3], dtype=np.intp)),
+        ([0, 1, 2], np.array([-1, -2, -3], dtype=np.intp)),
+        ([1, 2], np.array([-1, -2, -3], dtype=np.intp)),
+        ([1, 3], np.array([-1, -2, -3], dtype=np.intp)),
+        ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]], dtype=np.intp).T),
     ],
 )
 def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr):
@@ -122,9 +122,10 @@ def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr):
             "c": [7, 8, 9],
             "d": [10, 11, 12],
             "e": [13, 14, 15],
-            "f": ["foo", "bar", "baz"],
-        }
+        },
+        dtype=np.intp,
     )
+    df["f"] = ["a", "b", "c"]
     df_orig = df.copy()
     df2 = df.copy(deep=None)  # Trigger a CoW (if enabled, otherwise makes copy)
     df2._mgr.iset(locs, arr, inplace=True)

From 9e84de41e44860325ab55630ebaccc4c5ba9d7ec Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 14 Jan 2023 22:49:33 +0100
Subject: [PATCH 14/15] Adjust test

---
 pandas/tests/copy_view/test_internals.py | 25 ++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py
index f16a262b2ea16..506ae7d3465c5 100644
--- a/pandas/tests/copy_view/test_internals.py
+++ b/pandas/tests/copy_view/test_internals.py
@@ -97,21 +97,22 @@ def test_switch_options():
 
 
 @td.skip_array_manager_invalid_test
+@pytest.mark.parametrize("dtype", [np.intp, np.int8])
 @pytest.mark.parametrize(
     "locs, arr",
     [
-        ([0], np.array([-1, -2, -3], dtype=np.intp)),
-        ([1], np.array([-1, -2, -3], dtype=np.intp)),
-        ([5], np.array([-1, -2, -3], dtype=np.intp)),
-        ([0, 1], np.array([-1, -2, -3], dtype=np.intp)),
-        ([0, 2], np.array([-1, -2, -3], dtype=np.intp)),
-        ([0, 1, 2], np.array([-1, -2, -3], dtype=np.intp)),
-        ([1, 2], np.array([-1, -2, -3], dtype=np.intp)),
-        ([1, 3], np.array([-1, -2, -3], dtype=np.intp)),
-        ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]], dtype=np.intp).T),
+        ([0], np.array([-1, -2, -3])),
+        ([1], np.array([-1, -2, -3])),
+        ([5], np.array([-1, -2, -3])),
+        ([0, 1], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
+        ([0, 2], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
+        ([0, 1, 2], np.array([[-1, -2, -3], [-4, -5, -6], [-4, -5, -6]]).T),
+        ([1, 2], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
+        ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
+        ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
     ],
 )
-def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr):
+def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr, dtype):
     # Nothing currently calls iset with
     # more than 1 loc with inplace=True (only happens with inplace=False)
     # but ensure that it works
@@ -122,10 +123,10 @@ def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr):
             "c": [7, 8, 9],
             "d": [10, 11, 12],
             "e": [13, 14, 15],
+            "f": ["a", "b", "c"],
         },
-        dtype=np.intp,
     )
-    df["f"] = ["a", "b", "c"]
+    arr = arr.astype(dtype)
     df_orig = df.copy()
     df2 = df.copy(deep=None)  # Trigger a CoW (if enabled, otherwise makes copy)
     df2._mgr.iset(locs, arr, inplace=True)

From 7988a163b7898f6965ea344b2eb8e32f3b08c08f Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 16 Jan 2023 17:19:06 +0100
Subject: [PATCH 15/15] Update pandas/tests/copy_view/test_methods.py

Co-authored-by: Thomas Li <47963215+lithomas1@users.noreply.github.com>
---
 pandas/tests/copy_view/test_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index f5987cebea9c2..d1184c3a70610 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -932,7 +932,7 @@ def test_replace(using_copy_on_write, replace_kwargs):
     # mutating squeezed df triggers a copy-on-write for that column/block
     df_replaced.loc[0, "c"] = -1
     if using_copy_on_write:
-        assert not np.shares_memory(get_array(df_replaced, "c"), get_array(df, "a"))
+        assert not np.shares_memory(get_array(df_replaced, "c"), get_array(df, "c"))
 
     if "a" in replace_kwargs["to_replace"]:
         arr = get_array(df_replaced, "a")