BUG: df.iloc[:, 0] = df.iloc[::-1, 0] not setting inplace for EAs (pandas-dev#45352)

jbrockmendel · yehoshuadimarsky · commit bdef5adb4623 · 2022-07-13T10:17:56.000-04:00
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -360,6 +360,7 @@ Indexing
 - Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`)
 - Bug in :meth:`Series.align` does not create :class:`MultiIndex` with union of levels when both MultiIndexes intersections are identical (:issue:`45224`)
 - Bug in setting a NA value (``None`` or ``np.nan``) into a :class:`Series` with int-based :class:`IntervalDtype` incorrectly casting to object dtype instead of a float-based :class:`IntervalDtype` (:issue:`45568`)
+- Bug in indexing setting values into an ``ExtensionDtype`` column with ``df.iloc[:, i] = values`` with ``values`` having the same dtype as ``df.iloc[:, i]`` incorrectly inserting a new array instead of setting in-place (:issue:`33457`)
 - Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised instead of casting to a common dtype (:issue:`45070`)
 - Bug in :meth:`Series.__setitem__` when setting incompatible values into a ``PeriodDtype`` or ``IntervalDtype`` :class:`Series` raising when indexing with a boolean mask but coercing when indexing with otherwise-equivalent indexers; these now consistently coerce, along with :meth:`Series.mask` and :meth:`Series.where` (:issue:`45768`)
 - Bug in :meth:`DataFrame.where` with multiple columns with datetime-like dtypes failing to downcast results consistent with other dtypes (:issue:`45837`)
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -1636,21 +1636,9 @@ def iget(self, i: int | tuple[int, int] | tuple[slice, int]):
             return self.values
 
     def set_inplace(self, locs, values: ArrayLike) -> None:
-        # NB: This is a misnomer, is supposed to be inplace but is not,
-        #  see GH#33457
         # When an ndarray, we should have locs.tolist() == [0]
         # When a BlockPlacement we should have list(locs) == [0]
-
-        # error: Incompatible types in assignment (expression has type
-        # "Union[ExtensionArray, ndarray[Any, Any]]", variable has type
-        # "ExtensionArray")
-        self.values = values  # type: ignore[assignment]
-        try:
-            # TODO(GH33457) this can be removed
-            self._cache.clear()
-        except AttributeError:
-            # _cache not yet initialized
-            pass
+        self.values[:] = values
 
     def _maybe_squeeze_arg(self, arg):
         """
diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py
@@ -360,6 +360,14 @@ def test_setitem_series(self, data, full_indexer):
     def test_setitem_frame_2d_values(self, data):
         # GH#44514
         df = pd.DataFrame({"A": data})
+
+        # Avoiding using_array_manager fixture
+        #  https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
+        using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
+
+        df = pd.DataFrame({"A": data})
+        blk_data = df._mgr.arrays[0]
+
         orig = df.copy()
 
         df.iloc[:] = df
@@ -370,6 +378,10 @@ def test_setitem_frame_2d_values(self, data):
 
         df.iloc[:] = df.values
         self.assert_frame_equal(df, orig)
+        if not using_array_manager:
+            # GH#33457 Check that this setting occurred in-place
+            # FIXME(ArrayManager): this should work there too
+            assert df._mgr.arrays[0] is blk_data
 
         df.iloc[:-1] = df.values[:-1]
         self.assert_frame_equal(df, orig)
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -2519,6 +2519,7 @@ def test_dict_nocopy(
             return
 
         c = pd.array([1, 2], dtype=any_numeric_ea_dtype)
+        c_orig = c.copy()
         df = DataFrame({"a": a, "b": b, "c": c}, copy=copy)
 
         def get_base(obj):
@@ -2530,9 +2531,19 @@ def get_base(obj):
             else:
                 raise TypeError
 
-        def check_views():
+        def check_views(c_only: bool = False):
             # written to work for either BlockManager or ArrayManager
+
+            # Check that the underlying data behind df["c"] is still `c`
+            #  after setting with iloc.  Since we don't know which entry in
+            #  df._mgr.arrays corresponds to df["c"], we just check that exactly
+            #  one of these arrays is `c`.  GH#38939
             assert sum(x is c for x in df._mgr.arrays) == 1
+            if c_only:
+                # If we ever stop consolidating in setitem_with_indexer,
+                #  this will become unnecessary.
+                return
+
             assert (
                 sum(
                     get_base(x) is a
@@ -2554,23 +2565,18 @@ def check_views():
             # constructor preserves views
             check_views()
 
+        # TODO: most of the rest of this test belongs in indexing tests
         df.iloc[0, 0] = 0
         df.iloc[0, 1] = 0
         if not copy:
-            # Check that the underlying data behind df["c"] is still `c`
-            #  after setting with iloc.  Since we don't know which entry in
-            #  df._mgr.arrays corresponds to df["c"], we just check that exactly
-            #  one of these arrays is `c`.  GH#38939
-            assert sum(x is c for x in df._mgr.arrays) == 1
-            # TODO: we can call check_views if we stop consolidating
-            #  in setitem_with_indexer
+            check_views(True)
 
         # FIXME(GH#35417): until GH#35417, iloc.setitem into EA values does not preserve
         #  view, so we have to check in the other direction
-        # df.iloc[0, 2] = 0
-        # if not copy:
-        #     check_views()
-        c[0] = 0
+        df.iloc[:, 2] = pd.array([45, 46], dtype=c.dtype)
+        assert df.dtypes.iloc[2] == c.dtype
+        if not copy:
+            check_views(True)
 
         if copy:
             if a.dtype.kind == "M":
@@ -2580,14 +2586,13 @@ def check_views():
                 assert a[0] == a.dtype.type(1)
                 assert b[0] == b.dtype.type(3)
             # FIXME(GH#35417): enable after GH#35417
-            # assert c[0] == 1
-            assert df.iloc[0, 2] == 1
+            assert c[0] == c_orig[0]  # i.e. df.iloc[0, 2]=45 did *not* update c
         else:
             # TODO: we can call check_views if we stop consolidating
             #  in setitem_with_indexer
-            # FIXME(GH#35417): enable after GH#35417
-            # assert b[0] == 0
-            assert df.iloc[0, 2] == 0
+            assert c[0] == 45  # i.e. df.iloc[0, 2]=45 *did* update c
+            # TODO: we can check b[0] == 0 if we stop consolidating in
+            #  setitem_with_indexer (except for datetimelike?)
 
     def test_from_series_with_name_with_columns(self):
         # GH 7893
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
@@ -875,16 +875,19 @@ def test_series_indexing_zerodim_np_array(self):
         result = s.iloc[np.array(0)]
         assert result == 1
 
-    @pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/33457")
+    @td.skip_array_manager_not_yet_implemented
     def test_iloc_setitem_categorical_updates_inplace(self):
         # Mixed dtype ensures we go through take_split_path in setitem_with_indexer
         cat = Categorical(["A", "B", "C"])
-        df = DataFrame({1: cat, 2: [1, 2, 3]})
+        df = DataFrame({1: cat, 2: [1, 2, 3]}, copy=False)
+
+        assert tm.shares_memory(df[1], cat)
 
         # This should modify our original values in-place
         df.iloc[:, 0] = cat[::-1]
+        assert tm.shares_memory(df[1], cat)
 
-        expected = Categorical(["C", "B", "A"])
+        expected = Categorical(["C", "B", "A"], categories=["A", "B", "C"])
         tm.assert_categorical_equal(cat, expected)
 
     def test_iloc_with_boolean_operation(self):