From 433098475b2914cfe518807adfd7614691a762d4 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 28 Feb 2020 13:24:50 -0800
Subject: [PATCH 1/9] Fix iloc setitem with duplicate columns for scalar case

---
 pandas/core/frame.py               | 12 +++++++
 pandas/core/generic.py             |  4 +++
 pandas/core/indexing.py            | 52 +++++++++++++++++++++---------
 pandas/core/internals/managers.py  | 14 ++++++++
 pandas/tests/indexing/test_iloc.py | 19 +++++++++++
 5 files changed, 86 insertions(+), 15 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f304fadbab871..8bb6a7e8c8621 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2708,6 +2708,18 @@ def _setitem_frame(self, key, value):
         self._check_setitem_copy()
         self._where(-key, value, inplace=True)
 
+    def _iset_item(self, loc: int, value):
+        self._ensure_valid_index(value)
+        value = self._sanitize_column(loc, value)
+        # FIXME: sanitize_column isnt for iloc
+        NDFrame._iset_item(self, loc, value)
+
+        # check if we are modifying a copy
+        # try to set first as we want an invalid
+        # value exception to occur first
+        if len(self):
+            self._check_setitem_copy()
+
     def _set_item(self, key, value):
         """
         Add series to DataFrame in specified column.
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 25770c2c6470c..3558f1569098e 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -3490,6 +3490,10 @@ def _slice(self: FrameOrSeries, slobj: slice, axis=0) -> FrameOrSeries:
         result._set_is_copy(self, copy=is_copy)
         return result
 
+    def _iset_item(self, loc: int, value) -> None:
+        self._data.iset(loc, value)
+        self._clear_item_cache()
+
     def _set_item(self, key, value) -> None:
         self._data.set(key, value)
         self._clear_item_cache()
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 3ab180bafd156..aa24f49adef73 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1633,14 +1633,12 @@ def _setitem_with_indexer(self, indexer, value):
                 info_idx = [info_idx]
             labels = item_labels[info_idx]
 
+            plane_indexer = indexer[:1]
+            lplane_indexer = length_of_indexer(plane_indexer[0], self.obj.index)
+            # lplane_indexer gives the expected length of obj[indexer[0]]
+
             if len(labels) == 1:
                 # We can operate on a single column
-                item = labels[0]
-                idx = indexer[0]
-
-                plane_indexer = tuple([idx])
-                lplane_indexer = length_of_indexer(plane_indexer[0], self.obj.index)
-                # lplane_indexer gives the expected length of obj[idx]
 
                 # require that we are setting the right number of values that
                 # we are indexing
@@ -1652,14 +1650,33 @@ def _setitem_with_indexer(self, indexer, value):
                         "length than the value"
                     )
 
-            # non-mi
-            else:
-                plane_indexer = indexer[:1]
-                lplane_indexer = length_of_indexer(plane_indexer[0], self.obj.index)
+            pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
+
+            def isetter(loc, v):
+                ser = self.obj._ixs(loc, axis=1)
+
+                # perform the equivalent of a setitem on the info axis
+                # as we have a null slice or a slice with full bounds
+                # which means essentially reassign to the columns of a
+                # multi-dim object
+                # GH6149 (null slice), GH10408 (full bounds)
+                if isinstance(pi, tuple) and all(
+                    com.is_null_slice(idx) or com.is_full_slice(idx, len(self.obj))
+                    for idx in pi
+                ):
+                    ser = v
+                else:
+                    # set the item, possibly having a dtype change
+                    ser._consolidate_inplace()
+                    ser = ser.copy()
+                    ser._data = ser._data.setitem(indexer=pi, value=v)
+                    ser._maybe_update_cacher(clear=True)
+
+                # reset the sliced object if unique
+                self.obj._iset_item(loc, ser)
 
             def setter(item, v):
                 ser = self.obj[item]
-                pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
 
                 # perform the equivalent of a setitem on the info axis
                 # as we have a null slice or a slice with full bounds
@@ -1740,13 +1757,16 @@ def setter(item, v):
                         setter(item, v)
             else:
 
-                # scalar
-                for item in labels:
-                    setter(item, value)
+                # scalar value
+                if isinstance(info_idx, slice):
+                    # TODO: wrong place for this
+                    ri = Index(range(len(self.obj.columns)))
+                    info_idx = ri[info_idx]
+                for loc in info_idx:
+                    isetter(loc, value)
 
         else:
             if isinstance(indexer, tuple):
-                indexer = maybe_convert_ix(*indexer)
 
                 # if we are setting on the info axis ONLY
                 # set using those methods to avoid block-splitting
@@ -1764,6 +1784,8 @@ def setter(item, v):
                     self.obj[item_labels[indexer[info_axis]]] = value
                     return
 
+                indexer = maybe_convert_ix(*indexer)
+
             if isinstance(value, (ABCSeries, dict)):
                 # TODO(EA): ExtensionBlock.setitem this causes issues with
                 # setting for extensionarrays that store dicts. Need to decide
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index e397167e4881f..49c9de39eb438 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -995,6 +995,20 @@ def delete(self, item):
         self._shape = None
         self._rebuild_blknos_and_blklocs()
 
+    def iset(self, loc: int, value):
+        if loc >= len(self.items) or self.ndim != 2:
+            raise NotImplementedError
+
+        items = self.items
+        try:
+            # In order to re-use `set`, we temporarily patch our items
+            # TODO: This really should defer the other way around
+            self.axes[0] = Index(range(len(items)))
+
+            self.set(loc, value)
+        finally:
+            self.axes[0] = items
+
     def set(self, item, value):
         """
         Set new item in-place. Does not consolidate. Adds new Block if not
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index 683d4f2605712..e9ef244353b0d 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -694,3 +694,22 @@ def test_series_indexing_zerodim_np_array(self):
         s = Series([1, 2])
         result = s.iloc[np.array(0)]
         assert result == 1
+
+
+class TestILocSetItemDuplicateColumns:
+    def test_iloc_setitem_scalar_duplicate_columns(self):
+        # GH#15686, duplicate columns and mixed dtype
+        df1 = pd.DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}])
+        df2 = pd.DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}])
+        df = pd.concat([df1, df2], axis=1)
+        df.iloc[0, 0] = -1
+
+        assert df.iloc[0, 0] == -1
+        assert df.iloc[0, 2] == 3
+        assert df.dtypes.iloc[2] == np.int64
+
+    @pytest.mark.xfail(reason="only scalars fixed so far")
+    def test_iloc_setitem_series_duplicate_columns(self):
+        df = pd.DataFrame(np.arange(8).reshape(2, 4), columns=["A", "B", "A", "B"])
+        df.iloc[:, 0] = df.iloc[:, 0].astype(np.float64)
+        assert df.dtypes.iloc[2] == np.int64  # Still broken

From 620c5b9c4cf91d5cedd5b451e084b0a4a029ced9 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 28 Feb 2020 15:19:36 -0800
Subject: [PATCH 2/9] Fix setitem in more cases

---
 pandas/core/indexing.py            | 39 +++++++++++++++---------------
 pandas/tests/indexing/test_iloc.py | 13 +++++++---
 2 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index aa24f49adef73..96aa1e6d6d581 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1633,6 +1633,12 @@ def _setitem_with_indexer(self, indexer, value):
                 info_idx = [info_idx]
             labels = item_labels[info_idx]
 
+            # Ensure we have something we can iterate over
+            ilocs = info_idx
+            if isinstance(info_idx, slice):
+                ri = Index(range(len(self.obj.columns)))
+                ilocs = ri[info_idx]
+
             plane_indexer = indexer[:1]
             lplane_indexer = length_of_indexer(plane_indexer[0], self.obj.index)
             # lplane_indexer gives the expected length of obj[indexer[0]]
@@ -1727,42 +1733,38 @@ def setter(item, v):
                     # note that this coerces the dtype if we are mixed
                     # GH 7551
                     value = np.array(value, dtype=object)
-                    if len(labels) != value.shape[1]:
+                    if len(ilocs) != value.shape[1]:
                         raise ValueError(
                             "Must have equal len keys and value "
                             "when setting with an ndarray"
                         )
 
-                    for i, item in enumerate(labels):
-
-                        # setting with a list, recoerces
-                        setter(item, value[:, i].tolist())
+                    for i, loc in enumerate(ilocs):
+                        # setting with a list, re-coerces
+                        isetter(loc, value[:, i].tolist())
 
                 # we have an equal len list/ndarray
                 elif _can_do_equal_len(
-                    labels, value, plane_indexer, lplane_indexer, self.obj
+                    labels, value, plane_indexer, lplane_indexer, self.obj.index
                 ):
-                    setter(labels[0], value)
+                    # We only get here with len(labels) == len(ilocs) == 1
+                    isetter(ilocs[0], value)
 
                 # per label values
                 else:
 
-                    if len(labels) != len(value):
+                    if len(ilocs) != len(value):
                         raise ValueError(
                             "Must have equal len keys and value "
                             "when setting with an iterable"
                         )
 
-                    for item, v in zip(labels, value):
-                        setter(item, v)
+                    for loc, v in zip(ilocs, value):
+                        isetter(loc, v)
             else:
 
                 # scalar value
-                if isinstance(info_idx, slice):
-                    # TODO: wrong place for this
-                    ri = Index(range(len(self.obj.columns)))
-                    info_idx = ri[info_idx]
-                for loc in info_idx:
+                for loc in ilocs:
                     isetter(loc, value)
 
         else:
@@ -2301,7 +2303,7 @@ def _maybe_numeric_slice(df, slice_, include_bool=False):
     return slice_
 
 
-def _can_do_equal_len(labels, value, plane_indexer, lplane_indexer, obj) -> bool:
+def _can_do_equal_len(labels, value, plane_indexer, lplane_indexer, index) -> bool:
     """
     Returns
     -------
@@ -2311,12 +2313,11 @@ def _can_do_equal_len(labels, value, plane_indexer, lplane_indexer, obj) -> bool
     if not len(labels) == 1 or not np.iterable(value) or is_scalar(plane_indexer[0]):
         return False
 
-    item = labels[0]
-    index = obj[item].index
-
     values_len = len(value)
     # equal len list/ndarray
     if len(index) == values_len:
+        # FIXME: this looks wrong; the only test that breaks if we do this
+        #  has an all-False boolean mask here
         return True
     elif lplane_indexer == values_len:
         return True
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index e9ef244353b0d..d5164044c4952 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -349,7 +349,6 @@ def test_iloc_setitem_dups(self):
         df = concat([df1, df2], axis=1)
 
         expected = df.fillna(3)
-        expected["A"] = expected["A"].astype("float64")
         inds = np.isnan(df.iloc[:, 0])
         mask = inds[inds].index
         df.iloc[mask, 0] = df.iloc[mask, 2]
@@ -708,8 +707,16 @@ def test_iloc_setitem_scalar_duplicate_columns(self):
         assert df.iloc[0, 2] == 3
         assert df.dtypes.iloc[2] == np.int64
 
-    @pytest.mark.xfail(reason="only scalars fixed so far")
+    def test_iloc_setitem_list_duplicate_columns(self):
+        # GH#22036 setting with same-sized list
+        df = pd.DataFrame([[0, "str", "str2"]], columns=["a", "b", "b"])
+
+        df.iloc[:, 2] = ["str3"]
+
+        expected = pd.DataFrame([[0, "str", "str3"]], columns=["a", "b", "b"])
+        tm.assert_frame_equal(df, expected)
+
     def test_iloc_setitem_series_duplicate_columns(self):
         df = pd.DataFrame(np.arange(8).reshape(2, 4), columns=["A", "B", "A", "B"])
         df.iloc[:, 0] = df.iloc[:, 0].astype(np.float64)
-        assert df.dtypes.iloc[2] == np.int64  # Still broken
+        assert df.dtypes.iloc[2] == np.int64

From 08f9df3dacc2ec5087f9757181a78968703996cb Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 28 Feb 2020 16:06:53 -0800
Subject: [PATCH 3/9] BUG: fix setitem_with_indexer

---
 pandas/core/indexing.py | 29 ++++-------------------------
 1 file changed, 4 insertions(+), 25 deletions(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 96aa1e6d6d581..ab320e3f1b78d 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1681,29 +1681,6 @@ def isetter(loc, v):
                 # reset the sliced object if unique
                 self.obj._iset_item(loc, ser)
 
-            def setter(item, v):
-                ser = self.obj[item]
-
-                # perform the equivalent of a setitem on the info axis
-                # as we have a null slice or a slice with full bounds
-                # which means essentially reassign to the columns of a
-                # multi-dim object
-                # GH6149 (null slice), GH10408 (full bounds)
-                if isinstance(pi, tuple) and all(
-                    com.is_null_slice(idx) or com.is_full_slice(idx, len(self.obj))
-                    for idx in pi
-                ):
-                    ser = v
-                else:
-                    # set the item, possibly having a dtype change
-                    ser._consolidate_inplace()
-                    ser = ser.copy()
-                    ser._data = ser._data.setitem(indexer=pi, value=v)
-                    ser._maybe_update_cacher(clear=True)
-
-                # reset the sliced object if unique
-                self.obj[item] = ser
-
             # we need an iterable, with a ndim of at least 1
             # eg. don't pass through np.array(0)
             if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0:
@@ -1712,8 +1689,10 @@ def setter(item, v):
                 if isinstance(value, ABCDataFrame):
                     sub_indexer = list(indexer)
                     multiindex_indexer = isinstance(labels, ABCMultiIndex)
+                    # TODO: we are implicitly assuming value.columns is unique
 
-                    for item in labels:
+                    for loc in ilocs:
+                        item = item_labels[loc]
                         if item in value:
                             sub_indexer[info_axis] = item
                             v = self._align_series(
@@ -1722,7 +1701,7 @@ def setter(item, v):
                         else:
                             v = np.nan
 
-                        setter(item, v)
+                        isetter(loc, v)
 
                 # we have an equal len ndarray/convertible to our labels
                 # hasattr first, to avoid coercing to ndarray without reason.

From 5b4711daf33d2019a8abc6f7a596df6de3d2993a Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 28 Feb 2020 19:35:28 -0800
Subject: [PATCH 4/9] comment, no copy needed

---
 pandas/core/internals/managers.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 49c9de39eb438..146c934e1b2a6 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1050,6 +1050,9 @@ def value_getitem(placement):
             return
 
         if isinstance(loc, int):
+            # We have 6 tests where loc is _not_ an int.
+            # In this case, get_blkno_placements will yield only one tuple,
+            #  containing (self._blknos[loc], BlockPlacement(slice(0, 1, 1)))
             loc = [loc]
 
         blknos = self._blknos[loc]
@@ -1100,7 +1103,7 @@ def value_getitem(placement):
                 # one item.
                 new_blocks.extend(
                     make_block(
-                        values=value.copy(),
+                        values=value,
                         ndim=self.ndim,
                         placement=slice(mgr_loc, mgr_loc + 1),
                     )

From 906304765e8e11d020d0b651817c8ad036ba81cd Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Tue, 3 Mar 2020 10:15:06 -0800
Subject: [PATCH 5/9] update check

---
 pandas/core/internals/managers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index dffe4bb97734a..cb74ea1387018 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1052,7 +1052,7 @@ def value_getitem(placement):
                     "Shape of new values must be compatible with manager shape"
                 )
 
-        if isinstance(loc, int):
+        if lib.is_integer(loc):
             # We have 6 tests where loc is _not_ an int.
             # In this case, get_blkno_placements will yield only one tuple,
             #  containing (self._blknos[loc], BlockPlacement(slice(0, 1, 1)))

From 280418ef7aee2c76dcc1a0559db8a47d72dd79f6 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Thu, 5 Mar 2020 16:31:05 -0800
Subject: [PATCH 6/9] comment

---
 pandas/core/frame.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f6c4f860c6dc5..bac8fdb8acf89 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2708,8 +2708,10 @@ def _setitem_frame(self, key, value):
 
     def _iset_item(self, loc: int, value):
         self._ensure_valid_index(value)
-        value = self._sanitize_column(loc, value)
-        # FIXME: sanitize_column isnt for iloc
+
+        # passing loc to sanitize_column is a misnomer, but harmless
+        #  with broadcast=False as long as value is never a DataFrame
+        value = self._sanitize_column(loc, value, broadcast=False)
         NDFrame._iset_item(self, loc, value)
 
         # check if we are modifying a copy

From 4a847e509d4ff075266c8b0d5ca031f513f7da06 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Thu, 5 Mar 2020 16:35:31 -0800
Subject: [PATCH 7/9] whatsnew

---
 doc/source/whatsnew/v1.1.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 44deab25db695..c7e679ae9c0a1 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -260,7 +260,7 @@ Indexing
 - Bug in :meth:`Series.xs` incorrectly returning ``Timestamp`` instead of ``datetime64`` in some object-dtype cases (:issue:`31630`)
 - Bug in :meth:`DataFrame.iat` incorrectly returning ``Timestamp`` instead of ``datetime`` in some object-dtype cases (:issue:`32809`)
 - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` when indexing with an integer key on a object-dtype :class:`Index` that is not all-integers (:issue:`31905`)
--
+- Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`)
 
 Missing
 ^^^^^^^

From 540d3eab42a193e71db6ce0a9862beb0ea5112da Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 6 Mar 2020 10:30:40 -0800
Subject: [PATCH 8/9] fix test on 32bit platform

---
 pandas/tests/indexing/test_iloc.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index d5164044c4952..f6b9e9a44ba14 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -717,6 +717,8 @@ def test_iloc_setitem_list_duplicate_columns(self):
         tm.assert_frame_equal(df, expected)
 
     def test_iloc_setitem_series_duplicate_columns(self):
-        df = pd.DataFrame(np.arange(8).reshape(2, 4), columns=["A", "B", "A", "B"])
+        df = pd.DataFrame(
+            np.arange(8, dtype=np.int64).reshape(2, 4), columns=["A", "B", "A", "B"]
+        )
         df.iloc[:, 0] = df.iloc[:, 0].astype(np.float64)
         assert df.dtypes.iloc[2] == np.int64

From 3a17ba8b5ab148cee97e97d0f02ef27a24934af2 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 9 Mar 2020 10:09:38 -0700
Subject: [PATCH 9/9] comments

---
 pandas/core/frame.py    | 4 ++--
 pandas/core/indexing.py | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 4ea0a64f8995e..83d44e546356c 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2709,8 +2709,8 @@ def _setitem_frame(self, key, value):
     def _iset_item(self, loc: int, value):
         self._ensure_valid_index(value)
 
-        # passing loc to sanitize_column is a misnomer, but harmless
-        #  with broadcast=False as long as value is never a DataFrame
+        # technically _sanitize_column expects a label, not a position,
+        #  but the behavior is the same as long as we pass broadcast=False
         value = self._sanitize_column(loc, value, broadcast=False)
         NDFrame._iset_item(self, loc, value)
 
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index f67f301959a5a..c9362a0527c06 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1641,6 +1641,7 @@ def _setitem_with_indexer(self, indexer, value):
             pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
 
             def isetter(loc, v):
+                # positional setting on column loc
                 ser = self.obj._ixs(loc, axis=1)
 
                 # perform the equivalent of a setitem on the info axis