From 5469912f74b554a8dc5fd1b6d1fcc1a8c07ef321 Mon Sep 17 00:00:00 2001
From: How Si Wei <swhow@drw.sg>
Date: Fri, 26 Jul 2019 14:42:57 +0800
Subject: [PATCH 1/2] Fix assignment to multiple columns when some columns do
 not exist

---
 doc/source/whatsnew/v1.0.0.rst      | 31 +++++++++++++++
 pandas/core/frame.py                |  6 +++
 pandas/core/indexing.py             | 14 +++++++
 pandas/tests/frame/test_indexing.py | 59 ++++++++++++++++++++---------
 pandas/tests/indexing/test_loc.py   | 40 +++++++++++++++++++
 5 files changed, 132 insertions(+), 18 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 0be4ebc627b30..67a7a9387589c 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -40,6 +40,37 @@ Backwards incompatible API changes
 - :class:`pandas.core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`).
 -
 
+.. _whatsnew_1000.api_breaking.multicolumn_assignment:
+
+Assignment to multiple columns of a DataFrame when some columns do not exist
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Assignment to multiple columns of a :class:`DataFrame` when some of the columns do not exist would previously assign the values to the last column. Now, new columns will be constructed with the right values. (:issue:`13658`)
+
+.. ipython:: python
+
+   df = pd.DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]})
+   df
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+   In [3]: df[['a', 'c']] = 1
+   In [4]: df
+   Out[4]:
+      a  b
+   0  1  1
+   1  1  1
+   2  1  1
+
+*New behavior*:
+
+.. ipython:: python
+
+   df[['a', 'c']] = 1
+   df
+
 .. _whatsnew_1000.api.other:
 
 Other API changes
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 603a615c1f8cb..77120a717ecd1 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3007,6 +3007,12 @@ def _setitem_array(self, key, value):
                 for k1, k2 in zip(key, value.columns):
                     self[k1] = value[k2]
             else:
+                if all(is_hashable(k) for k in key):
+                    for k in key:
+                        try:
+                            self[k]
+                        except KeyError:
+                            self[k] = np.nan
                 indexer = self.loc._get_listlike_indexer(
                     key, axis=1, raise_missing=False
                 )[1]
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index ea00737f776ee..df8d73ebfa9fc 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -12,6 +12,7 @@
 from pandas.core.dtypes.common import (
     ensure_platform_int,
     is_float,
+    is_hashable,
     is_integer,
     is_integer_dtype,
     is_iterator,
@@ -197,6 +198,19 @@ def _get_setitem_indexer(self, key):
     def __setitem__(self, key, value):
         if isinstance(key, tuple):
             key = tuple(com.apply_if_callable(x, self.obj) for x in key)
+            if (
+                self.name == "loc"
+                and len(key) > 1
+                and is_list_like_indexer(key[1])
+                and not isinstance(key[1], tuple)
+                and not com.is_bool_indexer(key[1])
+                and all(is_hashable(k) for k in key[1])
+            ):
+                for k in key[1]:
+                    try:
+                        self.obj[k]
+                    except KeyError:
+                        self.obj[k] = np.nan
         else:
             key = com.apply_if_callable(key, self.obj)
         indexer = self._get_setitem_indexer(key)
diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
index ae14563e5952a..7799832e277ca 100644
--- a/pandas/tests/frame/test_indexing.py
+++ b/pandas/tests/frame/test_indexing.py
@@ -208,6 +208,47 @@ def test_setitem_list_of_tuples(self, float_frame):
         expected = Series(tuples, index=float_frame.index, name="tuples")
         assert_series_equal(result, expected)
 
+    def test_setitem_list_all_missing_columns_scalar(self, float_frame):
+        # GH 26534
+        result = float_frame.copy()
+        result[["E", "F"]] = 1
+        expected = float_frame.copy()
+        # force the dtypes to be float as currently multcolumn assignment does not
+        # change column dtype from float to int even when it's being assigned an int
+        expected["E"] = 1.0
+        expected["F"] = 1.0
+        assert_frame_equal(result, expected)
+
+    def test_setitem_list_some_missing_columns_list(self, float_frame):
+        # GH 26534
+        result = float_frame.copy()
+        result[["A", "E"]] = [1, 2]
+        expected = float_frame.copy()
+        # force the dtypes to be float as currently multcolumn assignment does not
+        # change column dtype from float to int even when it's being assigned an int
+        expected["A"] = 1.0
+        expected["E"] = 2.0
+        assert_frame_equal(result, expected)
+
+    def test_setitem_list_some_missing_columns_dataframe(self, float_frame):
+        # GH 26534
+        result = float_frame.copy()
+        result[["A", "E"]] = float_frame[["B", "C"]]
+        expected = float_frame.copy()
+        expected["A"] = float_frame["B"]
+        expected["E"] = float_frame["C"]
+        assert_frame_equal(result, expected)
+
+    def test_setitem_list_some_missing_columns_2dlist(self):
+        # GH 26534
+        result = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
+        result[["B", "C", "D"]] = [[7, 8, 9], [10, 11, 12], [13, 14, 15]]
+        expected = pd.DataFrame(
+            [[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]],
+            columns=["A", "B", "C", "D"],
+        )
+        tm.assert_frame_equal(result, expected)
+
     def test_setitem_mulit_index(self):
         # GH7655, test that assigning to a sub-frame of a frame
         # with multi-index columns aligns both rows and columns
@@ -501,13 +542,6 @@ def test_setitem(self, float_frame):
         float_frame["col6"] = series
         tm.assert_series_equal(series, float_frame["col6"], check_names=False)
 
-        msg = (
-            r"\"None of \[Float64Index\(\[.*dtype='float64'\)\] are in the"
-            r" \[columns\]\""
-        )
-        with pytest.raises(KeyError, match=msg):
-            float_frame[np.random.randn(len(float_frame) + 1)] = 1
-
         # set ndarray
         arr = np.random.randn(len(float_frame))
         float_frame["col9"] = arr
@@ -1143,17 +1177,6 @@ def test_fancy_index_int_labels_exceptions(self, float_frame):
             )
             with pytest.raises(KeyError, match=msg):
                 float_frame.ix[["foo", "bar", "baz"]] = 1
-            msg = (
-                r"None of \[Index\(\['E'\], dtype='object'\)\] are in the"
-                r" \[columns\]"
-            )
-            with pytest.raises(KeyError, match=msg):
-                float_frame.ix[:, ["E"]] = 1
-
-            # FIXME: don't leave commented-out
-            # partial setting now allows this GH2578
-            # pytest.raises(KeyError, float_frame.ix.__setitem__,
-            #               (slice(None, None), 'E'), 1)
 
     def test_setitem_fancy_mixed_2d(self, float_string_frame):
 
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index abe0cd86c90d7..56e1343433401 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -808,6 +808,46 @@ def test_loc_setitem_with_scalar_index(self, indexer, value):
 
         assert is_scalar(result) and result == "Z"
 
+    def test_loc_setitem_missing_columns_scalar_index_list_value(self):
+        # GH 26534
+        df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
+        df.loc[1, ["C", "D"]] = [7, 8]
+        expected = pd.DataFrame(
+            [[1, 2, np.nan, np.nan], [3, 4, 7, 8], [5, 6, np.nan, np.nan]],
+            columns=["A", "B", "C", "D"],
+        )
+        tm.assert_frame_equal(df, expected)
+
+    def test_loc_setitem_missing_columns_full_index_dataframe_value(self):
+        # GH 26534
+        df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
+        df2 = pd.DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"])
+        df.loc[:, ["A", "C"]] = df2
+        expected = pd.DataFrame(
+            [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"]
+        )
+        tm.assert_frame_equal(df, expected)
+
+    def test_loc_setitem_missing_columns_list_index_scalar_value(self):
+        # GH 26534
+        df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
+        df.loc[[0, 2], ["B", "C", "D"]] = 9
+        expected = pd.DataFrame(
+            [[1, 9, 9, 9], [3, 4, np.nan, np.nan], [5, 9, 9, 9]],
+            columns=["A", "B", "C", "D"],
+        )
+        tm.assert_frame_equal(df, expected)
+
+    def test_loc_setitem_missing_columns_range_index_2dlist_value(self):
+        # GH 26534
+        df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
+        df.loc[1:3, ["B", "C", "D"]] = [[7, 8, 9], [10, 11, 12]]
+        expected = pd.DataFrame(
+            [[1, 2, np.nan, np.nan], [3, 7, 8, 9], [5, 10, 11, 12]],
+            columns=["A", "B", "C", "D"],
+        )
+        tm.assert_frame_equal(df, expected)
+
     def test_loc_coercion(self):
 
         # 12411

From 3622744780c951e0f7e562ba543091478791a19c Mon Sep 17 00:00:00 2001
From: How Si Wei <git@howsiwei.me>
Date: Sun, 18 Aug 2019 15:19:33 +0800
Subject: [PATCH 2/2] Parametrize some tests

---
 pandas/tests/frame/test_indexing.py | 54 +++++++++------------------
 pandas/tests/indexing/test_loc.py   | 58 +++++++++++------------------
 2 files changed, 39 insertions(+), 73 deletions(-)

diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
index 7799832e277ca..e4ed5f88afd84 100644
--- a/pandas/tests/frame/test_indexing.py
+++ b/pandas/tests/frame/test_indexing.py
@@ -208,47 +208,29 @@ def test_setitem_list_of_tuples(self, float_frame):
         expected = Series(tuples, index=float_frame.index, name="tuples")
         assert_series_equal(result, expected)
 
-    def test_setitem_list_all_missing_columns_scalar(self, float_frame):
-        # GH 26534
-        result = float_frame.copy()
-        result[["E", "F"]] = 1
-        expected = float_frame.copy()
-        # force the dtypes to be float as currently multcolumn assignment does not
-        # change column dtype from float to int even when it's being assigned an int
-        expected["E"] = 1.0
-        expected["F"] = 1.0
-        assert_frame_equal(result, expected)
-
-    def test_setitem_list_some_missing_columns_list(self, float_frame):
-        # GH 26534
-        result = float_frame.copy()
-        result[["A", "E"]] = [1, 2]
-        expected = float_frame.copy()
-        # force the dtypes to be float as currently multcolumn assignment does not
-        # change column dtype from float to int even when it's being assigned an int
-        expected["A"] = 1.0
-        expected["E"] = 2.0
-        assert_frame_equal(result, expected)
-
-    def test_setitem_list_some_missing_columns_dataframe(self, float_frame):
+    @pytest.mark.parametrize("columns", [["A", "E"], ["E", "F"]])
+    @pytest.mark.parametrize(
+        "box",
+        [
+            lambda x: 1,
+            lambda x: [1, 2],
+            lambda x: np.array([1, 2]),
+            lambda x: x[["B", "C"]],
+            lambda x: x[["B", "A"]].values,
+            lambda x: x[["A", "C"]].values.tolist(),
+        ],
+    )
+    def test_setitem_list_missing_columns(self, float_frame, columns, box):
         # GH 26534
         result = float_frame.copy()
-        result[["A", "E"]] = float_frame[["B", "C"]]
+        result[columns] = box(float_frame)
         expected = float_frame.copy()
-        expected["A"] = float_frame["B"]
-        expected["E"] = float_frame["C"]
+        for col in columns:
+            if col not in expected.columns:
+                expected[col] = np.nan
+        expected[columns] = box(float_frame)
         assert_frame_equal(result, expected)
 
-    def test_setitem_list_some_missing_columns_2dlist(self):
-        # GH 26534
-        result = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
-        result[["B", "C", "D"]] = [[7, 8, 9], [10, 11, 12], [13, 14, 15]]
-        expected = pd.DataFrame(
-            [[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]],
-            columns=["A", "B", "C", "D"],
-        )
-        tm.assert_frame_equal(result, expected)
-
     def test_setitem_mulit_index(self):
         # GH7655, test that assigning to a sub-frame of a frame
         # with multi-index columns aligns both rows and columns
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 56e1343433401..ca559e0ca59c7 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -808,45 +808,29 @@ def test_loc_setitem_with_scalar_index(self, indexer, value):
 
         assert is_scalar(result) and result == "Z"
 
-    def test_loc_setitem_missing_columns_scalar_index_list_value(self):
-        # GH 26534
-        df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
-        df.loc[1, ["C", "D"]] = [7, 8]
-        expected = pd.DataFrame(
-            [[1, 2, np.nan, np.nan], [3, 4, 7, 8], [5, 6, np.nan, np.nan]],
-            columns=["A", "B", "C", "D"],
-        )
-        tm.assert_frame_equal(df, expected)
-
-    def test_loc_setitem_missing_columns_full_index_dataframe_value(self):
-        # GH 26534
-        df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
-        df2 = pd.DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"])
-        df.loc[:, ["A", "C"]] = df2
-        expected = pd.DataFrame(
-            [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"]
-        )
-        tm.assert_frame_equal(df, expected)
-
-    def test_loc_setitem_missing_columns_list_index_scalar_value(self):
-        # GH 26534
-        df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
-        df.loc[[0, 2], ["B", "C", "D"]] = 9
-        expected = pd.DataFrame(
-            [[1, 9, 9, 9], [3, 4, np.nan, np.nan], [5, 9, 9, 9]],
-            columns=["A", "B", "C", "D"],
-        )
-        tm.assert_frame_equal(df, expected)
-
-    def test_loc_setitem_missing_columns_range_index_2dlist_value(self):
+    @pytest.mark.parametrize(
+        "index,box",
+        [
+            ((1, ["C", "D"]), [7, 8]),
+            (
+                (slice(None, None, None), ["A", "C"]),
+                pd.DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]),
+            ),
+            (([0, 2], ["B", "C", "D"]), 9),
+            ((slice(1, 3, None), ["B", "C", "D"]), [[7, 8, 9], [10, 11, 12]]),
+        ],
+    )
+    def test_loc_setitem_missing_columns(self, index, box):
         # GH 26534
         df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
-        df.loc[1:3, ["B", "C", "D"]] = [[7, 8, 9], [10, 11, 12]]
-        expected = pd.DataFrame(
-            [[1, 2, np.nan, np.nan], [3, 7, 8, 9], [5, 10, 11, 12]],
-            columns=["A", "B", "C", "D"],
-        )
-        tm.assert_frame_equal(df, expected)
+        result = df.copy()
+        result.loc[index] = box
+        expected = df.copy()
+        for col in index[1]:
+            if col not in expected.columns:
+                expected[col] = np.nan
+        expected.loc[index] = box
+        tm.assert_frame_equal(result, expected)
 
     def test_loc_coercion(self):