From 25ad42223005c397849febbcfa5a915fc5b2d7b9 Mon Sep 17 00:00:00 2001
From: How Si Wei <git@howsiwei.me>
Date: Thu, 20 Feb 2020 00:48:39 +0800
Subject: [PATCH 1/6] Add tests for setting missing columns

---
 pandas/tests/frame/indexing/test_indexing.py | 64 +++++++++++++++++---
 pandas/tests/indexing/test_loc.py            | 58 ++++++++++++++++++
 2 files changed, 115 insertions(+), 7 deletions(-)

diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 997414eceeb86..d66be2eacdc8b 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -215,6 +215,63 @@ def test_setitem_list_of_tuples(self, float_frame):
         expected = Series(tuples, index=float_frame.index, name="tuples")
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.parametrize(
+        "columns,box,expected",
+        [
+            (
+                ["A", "B", "C", "D"],
+                7,
+                pd.DataFrame(
+                    [[7, 7, 7, 7], [7, 7, 7, 7], [7, 7, 7, 7]],
+                    columns=["A", "B", "C", "D"],
+                ),
+            ),
+            (
+                ["C", "D"],
+                [7, 8],
+                pd.DataFrame(
+                    [[1, 2, 7, 8], [3, 4, 7, 8], [5, 6, 7, 8]],
+                    columns=["A", "B", "C", "D"],
+                ),
+            ),
+            (
+                ["A", "B", "C"],
+                np.array([7, 8, 9], dtype=np.int64),
+                pd.DataFrame(
+                    [[7, 8, 9], [7, 8, 9], [7, 8, 9]], columns=["A", "B", "C"]
+                ),
+            ),
+            (
+                ["B", "C", "D"],
+                [[7, 8, 9], [10, 11, 12], [13, 14, 15]],
+                pd.DataFrame(
+                    [[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]],
+                    columns=["A", "B", "C", "D"],
+                ),
+            ),
+            (
+                ["C", "A", "D"],
+                np.array([[7, 8, 9], [10, 11, 12], [13, 14, 15]], dtype=np.int64),
+                pd.DataFrame(
+                    [[8, 2, 7, 9], [11, 4, 10, 12], [14, 6, 13, 15]],
+                    columns=["A", "B", "C", "D"],
+                ),
+            ),
+            (
+                ["A", "C"],
+                pd.DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]),
+                pd.DataFrame(
+                    [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"]
+                ),
+            ),
+        ],
+    )
+    def test_setitem_list_missing_columns(self, columns, box, expected):
+        # GH 29334: labels in `columns` absent from df are created on assignment
+        df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
+        df[columns] = box
+        tm.assert_frame_equal(df, expected)
+
     def test_setitem_multi_index(self):
         # GH7655, test that assigning to a sub-frame of a frame
         # with multi-index columns aligns both rows and columns
@@ -459,13 +516,6 @@ def test_setitem(self, float_frame):
         float_frame["col6"] = series
         tm.assert_series_equal(series, float_frame["col6"], check_names=False)
 
-        msg = (
-            r"\"None of \[Float64Index\(\[.*dtype='float64'\)\] are in the "
-            r"\[columns\]\""
-        )
-        with pytest.raises(KeyError, match=msg):
-            float_frame[np.random.randn(len(float_frame) + 1)] = 1
-
         # set ndarray
         arr = np.random.randn(len(float_frame))
         float_frame["col9"] = arr
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 4d042af8d59b4..010ef89393b85 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -631,6 +631,64 @@ def test_loc_setitem_with_scalar_index(self, indexer, value):
 
         assert is_scalar(result) and result == "Z"
 
+    @pytest.mark.parametrize(
+        "index,box,expected",
+        [
+            (
+                ([0, 2], ["A", "B", "C", "D"]),
+                7,
+                pd.DataFrame(
+                    [[7, 7, 7, 7], [3, 4, np.nan, np.nan], [7, 7, 7, 7]],
+                    columns=["A", "B", "C", "D"],
+                ),
+            ),
+            (
+                (1, ["C", "D"]),
+                [7, 8],
+                pd.DataFrame(
+                    [[1, 2, np.nan, np.nan], [3, 4, 7, 8], [5, 6, np.nan, np.nan]],
+                    columns=["A", "B", "C", "D"],
+                ),
+            ),
+            (
+                (1, ["A", "B", "C"]),
+                np.array([7, 8, 9], dtype=np.int64),
+                pd.DataFrame(
+                    [[1, 2, np.nan], [7, 8, 9], [5, 6, np.nan]],
+                    columns=["A", "B", "C"],
+                ),
+            ),
+            (
+                (slice(1, 3, None), ["B", "C", "D"]),
+                [[7, 8, 9], [10, 11, 12]],
+                pd.DataFrame(
+                    [[1, 2, np.nan, np.nan], [3, 7, 8, 9], [5, 10, 11, 12]],
+                    columns=["A", "B", "C", "D"],
+                ),
+            ),
+            (
+                (slice(1, 3, None), ["C", "A", "D"]),
+                np.array([[7, 8, 9], [10, 11, 12]], dtype=np.int64),
+                pd.DataFrame(
+                    [[1, 2, np.nan, np.nan], [8, 4, 7, 9], [11, 6, 10, 12]],
+                    columns=["A", "B", "C", "D"],
+                ),
+            ),
+            (
+                (slice(None, None, None), ["A", "C"]),
+                pd.DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]),
+                pd.DataFrame(
+                    [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"]
+                ),
+            ),
+        ],
+    )
+    def test_loc_setitem_missing_columns(self, index, box, expected):
+        # GH 29334: .loc adds missing columns; their unselected rows stay NaN
+        df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
+        df.loc[index] = box
+        tm.assert_frame_equal(df, expected)
+
     def test_loc_coercion(self):
 
         # 12411

From b8d3c48377a4ead92531715f36b658c134af9340 Mon Sep 17 00:00:00 2001
From: How Si Wei <git@howsiwei.me>
Date: Thu, 20 Feb 2020 00:49:29 +0800
Subject: [PATCH 2/6] Fix assignment to missing columns

---
 pandas/core/frame.py    |  1 +
 pandas/core/indexing.py | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f304fadbab871..e9ddaaf34e777 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2685,6 +2685,7 @@ def _setitem_array(self, key, value):
                 for k1, k2 in zip(key, value.columns):
                     self[k1] = value[k2]
             else:
+                self.loc._ensure_listlike_indexer(key)
                 indexer = self.loc._get_listlike_indexer(
                     key, axis=1, raise_missing=False
                 )[1]
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 3ab180bafd156..87ab491653356 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -8,6 +8,7 @@
 from pandas.util._decorators import Appender
 
 from pandas.core.dtypes.common import (
+    is_hashable,
     is_integer,
     is_iterator,
     is_list_like,
@@ -581,6 +582,9 @@ def _get_setitem_indexer(self, key):
         """
         Convert a potentially-label-based key into a positional indexer.
         """
+        if self.name == "loc":
+            self._ensure_listlike_indexer(key)
+
         if self.axis is not None:
             return self._convert_tuple(key, is_setter=True)
 
@@ -611,6 +615,39 @@ def _get_setitem_indexer(self, key):
                 raise
             raise IndexingError(key) from e
 
+    def _ensure_listlike_indexer(self, key):
+        """
+        Ensure that a list-like of column labels are all present by adding them if
+        they do not already exist.
+
+        Parameters
+        ----------
+        key : _LocIndexer key or list-like of column labels
+            Target labels.
+        """
+        column_axis = 1
+
+        # nothing to add unless obj is 2-D and a tuple key carries a column part
+        if self.ndim != 2 or (isinstance(key, tuple) and len(key) <= column_axis):
+            return
+
+        if isinstance(key, tuple):
+            # key may be a tuple if key is a _LocIndexer key
+            # in that case, set key to the column part of key
+            key = key[column_axis]
+
+        if (
+            not isinstance(self.obj._get_axis(column_axis), ABCMultiIndex)
+            and is_list_like_indexer(key)
+            and not com.is_bool_indexer(key)
+            and all(is_hashable(k) for k in key)
+        ):
+            for k in key:
+                try:
+                    self.obj[k]
+                except KeyError:
+                    self.obj[k] = np.nan
+
     def __setitem__(self, key, value):
         if isinstance(key, tuple):
             key = tuple(com.apply_if_callable(x, self.obj) for x in key)

From 660d0f28bf1206bd3bea71cfbf950a96d2d5ccc9 Mon Sep 17 00:00:00 2001
From: How Si Wei <git@howsiwei.me>
Date: Thu, 20 Feb 2020 00:53:01 +0800
Subject: [PATCH 3/6] Add whatsnew

---
 doc/source/whatsnew/v1.1.0.rst | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 0f18a1fd81815..225b1081e182b 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -259,6 +259,35 @@ Indexing
 - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` when indexing with an integer key on a object-dtype :class:`Index` that is not all-integers (:issue:`31905`)
 -
 
+Assignment to multiple columns of a DataFrame when some columns do not exist
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Assignment to multiple columns of a :class:`DataFrame` when some of the columns do not exist would previously assign the values to the last column. Now, new columns would be constructed with the right values. (:issue:`13658`)
+
+.. ipython:: python
+
+   df = pd.DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]})
+   df
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+   In [3]: df[['a', 'c']] = 1
+   In [4]: df
+   Out[4]:
+      a  b
+   0  1  1
+   1  1  1
+   2  1  1
+
+*New behavior*:
+
+.. ipython:: python
+
+   df[['a', 'c']] = 1
+   df
+
 Missing
 ^^^^^^^
 

From d7184e72c9a6fcc3c565ce78dbc3e3030119a463 Mon Sep 17 00:00:00 2001
From: How Si Wei <git@howsiwei.me>
Date: Thu, 20 Feb 2020 00:54:45 +0800
Subject: [PATCH 4/6] Pass axis to _ensure_listlike_indexer

---
 pandas/core/frame.py    | 2 +-
 pandas/core/indexing.py | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e9ddaaf34e777..65dfe61b93f2a 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2685,7 +2685,7 @@ def _setitem_array(self, key, value):
                 for k1, k2 in zip(key, value.columns):
                     self[k1] = value[k2]
             else:
-                self.loc._ensure_listlike_indexer(key)
+                self.loc._ensure_listlike_indexer(key, axis=1)
                 indexer = self.loc._get_listlike_indexer(
                     key, axis=1, raise_missing=False
                 )[1]
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 87ab491653356..8bafc69eb6be7 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -615,7 +615,7 @@ def _get_setitem_indexer(self, key):
                 raise
             raise IndexingError(key) from e
 
-    def _ensure_listlike_indexer(self, key):
+    def _ensure_listlike_indexer(self, key, axis=None):
         """
         Ensure that a list-like of column labels are all present by adding them if
         they do not already exist.
@@ -624,6 +624,7 @@ def _ensure_listlike_indexer(self, key):
         ----------
         key : _LocIndexer key or list-like of column labels
             Target labels.
+        axis : key axis if known
         """
         column_axis = 1
 
@@ -635,9 +636,11 @@ def _ensure_listlike_indexer(self, key):
             # key may be a tuple if key is a _LocIndexer key
             # in that case, set key to the column part of key
             key = key[column_axis]
+            axis = column_axis
 
         if (
-            not isinstance(self.obj._get_axis(column_axis), ABCMultiIndex)
+            axis == column_axis
+            and not isinstance(self.obj._get_axis(column_axis), ABCMultiIndex)
             and is_list_like_indexer(key)
             and not com.is_bool_indexer(key)
             and all(is_hashable(k) for k in key)

From a509eb57989c01a470c2308477095b9bff857c58 Mon Sep 17 00:00:00 2001
From: How Si Wei <git@howsiwei.me>
Date: Sat, 7 Mar 2020 03:20:40 +0800
Subject: [PATCH 5/6] Use DataFrame.columns

---
 pandas/core/indexing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 8bafc69eb6be7..2e727ab55c706 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -640,7 +640,7 @@ def _ensure_listlike_indexer(self, key, axis=None):
 
         if (
             axis == column_axis
-            and not isinstance(self.obj._get_axis(column_axis), ABCMultiIndex)
+            and not isinstance(self.obj.columns, ABCMultiIndex)
             and is_list_like_indexer(key)
             and not com.is_bool_indexer(key)
             and all(is_hashable(k) for k in key)

From 26ba2a998f8d593f5dbc7d0b5a37905ec1e8f3f1 Mon Sep 17 00:00:00 2001
From: How Si Wei <git@howsiwei.me>
Date: Sun, 15 Mar 2020 03:40:09 +0800
Subject: [PATCH 6/6] Update documentation

---
 doc/source/whatsnew/v1.1.0.rst | 60 ++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 29 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 4f8c67374bf3e..caf37a77f8216 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -168,6 +168,37 @@ key and type of :class:`Index`.  These now consistently raise ``KeyError`` (:iss
 
 .. ---------------------------------------------------------------------------
 
+.. _whatsnew_110.api_breaking.assignment_to_multiple_columns:
+
+Assignment to multiple columns of a DataFrame when some columns do not exist
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Assignment to multiple columns of a :class:`DataFrame` when some of the columns do not exist would previously assign the values to the last column. Now, new columns will be constructed with the right values. (:issue:`13658`)
+
+.. ipython:: python
+
+   df = pd.DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]})
+   df
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+   In [3]: df[['a', 'c']] = 1
+   In [4]: df
+   Out[4]:
+      a  b
+   0  1  1
+   1  1  1
+   2  1  1
+
+*New behavior*:
+
+.. ipython:: python
+
+   df[['a', 'c']] = 1
+   df
+
 .. _whatsnew_110.deprecations:
 
 Deprecations
@@ -267,35 +298,6 @@ Indexing
 - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` when indexing with an integer key on a object-dtype :class:`Index` that is not all-integers (:issue:`31905`)
 - Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`)
 
-Assignment to multiple columns of a DataFrame when some columns do not exist
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Assignment to multiple columns of a :class:`DataFrame` when some of the columns do not exist would previously assign the values to the last column. Now, new columns would be constructed with the right values. (:issue:`13658`)
-
-.. ipython:: python
-
-   df = pd.DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]})
-   df
-
-*Previous behavior*:
-
-.. code-block:: ipython
-
-   In [3]: df[['a', 'c']] = 1
-   In [4]: df
-   Out[4]:
-      a  b
-   0  1  1
-   1  1  1
-   2  1  1
-
-*New behavior*:
-
-.. ipython:: python
-
-   df[['a', 'c']] = 1
-   df
-
 Missing
 ^^^^^^^