From 38f36578c9e29f8ffc29a655923375c73a75bf02 Mon Sep 17 00:00:00 2001
From: proost <jwalag87@gmail.com>
Date: Tue, 11 Aug 2020 01:34:36 +0900
Subject: [PATCH 1/7] ENH: column-wise DataFrame.fillna and DataFrame.fillna
 with duplicated labels, using Series and dict (#30922)

---
 doc/source/whatsnew/v1.2.0.rst     |  2 +-
 pandas/core/generic.py             | 29 +++++----
 pandas/tests/frame/test_missing.py | 95 ++++++++++++++++++++++++++++--
 3 files changed, 109 insertions(+), 17 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 94bb265c32e4c..311f240d5f08b 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -51,7 +51,7 @@ For example:
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
 - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
--
+- :meth:`DataFrame.fillna` can fill NA values column-wise with a dictionary or :class:`Series` (:issue:`4514`)
 -
 
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 520023050d49d..70d714e813085 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6134,20 +6134,25 @@ def fillna(
                 )
 
             elif isinstance(value, (dict, ABCSeries)):
+                temp_data = self if inplace else self.copy()
+
                 if axis == 1:
-                    raise NotImplementedError(
-                        "Currently only can fill "
-                        "with dict/Series column "
-                        "by column"
-                    )
+                    for i, item in enumerate(temp_data.items()):
+                        label, content = item
+                        temp_data.iloc[:, i] = content.fillna(
+                            value, limit=limit, inplace=False, downcast=downcast
+                        )
+                else:
+                    for i, item in enumerate(temp_data.items()):
+                        label, content = item
+                        if label not in value:
+                            continue
+                        temp_data.iloc[:, i] = content.fillna(
+                            value[label], limit=limit, inplace=False, downcast=downcast
+                        )
 
-                result = self if inplace else self.copy()
-                for k, v in value.items():
-                    if k not in result:
-                        continue
-                    obj = result[k]
-                    obj.fillna(v, limit=limit, inplace=True, downcast=downcast)
-                return result if not inplace else None
+                temp_data = temp_data.infer_objects()
+                new_data = temp_data._mgr
 
             elif not is_list_like(value):
                 new_data = self._mgr.fillna(
diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py
index 9bf5d24085697..045f00a17933f 100644
--- a/pandas/tests/frame/test_missing.py
+++ b/pandas/tests/frame/test_missing.py
@@ -618,10 +618,6 @@ def test_fillna_dict_series(self):
         expected = df.fillna(df.max().to_dict())
         tm.assert_frame_equal(result, expected)
 
-        # disable this for now
-        with pytest.raises(NotImplementedError, match="column by column"):
-            df.fillna(df.max(1), axis=1)
-
     def test_fillna_dataframe(self):
         # GH 8377
         df = DataFrame(
@@ -710,3 +706,94 @@ def test_fill_corner(self, float_frame, float_string_frame):
 
         # TODO(wesm): unused?
         result = empty_float.fillna(value=0)  # noqa
+
+    @pytest.mark.parametrize(
+        "expected,fill_value",
+        [
+            (
+                DataFrame(
+                    [[100, 100], [200, 4], [5, 6]], columns=list("AB"), dtype="float64"
+                ),
+                Series([100, 200, 300]),
+            ),
+            (
+                DataFrame(
+                    [[100, 100], [np.nan, 4], [5, 6]],
+                    columns=list("AB"),
+                    dtype="float64",
+                ),
+                {0: 100, 2: 300, 3: 400},  # no key for row 1, so its NaN stays
+            ),
+        ],
+    )
+    def test_fillna_column_wise(self, expected, fill_value):
+        # GH 4514: with axis=1 each row is filled by its index label
+        df = DataFrame([[np.nan, np.nan], [np.nan, 4], [5, 6]], columns=list("AB"))
+        result = df.fillna(fill_value, axis=1)
+        tm.assert_frame_equal(expected, result)
+
+    def test_fillna_column_wise_downcast(self):
+        # GH 4514: downcast="infer" casts the fully filled columns to int64
+        df = DataFrame([[np.nan, 2], [3, np.nan], [np.nan, np.nan]], columns=list("AB"))
+        s = Series([100, 200, 300])
+
+        expected = DataFrame(
+            [[100, 2], [3, 200], [300, 300]], columns=list("AB"), dtype="int64"
+        )
+        result = df.fillna(s, axis=1, downcast="infer")
+        tm.assert_frame_equal(expected, result)
+
+    @pytest.mark.parametrize(
+        "fill_value", [Series([100, 200, 300]), {0: 100, 2: 300, 3: 400}]
+    )
+    def test_fillna_column_wise_inplace(self, fill_value):
+        # GH 4514: inplace=True must give the same frame as inplace=False
+        df = DataFrame([[np.nan, np.nan], [np.nan, 4], [5, 6]], columns=list("AB"))
+        expected = df.fillna(fill_value, axis=1, inplace=False)
+        df.fillna(fill_value, axis=1, inplace=True)
+        tm.assert_frame_equal(expected, df)
+
+    @pytest.mark.parametrize(
+        "fill_value",
+        [Series([100, 200, 300], index=[0, 1, 2]), {0: 100, 1: 200, 2: 300}],
+    )
+    def test_fillna_column_wise_duplicated_with_series_dict(self, fill_value):
+        # GH 4514: axis=1 fill when both row and column labels are duplicated
+        df = DataFrame(
+            [[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]],
+            columns=list("ABB"),
+            index=[0, 0, 1],
+        )
+        expected = DataFrame(
+            [[100, 100, 3], [100, 5, 100], [7, 200, 200]],
+            columns=list("ABB"),
+            index=[0, 0, 1],
+            dtype="float64",
+        )
+
+        result = df.fillna(fill_value, axis=1)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "fill_value",
+        [
+            Series([100, 200, 300], index=["A", "B", "C"]),
+            {"A": 100, "B": 200, "C": 300},
+        ],
+    )
+    def test_fillna_duplicated_with_series_dict(self, fill_value):
+        # GH 4514: default-axis fill by column label; both "B" columns get 200
+        df = DataFrame(
+            [[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]],
+            columns=list("ABB"),
+            index=[0, 0, 1],
+        )
+        expected = DataFrame(
+            [[100, 200, 3], [100, 5, 200], [7, 200, 200]],
+            columns=list("ABB"),
+            index=[0, 0, 1],
+            dtype="float64",
+        )
+
+        result = df.fillna(fill_value)
+        tm.assert_frame_equal(result, expected)

From bf953c7e9e3559c9ee30e1d65e6951754b63c8d9 Mon Sep 17 00:00:00 2001
From: Andrew Wieteska <andrew.r.wieteska@gmail.com>
Date: Mon, 7 Dec 2020 23:11:15 -0500
Subject: [PATCH 2/7] fix merge error: remove leftover conflict marker

---
 doc/source/whatsnew/v1.2.0.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index d8a47472dc299..06d1dc5ffc1db 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -317,7 +317,6 @@ of columns could result in a larger Series result. See (:issue:`37799`).
     df = pd.DataFrame({"A": ["foo", "bar"], "B": [True, False]}, dtype=object)
     df["C"] = pd.Series([True, True])
 
->>>>>>> 862cd05df4452592a99dd1a4fa10ce8cfb3766f7
 
 *Previous behavior*:
 

From 4df35015db5d73dbb5ffebf5d915d7cad75db9c3 Mon Sep 17 00:00:00 2001
From: Andrew Wieteska <andrew.r.wieteska@gmail.com>
Date: Mon, 7 Dec 2020 23:17:05 -0500
Subject: [PATCH 3/7] simplify code block

---
 pandas/core/generic.py | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 39222bf85cf59..bdbd9282180e6 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6400,20 +6400,13 @@ def fillna(
             elif isinstance(value, (dict, ABCSeries)):
                 temp_data = self if inplace else self.copy()
 
-                if axis == 1:
-                    for i, item in enumerate(temp_data.items()):
-                        label, content = item
-                        temp_data.iloc[:, i] = content.fillna(
-                            value, limit=limit, inplace=False, downcast=downcast
-                        )
-                else:
-                    for i, item in enumerate(temp_data.items()):
-                        label, content = item
-                        if label not in value:
-                            continue
-                        temp_data.iloc[:, i] = content.fillna(
-                            value[label], limit=limit, inplace=False, downcast=downcast
-                        )
+                for i, (label, content) in enumerate(temp_data.items()):
+                    if axis == 0 and label not in value:
+                        continue
+                    fill_val = value[label] if axis == 0 else value
+                    temp_data.iloc[:, i] = content.fillna(
+                        fill_val, limit=limit, inplace=False, downcast=downcast
+                    )
 
                 temp_data = temp_data.infer_objects()
                 new_data = temp_data._mgr

From 16cceb034932a736daad09e599f8f10b54e55dd2 Mon Sep 17 00:00:00 2001
From: Andrew Wieteska <andrew.r.wieteska@gmail.com>
Date: Mon, 7 Dec 2020 23:22:48 -0500
Subject: [PATCH 4/7] simplify more

---
 pandas/core/generic.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index bdbd9282180e6..03b5f13f9ac60 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6398,18 +6398,20 @@ def fillna(
                 )
 
             elif isinstance(value, (dict, ABCSeries)):
-                temp_data = self if inplace else self.copy()
+                tmp = self if inplace else self.copy()
 
-                for i, (label, content) in enumerate(temp_data.items()):
+                for i, (label, content) in enumerate(tmp.items()):
                     if axis == 0 and label not in value:
                         continue
-                    fill_val = value[label] if axis == 0 else value
-                    temp_data.iloc[:, i] = content.fillna(
-                        fill_val, limit=limit, inplace=False, downcast=downcast
+                    tmp.iloc[:, i] = content.fillna(
+                        value[label] if axis == 0 else value,
+                        limit=limit,
+                        inplace=False,
+                        downcast=downcast,
                     )
 
-                temp_data = temp_data.infer_objects()
-                new_data = temp_data._mgr
+                tmp = tmp.infer_objects()
+                new_data = tmp._mgr
 
             elif not is_list_like(value):
                 new_data = self._mgr.fillna(

From d061f6fd7d38de3d3f075e1954ce21fab2534c87 Mon Sep 17 00:00:00 2001
From: Andrew Wieteska <andrew.r.wieteska@gmail.com>
Date: Mon, 7 Dec 2020 23:33:24 -0500
Subject: [PATCH 5/7] fix test: add missing blank line before top-level function

---
 pandas/tests/frame/methods/test_fillna.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
index d305e4940be0c..2089dfa58e837 100644
--- a/pandas/tests/frame/methods/test_fillna.py
+++ b/pandas/tests/frame/methods/test_fillna.py
@@ -612,6 +612,7 @@ def test_fillna_duplicated_with_series_dict(self, fill_value):
         result = df.fillna(fill_value)
         tm.assert_frame_equal(result, expected)
 
+
 def test_fillna_nonconsolidated_frame():
     # https://github.com/pandas-dev/pandas/issues/36495
     df = DataFrame(

From 55cee45a0a191eab40cb62028adcca0afed7f6a7 Mon Sep 17 00:00:00 2001
From: Andrew Wieteska <andrew.r.wieteska@gmail.com>
Date: Mon, 14 Dec 2020 12:47:47 -0500
Subject: [PATCH 6/7] move whatsnew to 1.3: add entry to v1.3.0.rst

---
 doc/source/whatsnew/v1.3.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 05d3b1c797375..70ebe81e124fe 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -21,6 +21,7 @@ Other enhancements
 
 - Added :meth:`MultiIndex.dtypes` (:issue:`37062`)
 - Improve error message when ``usecols`` and ``names`` do not match for :func:`read_csv` and ``engine="c"`` (:issue:`29042`)
+- :meth:`DataFrame.fillna` can fill NA values column-wise with a dictionary or :class:`Series` (:issue:`4514`)
 
 .. ---------------------------------------------------------------------------
 

From dc66a0b8ece7c80206c0eae9488701ab21208702 Mon Sep 17 00:00:00 2001
From: Andrew Wieteska <andrew.r.wieteska@gmail.com>
Date: Mon, 14 Dec 2020 12:48:03 -0500
Subject: [PATCH 7/7] move whatsnew to 1.3: remove entry from v1.2.0.rst

---
 doc/source/whatsnew/v1.2.0.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index d2a3aaf3495be..e2521cedb64cc 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -275,7 +275,6 @@ Other enhancements
 - Added :meth:`~DataFrame.set_flags` for setting table-wide flags on a Series or DataFrame (:issue:`28394`)
 - :meth:`DataFrame.applymap` now supports ``na_action`` (:issue:`23803`)
 - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
-- :meth:`DataFrame.fillna` can fill NA values column-wise with a dictionary or :class:`Series` (:issue:`4514`)
 - :meth:`io.sql.get_schema` now supports a ``schema`` keyword argument that will add a schema into the create table statement (:issue:`28486`)
 - :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`)
 - :meth:`DataFrame.hist` now supports time series (datetime) data (:issue:`32590`)