diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 56a5412d4ecfc..395ce9277f631 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -119,6 +119,7 @@ Other enhancements - Added :meth:`MultiIndex.dtypes` (:issue:`37062`) - Added ``end`` and ``end_day`` options for ``origin`` in :meth:`DataFrame.resample` (:issue:`37804`) - Improve error message when ``usecols`` and ``names`` do not match for :func:`read_csv` and ``engine="c"`` (:issue:`29042`) +- :meth:`DataFrame.fillna` can fill NA values column-wise with a dictionary or :class:`Series` (:issue:`4514`) - Improved consistency of error message when passing an invalid ``win_type`` argument in :class:`Window` (:issue:`15969`) - :func:`pandas.read_sql_query` now accepts a ``dtype`` argument to cast the columnar data from the SQL database based on user input (:issue:`10285`) - Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 050550f8add50..2cf02dabb59d4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6492,20 +6492,20 @@ def fillna( ) elif isinstance(value, (dict, ABCSeries)): - if axis == 1: - raise NotImplementedError( - "Currently only can fill " - "with dict/Series column " - "by column" - ) + tmp = self if inplace else self.copy() - result = self if inplace else self.copy() - for k, v in value.items(): - if k not in result: + for i, (label, content) in enumerate(tmp.items()): + if axis == 0 and label not in value: continue - obj = result[k] - obj.fillna(v, limit=limit, inplace=True, downcast=downcast) - return result if not inplace else None + tmp.iloc[:, i] = content.fillna( + value[label] if axis == 0 else value, + limit=limit, + inplace=False, + downcast=downcast, + ) + + tmp = tmp.infer_objects() + new_data = tmp._mgr elif not is_list_like(value): new_data = self._mgr.fillna( diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 564481d01abc8..423f786a92593 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -438,10 +438,6 @@ def test_fillna_dict_series(self): expected = df.fillna(df.max().to_dict()) tm.assert_frame_equal(result, expected) - # disable this for now - with pytest.raises(NotImplementedError, match="column by column"): - df.fillna(df.max(1), axis=1) - def test_fillna_dataframe(self): # GH#8377 df = DataFrame( @@ -531,6 +527,97 @@ def test_fill_corner(self, float_frame, float_string_frame): # TODO(wesm): unused? result = empty_float.fillna(value=0) # noqa + @pytest.mark.parametrize( + "expected,fill_value", + [ + ( + DataFrame( + [[100, 100], [200, 4], [5, 6]], columns=list("AB"), dtype="float64" + ), + Series([100, 200, 300]), + ), + ( + DataFrame( + [[100, 100], [np.nan, 4], [5, 6]], + columns=list("AB"), + dtype="float64", + ), + {0: 100, 2: 300, 3: 400}, + ), + ], + ) + def test_fillna_column_wise(self, expected, fill_value): + # GH 4514 + df = DataFrame([[np.nan, np.nan], [np.nan, 4], [5, 6]], columns=list("AB")) + result = df.fillna(fill_value, axis=1) + tm.assert_frame_equal(expected, result) + + def test_fillna_column_wise_downcast(self): + # GH 4514 + df = DataFrame([[np.nan, 2], [3, np.nan], [np.nan, np.nan]], columns=list("AB")) + s = Series([100, 200, 300]) + + expected = DataFrame( + [[100, 2], [3, 200], [300, 300]], columns=list("AB"), dtype="int64" + ) + result = df.fillna(s, axis=1, downcast="infer") + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize( + "fill_value", [Series([100, 200, 300]), {0: 100, 2: 300, 3: 400}] + ) + def test_fillna_column_wise_inplace(self, fill_value): + # GH 4514 + df = DataFrame([[np.nan, np.nan], [np.nan, 4], [5, 6]], columns=list("AB")) + expected = df.fillna(fill_value, axis=1, inplace=False) + df.fillna(fill_value, axis=1, inplace=True) + tm.assert_frame_equal(expected, df) + + @pytest.mark.parametrize( + "fill_value", + [Series([100, 200, 300], index=[0, 1, 2]), {0: 100, 1: 200, 2: 300}], + ) + def test_fillna_column_wise_duplicated_with_series_dict(self, fill_value): + # GH 4514 + df = DataFrame( + [[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]], + columns=list("ABB"), + index=[0, 0, 1], + ) + expected = DataFrame( + [[100, 100, 3], [100, 5, 100], [7, 200, 200]], + columns=list("ABB"), + index=[0, 0, 1], + dtype="float64", + ) + + result = df.fillna(fill_value, axis=1) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "fill_value", + [ + Series([100, 200, 300], index=["A", "B", "C"]), + {"A": 100, "B": 200, "C": 300}, + ], + ) + def test_fillna_duplicated_with_series_dict(self, fill_value): + # GH 4514 + df = DataFrame( + [[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]], + columns=list("ABB"), + index=[0, 0, 1], + ) + expected = DataFrame( + [[100, 200, 3], [100, 5, 200], [7, 200, 200]], + columns=list("ABB"), + index=[0, 0, 1], + dtype="float64", + ) + + result = df.fillna(fill_value) + tm.assert_frame_equal(result, expected) + def test_fillna_nonconsolidated_frame(): # https://github.com/pandas-dev/pandas/issues/36495