Skip to content

Commit 8346780

Browse files
committed
ENH: column-wise DataFrame.fillna with Series and dict (pandas-dev#4514)
1 parent 9a68635 commit 8346780

File tree

3 files changed

+109
-13
lines changed

3 files changed

+109
-13
lines changed

doc/source/whatsnew/v1.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ Enhancements
1818
Other enhancements
1919
^^^^^^^^^^^^^^^^^^
2020

21-
-
21+
- :meth:`DataFrame.fillna` can now fill NA values along ``axis=1`` with a dictionary or :class:`Series` whose keys are matched against the row labels (:issue:`4514`)
2222
-
2323

2424

pandas/core/generic.py

+16-10
Original file line numberDiff line numberDiff line change
@@ -5960,19 +5960,25 @@ def fillna(
59605960
)
59615961

59625962
elif isinstance(value, (dict, ABCSeries)):
5963+
result = self if inplace else self.copy()
5964+
59635965
if axis == 1:
5964-
raise NotImplementedError(
5965-
"Currently only can fill "
5966-
"with dict/Series column "
5967-
"by column"
5968-
)
5966+
# Transpose so that each original row becomes a column and the per-column fill loop below can be reused
5967+
result = result.T
59695968

5970-
result = self if inplace else self.copy()
5971-
for k, v in value.items():
5972-
if k not in result:
5969+
for i in range(result.columns.size):
5970+
label = result.columns[i]
5971+
5972+
if label not in value.keys():
59735973
continue
5974-
obj = result[k]
5975-
obj.fillna(v, limit=limit, inplace=True, downcast=downcast)
5974+
5975+
result.iloc[:, i] = result.iloc[:, i].fillna(
5976+
value[label], limit=limit, inplace=False, downcast=downcast
5977+
)
5978+
5979+
if axis == 1:
5980+
result = result.T
5981+
59765982
return result if not inplace else None
59775983

59785984
elif not is_list_like(value):

pandas/tests/frame/test_missing.py

+92-2
Original file line numberDiff line numberDiff line change
@@ -605,8 +605,15 @@ def test_fillna_dict_series(self):
605605
tm.assert_frame_equal(result, expected)
606606

607607
# filling along axis=1 with a Series is now supported (previously raised NotImplementedError)
608-
with pytest.raises(NotImplementedError, match="column by column"):
609-
df.fillna(df.max(1), axis=1)
608+
expected = DataFrame(
609+
{
610+
"a": [1.0, 1.0, 2.0, 3.0, 4.0],
611+
"b": [1.0, 2.0, 3.0, 3.0, 4.0],
612+
"c": [1.0, 1.0, 2.0, 3.0, 4.0],
613+
}
614+
)
615+
result = df.fillna(df.max(1), axis=1)
616+
tm.assert_frame_equal(expected, result)
610617

611618
def test_fillna_dataframe(self):
612619
# GH 8377
@@ -983,3 +990,86 @@ def test_interp_time_inplace_axis(self, axis):
983990
result = expected.interpolate(axis=0, method="time")
984991
expected.interpolate(axis=0, method="time", inplace=True)
985992
tm.assert_frame_equal(result, expected)
993+
994+
@pytest.mark.parametrize(
995+
"expected,fill_value",
996+
[
997+
(
998+
DataFrame(
999+
[[100, 100], [200, 4], [5, 6]], columns=list("AB"), dtype="float64"
1000+
),
1001+
Series([100, 200, 300]),
1002+
),
1003+
(
1004+
DataFrame(
1005+
[[100, 100], [np.nan, 4], [5, 6]],
1006+
columns=list("AB"),
1007+
dtype="float64",
1008+
),
1009+
{0: 100, 2: 300, 3: 400},
1010+
),
1011+
],
1012+
)
1013+
def test_fillna_column_wise(self, expected, fill_value):
1014+
# GH 4514
1015+
df = DataFrame([[np.nan, np.nan], [np.nan, 4], [5, 6]], columns=list("AB"))
1016+
result = df.fillna(fill_value, axis=1)
1017+
tm.assert_frame_equal(expected, result)
1018+
1019+
df.fillna(fill_value, axis=1, inplace=True)
1020+
tm.assert_frame_equal(expected, df)
1021+
1022+
def test_fillna_column_wise_downcast(self):
1023+
df = DataFrame([[np.nan, 2], [3, np.nan], [np.nan, np.nan]], columns=list("AB"))
1024+
s = Series([100, 200, 300])
1025+
1026+
expected = DataFrame(
1027+
[[100, 2], [3, 200], [300, 300]], columns=list("AB"), dtype="int64"
1028+
)
1029+
result = df.fillna(s, axis=1, downcast="infer")
1030+
tm.assert_frame_equal(expected, result)
1031+
1032+
@pytest.mark.parametrize(
1033+
"fill_value",
1034+
[Series([100, 200, 300], index=[0, 1, 2]), {0: 100, 1: 200, 2: 300}],
1035+
)
1036+
def test_fillna_column_wise_duplicated_with_series_dict(self, fill_value):
1037+
# GH 4514
1038+
df = DataFrame(
1039+
[[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]],
1040+
columns=list("ABB"),
1041+
index=[0, 0, 1],
1042+
)
1043+
expected = DataFrame(
1044+
[[100, 100, 3], [100, 5, 100], [7, 200, 200]],
1045+
columns=list("ABB"),
1046+
index=[0, 0, 1],
1047+
dtype="float64",
1048+
)
1049+
1050+
result = df.fillna(fill_value, axis=1)
1051+
tm.assert_frame_equal(result, expected)
1052+
1053+
@pytest.mark.parametrize(
1054+
"fill_value",
1055+
[
1056+
Series([100, 200, 300], index=["A", "B", "C"]),
1057+
{"A": 100, "B": 200, "C": 300},
1058+
],
1059+
)
1060+
def test_fillna_duplicated_with_series_dict(self, fill_value):
1061+
# GH 4514
1062+
df = DataFrame(
1063+
[[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]],
1064+
columns=list("ABB"),
1065+
index=[0, 0, 1],
1066+
)
1067+
expected = DataFrame(
1068+
[[100, 200, 3], [100, 5, 200], [7, 200, 200]],
1069+
columns=list("ABB"),
1070+
index=[0, 0, 1],
1071+
dtype="float64",
1072+
)
1073+
1074+
result = df.fillna(fill_value)
1075+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)