-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: column-wise DataFrame.fillna with Series/Dict value #38352
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
38f3657
f4052dc
bf953c7
4df3501
16cceb0
b1a70d8
d061f6f
8c630d0
55cee45
dc66a0b
9fac251
50ae79c
51d3a65
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -432,10 +432,6 @@ def test_fillna_dict_series(self): | |
expected = df.fillna(df.max().to_dict()) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
# disable this for now | ||
with pytest.raises(NotImplementedError, match="column by column"): | ||
df.fillna(df.max(1), axis=1) | ||
|
||
def test_fillna_dataframe(self): | ||
# GH#8377 | ||
df = DataFrame( | ||
|
@@ -525,6 +521,97 @@ def test_fill_corner(self, float_frame, float_string_frame): | |
# TODO(wesm): unused? | ||
result = empty_float.fillna(value=0) # noqa | ||
|
||
@pytest.mark.parametrize( | ||
"expected,fill_value", | ||
[ | ||
( | ||
DataFrame( | ||
[[100, 100], [200, 4], [5, 6]], columns=list("AB"), dtype="float64" | ||
), | ||
Series([100, 200, 300]), | ||
), | ||
( | ||
DataFrame( | ||
[[100, 100], [np.nan, 4], [5, 6]], | ||
columns=list("AB"), | ||
dtype="float64", | ||
), | ||
{0: 100, 2: 300, 3: 400}, | ||
), | ||
], | ||
) | ||
def test_fillna_column_wise(self, expected, fill_value): | ||
# GH 4514 | ||
df = DataFrame([[np.nan, np.nan], [np.nan, 4], [5, 6]], columns=list("AB")) | ||
result = df.fillna(fill_value, axis=1) | ||
tm.assert_frame_equal(expected, result) | ||
|
||
def test_fillna_column_wise_downcast(self): | ||
# GH 4514 | ||
df = DataFrame([[np.nan, 2], [3, np.nan], [np.nan, np.nan]], columns=list("AB")) | ||
s = Series([100, 200, 300]) | ||
|
||
expected = DataFrame( | ||
Review comment: What happens if we have a datetime column mixed in here?
Reply: We end up with object dtype:
In [5]: df = pd.DataFrame([[np.nan, 2], [3, np.nan], [np.nan, np.nan]], columns=list("AB"))
...: s = pd.Series(pd.to_datetime([100, 200, 300], unit="ns"))
...:
...: result = df.fillna(s, axis=1, downcast="infer")
...: result
Out[5]:
A B
0 1970-01-01 00:00:00.000000100 2.0
1 3.0 1970-01-01 00:00:00.000000200
2 1970-01-01 00:00:00.000000300 1970-01-01 00:00:00.000000300
In [6]: result.dtypes
Out[6]:
A object
B object
dtype: object

In [8]: df = pd.DataFrame({"A": pd.to_datetime([np.nan, 2, np.nan]), "B": pd.to_datetime([3, np.nan, np.nan])})
...: s = pd.Series([100, 200, 300])
...:
...: result = df.fillna(s, axis=1, downcast="infer")
...: result
Out[8]:
A B
0 100 1970-01-01 00:00:00.000000003
1 1970-01-01 00:00:00.000000002 200
2 300 300
In [9]: result.dtypes
Out[9]:
A object
B object

Review comment: Is this what we want? |
||
[[100, 2], [3, 200], [300, 300]], columns=list("AB"), dtype="int64" | ||
) | ||
result = df.fillna(s, axis=1, downcast="infer") | ||
tm.assert_frame_equal(expected, result) | ||
|
||
@pytest.mark.parametrize( | ||
"fill_value", [Series([100, 200, 300]), {0: 100, 2: 300, 3: 400}] | ||
) | ||
def test_fillna_column_wise_inplace(self, fill_value): | ||
# GH 4514 | ||
df = DataFrame([[np.nan, np.nan], [np.nan, 4], [5, 6]], columns=list("AB")) | ||
expected = df.fillna(fill_value, axis=1, inplace=False) | ||
df.fillna(fill_value, axis=1, inplace=True) | ||
tm.assert_frame_equal(expected, df) | ||
|
||
@pytest.mark.parametrize( | ||
"fill_value", | ||
[Series([100, 200, 300], index=[0, 1, 2]), {0: 100, 1: 200, 2: 300}], | ||
) | ||
def test_fillna_column_wise_duplicated_with_series_dict(self, fill_value): | ||
# GH 4514 | ||
df = DataFrame( | ||
[[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]], | ||
columns=list("ABB"), | ||
index=[0, 0, 1], | ||
) | ||
expected = DataFrame( | ||
[[100, 100, 3], [100, 5, 100], [7, 200, 200]], | ||
columns=list("ABB"), | ||
index=[0, 0, 1], | ||
dtype="float64", | ||
) | ||
|
||
result = df.fillna(fill_value, axis=1) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
@pytest.mark.parametrize( | ||
"fill_value", | ||
[ | ||
Series([100, 200, 300], index=["A", "B", "C"]), | ||
{"A": 100, "B": 200, "C": 300}, | ||
], | ||
) | ||
def test_fillna_duplicated_with_series_dict(self, fill_value): | ||
# GH 4514 | ||
df = DataFrame( | ||
[[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]], | ||
columns=list("ABB"), | ||
index=[0, 0, 1], | ||
) | ||
expected = DataFrame( | ||
[[100, 200, 3], [100, 5, 200], [7, 200, 200]], | ||
columns=list("ABB"), | ||
index=[0, 0, 1], | ||
dtype="float64", | ||
) | ||
|
||
result = df.fillna(fill_value) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
def test_fillna_nonconsolidated_frame(): | ||
# https://github.com/pandas-dev/pandas/issues/36495 | ||
|
Review comment: Move to 1.3.
Reply: Done.