-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: Enable fillna(value=None) #58085
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1ae8dcc
0f171fa
80faa78
cbd93a4
75f8fa3
9c8bdf9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6752,7 +6752,7 @@ def _pad_or_backfill( | |
@overload | ||
def fillna( | ||
self, | ||
value: Hashable | Mapping | Series | DataFrame = ..., | ||
value: Hashable | Mapping | Series | DataFrame, | ||
*, | ||
axis: Axis | None = ..., | ||
inplace: Literal[False] = ..., | ||
|
@@ -6762,7 +6762,7 @@ def fillna( | |
@overload | ||
def fillna( | ||
self, | ||
value: Hashable | Mapping | Series | DataFrame = ..., | ||
value: Hashable | Mapping | Series | DataFrame, | ||
*, | ||
axis: Axis | None = ..., | ||
inplace: Literal[True], | ||
|
@@ -6772,7 +6772,7 @@ def fillna( | |
@overload | ||
def fillna( | ||
self, | ||
value: Hashable | Mapping | Series | DataFrame = ..., | ||
value: Hashable | Mapping | Series | DataFrame, | ||
*, | ||
axis: Axis | None = ..., | ||
inplace: bool = ..., | ||
|
@@ -6786,7 +6786,7 @@ def fillna( | |
) | ||
def fillna( | ||
self, | ||
value: Hashable | Mapping | Series | DataFrame | None = None, | ||
value: Hashable | Mapping | Series | DataFrame, | ||
*, | ||
axis: Axis | None = None, | ||
inplace: bool = False, | ||
|
@@ -6827,6 +6827,12 @@ def fillna( | |
reindex : Conform object to new index. | ||
asfreq : Convert TimeSeries to specified frequency. | ||
|
||
Notes | ||
----- | ||
For non-object dtype, ``value=None`` will use the NA value of the dtype. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
See more details in the :ref:`Filling missing data<missing_data.fillna>` | ||
section. | ||
|
||
Examples | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe you should add an example with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is a good suggestion, but what do you think about doing it in https://pandas.pydata.org/docs/user_guide/missing_data.html#filling-missing-data instead of the docstring here? Can then link to this in the docstring. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That's fine, although I think the behavior of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
-------- | ||
>>> df = pd.DataFrame( | ||
|
@@ -6909,101 +6915,92 @@ def fillna( | |
axis = 0 | ||
axis = self._get_axis_number(axis) | ||
|
||
if value is None: | ||
raise ValueError("Must specify a fill 'value'.") | ||
else: | ||
if self.ndim == 1: | ||
if isinstance(value, (dict, ABCSeries)): | ||
if not len(value): | ||
# test_fillna_nonscalar | ||
if inplace: | ||
return None | ||
return self.copy(deep=False) | ||
from pandas import Series | ||
|
||
value = Series(value) | ||
value = value.reindex(self.index) | ||
value = value._values | ||
elif not is_list_like(value): | ||
pass | ||
else: | ||
raise TypeError( | ||
'"value" parameter must be a scalar, dict ' | ||
"or Series, but you passed a " | ||
f'"{type(value).__name__}"' | ||
) | ||
if self.ndim == 1: | ||
if isinstance(value, (dict, ABCSeries)): | ||
if not len(value): | ||
# test_fillna_nonscalar | ||
if inplace: | ||
return None | ||
return self.copy(deep=False) | ||
from pandas import Series | ||
|
||
value = Series(value) | ||
value = value.reindex(self.index) | ||
value = value._values | ||
elif not is_list_like(value): | ||
pass | ||
else: | ||
raise TypeError( | ||
'"value" parameter must be a scalar, dict ' | ||
"or Series, but you passed a " | ||
f'"{type(value).__name__}"' | ||
) | ||
|
||
new_data = self._mgr.fillna(value=value, limit=limit, inplace=inplace) | ||
new_data = self._mgr.fillna(value=value, limit=limit, inplace=inplace) | ||
|
||
elif isinstance(value, (dict, ABCSeries)): | ||
if axis == 1: | ||
raise NotImplementedError( | ||
"Currently only can fill " | ||
"with dict/Series column " | ||
"by column" | ||
) | ||
result = self if inplace else self.copy(deep=False) | ||
for k, v in value.items(): | ||
if k not in result: | ||
continue | ||
elif isinstance(value, (dict, ABCSeries)): | ||
if axis == 1: | ||
raise NotImplementedError( | ||
"Currently only can fill with dict/Series column by column" | ||
) | ||
result = self if inplace else self.copy(deep=False) | ||
for k, v in value.items(): | ||
if k not in result: | ||
continue | ||
|
||
res_k = result[k].fillna(v, limit=limit) | ||
res_k = result[k].fillna(v, limit=limit) | ||
|
||
if not inplace: | ||
result[k] = res_k | ||
if not inplace: | ||
result[k] = res_k | ||
else: | ||
# We can write into our existing column(s) iff dtype | ||
# was preserved. | ||
if isinstance(res_k, ABCSeries): | ||
# i.e. 'k' only shows up once in self.columns | ||
if res_k.dtype == result[k].dtype: | ||
result.loc[:, k] = res_k | ||
else: | ||
# Different dtype -> no way to do inplace. | ||
result[k] = res_k | ||
else: | ||
# We can write into our existing column(s) iff dtype | ||
# was preserved. | ||
if isinstance(res_k, ABCSeries): | ||
# i.e. 'k' only shows up once in self.columns | ||
if res_k.dtype == result[k].dtype: | ||
result.loc[:, k] = res_k | ||
# see test_fillna_dict_inplace_nonunique_columns | ||
locs = result.columns.get_loc(k) | ||
if isinstance(locs, slice): | ||
locs = np.arange(self.shape[1])[locs] | ||
elif isinstance(locs, np.ndarray) and locs.dtype.kind == "b": | ||
locs = locs.nonzero()[0] | ||
elif not ( | ||
isinstance(locs, np.ndarray) and locs.dtype.kind == "i" | ||
): | ||
# Should never be reached, but let's cover our bases | ||
raise NotImplementedError( | ||
"Unexpected get_loc result, please report a bug at " | ||
"https://github.com/pandas-dev/pandas" | ||
) | ||
|
||
for i, loc in enumerate(locs): | ||
res_loc = res_k.iloc[:, i] | ||
target = self.iloc[:, loc] | ||
|
||
if res_loc.dtype == target.dtype: | ||
result.iloc[:, loc] = res_loc | ||
else: | ||
# Different dtype -> no way to do inplace. | ||
result[k] = res_k | ||
else: | ||
# see test_fillna_dict_inplace_nonunique_columns | ||
locs = result.columns.get_loc(k) | ||
if isinstance(locs, slice): | ||
locs = np.arange(self.shape[1])[locs] | ||
elif ( | ||
isinstance(locs, np.ndarray) and locs.dtype.kind == "b" | ||
): | ||
locs = locs.nonzero()[0] | ||
elif not ( | ||
isinstance(locs, np.ndarray) and locs.dtype.kind == "i" | ||
): | ||
# Should never be reached, but let's cover our bases | ||
raise NotImplementedError( | ||
"Unexpected get_loc result, please report a bug at " | ||
"https://github.com/pandas-dev/pandas" | ||
) | ||
|
||
for i, loc in enumerate(locs): | ||
res_loc = res_k.iloc[:, i] | ||
target = self.iloc[:, loc] | ||
|
||
if res_loc.dtype == target.dtype: | ||
result.iloc[:, loc] = res_loc | ||
else: | ||
result.isetitem(loc, res_loc) | ||
if inplace: | ||
return self._update_inplace(result) | ||
else: | ||
return result | ||
result.isetitem(loc, res_loc) | ||
if inplace: | ||
return self._update_inplace(result) | ||
else: | ||
return result | ||
|
||
elif not is_list_like(value): | ||
if axis == 1: | ||
result = self.T.fillna(value=value, limit=limit).T | ||
new_data = result._mgr | ||
else: | ||
new_data = self._mgr.fillna( | ||
value=value, limit=limit, inplace=inplace | ||
) | ||
elif isinstance(value, ABCDataFrame) and self.ndim == 2: | ||
new_data = self.where(self.notna(), value)._mgr | ||
elif not is_list_like(value): | ||
if axis == 1: | ||
result = self.T.fillna(value=value, limit=limit).T | ||
new_data = result._mgr | ||
else: | ||
raise ValueError(f"invalid fill value with a {type(value)}") | ||
new_data = self._mgr.fillna(value=value, limit=limit, inplace=inplace) | ||
elif isinstance(value, ABCDataFrame) and self.ndim == 2: | ||
new_data = self.where(self.notna(), value)._mgr | ||
else: | ||
raise ValueError(f"invalid fill value with a {type(value)}") | ||
|
||
result = self._constructor_from_mgr(new_data, axes=new_data.axes) | ||
if inplace: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The other change here is that you must specify a value for `value`, whereas before it defaulted to `None`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Previously, the default value of `None` would also raise. In other words, both before and after this PR you have to specify `value`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That was true if you didn't specify `method`. But with 2.2:
So I think it is worth saying that `value` is now required.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But `method` is no longer an argument. In 2.x, you either had to specify `value` or `method`, otherwise we'd raise. Any code that does not raise a warning in 2.2 is already specifying the `value` argument. Any code that raises a warning in 2.2 will raise a `TypeError` in 3.0.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So don't we have to say somewhere that we are enforcing the deprecation that `method` is no longer an argument?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
pandas/doc/source/whatsnew/v3.0.0.rst
Line 283 in 6e09e97
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great. All good then