Skip to content

POC/API/DEPR: errors kwd for fillna #45190

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5096,6 +5096,7 @@ def fillna(
inplace: Literal[False] = ...,
limit=...,
downcast=...,
errors=...,
) -> DataFrame:
...

Expand All @@ -5108,6 +5109,7 @@ def fillna(
inplace: Literal[True],
limit=...,
downcast=...,
errors=...,
) -> None:
...

Expand All @@ -5118,6 +5120,7 @@ def fillna(
inplace: Literal[True],
limit=...,
downcast=...,
errors=...,
) -> None:
...

Expand All @@ -5129,6 +5132,7 @@ def fillna(
inplace: Literal[True],
limit=...,
downcast=...,
errors=...,
) -> None:
...

Expand All @@ -5140,6 +5144,7 @@ def fillna(
inplace: Literal[True],
limit=...,
downcast=...,
errors=...,
) -> None:
...

Expand All @@ -5151,6 +5156,7 @@ def fillna(
inplace: Literal[True],
limit=...,
downcast=...,
errors=...,
) -> None:
...

Expand All @@ -5163,6 +5169,7 @@ def fillna(
inplace: Literal[True],
limit=...,
downcast=...,
errors=...,
) -> None:
...

Expand All @@ -5175,6 +5182,7 @@ def fillna(
inplace: Literal[True],
limit=...,
downcast=...,
errors=...,
) -> None:
...

Expand All @@ -5187,6 +5195,7 @@ def fillna(
inplace: Literal[True],
limit=...,
downcast=...,
errors=...,
) -> None:
...

Expand All @@ -5199,6 +5208,7 @@ def fillna(
inplace: bool = ...,
limit=...,
downcast=...,
errors=...,
) -> DataFrame | None:
...

Expand All @@ -5212,6 +5222,7 @@ def fillna(
inplace: bool = False,
limit=None,
downcast=None,
errors=lib.no_default,
) -> DataFrame | None:
return super().fillna(
value=value,
Expand All @@ -5220,6 +5231,7 @@ def fillna(
inplace=inplace,
limit=limit,
downcast=downcast,
errors=errors,
)

def pop(self, item: Hashable) -> Series:
Expand Down
35 changes: 28 additions & 7 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6278,6 +6278,7 @@ def fillna(
inplace: bool_t = False,
limit=None,
downcast=None,
errors=lib.no_default,
) -> NDFrameT | None:
"""
Fill NA/NaN values using the specified method.
Expand Down Expand Up @@ -6311,6 +6312,14 @@ def fillna(
A dict of item->dtype of what to downcast if possible,
or the string 'infer' which will try to downcast to an appropriate
equal type (e.g. float64 to int64 if possible).
errors : {{'raise', 'coerce'}}
What to do when the given value cannot be held in an array with this
dtype: 'raise' propagates the resulting exception, while 'coerce' casts
the array and the value to a common dtype before filling.
For backward compatibility, the default depends on the dtype. For most
dtypes, the default is to coerce. In a future version, the default will
be to coerce for all dtypes.

.. versionadded:: 1.4.0

Returns
-------
Expand Down Expand Up @@ -6390,6 +6399,8 @@ def fillna(
"""
inplace = validate_bool_kwarg(inplace, "inplace")
value, method = validate_fillna_kwargs(value, method)
if errors not in ["raise", "coerce", lib.no_default]:
raise ValueError("'errors' must be either 'raise' or 'coerce'")

self._consolidate_inplace()

Expand All @@ -6403,7 +6414,7 @@ def fillna(
if not self._mgr.is_single_block and axis == 1:
if inplace:
raise NotImplementedError()
result = self.T.fillna(method=method, limit=limit).T
result = self.T.fillna(method=method, limit=limit, errors=errors).T

return result

Expand All @@ -6413,7 +6424,7 @@ def fillna(
limit=limit,
inplace=inplace,
coerce=True,
downcast=downcast,
downcast=downcast, # TODO: errors
)
else:
if self.ndim == 1:
Expand All @@ -6438,7 +6449,11 @@ def fillna(
)

new_data = self._mgr.fillna(
value=value, limit=limit, inplace=inplace, downcast=downcast
value=value,
limit=limit,
inplace=inplace,
downcast=downcast,
errors=errors,
)

elif isinstance(value, (dict, ABCSeries)):
Expand All @@ -6455,23 +6470,29 @@ def fillna(
if k not in result:
continue
downcast_k = downcast if not is_dict else downcast.get(k)
result[k] = result[k].fillna(v, limit=limit, downcast=downcast_k)
result[k] = result[k].fillna(
v, limit=limit, downcast=downcast_k, errors=errors
)
return result if not inplace else None

elif not is_list_like(value):
if not self._mgr.is_single_block and axis == 1:

result = self.T.fillna(value=value, limit=limit).T
result = self.T.fillna(value=value, limit=limit, errors=errors).T

new_data = result
else:

new_data = self._mgr.fillna(
value=value, limit=limit, inplace=inplace, downcast=downcast
value=value,
limit=limit,
inplace=inplace,
downcast=downcast,
errors=errors,
)
elif isinstance(value, ABCDataFrame) and self.ndim == 2:

new_data = self.where(self.notna(), value)._mgr
new_data = self.where(self.notna(), value)._mgr # TODO: errors
else:
raise ValueError(f"invalid fill value with a {type(value)}")

Expand Down
9 changes: 7 additions & 2 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,9 +382,14 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T:
"shift", periods=periods, axis=axis, fill_value=fill_value
)

def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
def fillna(self: T, value, limit, inplace: bool, downcast, errors) -> T:
return self.apply_with_block(
"fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
"fillna",
value=value,
limit=limit,
inplace=inplace,
downcast=downcast,
errors=errors,
)

def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
Expand Down
82 changes: 69 additions & 13 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,12 @@ def _split_op_result(self, result: ArrayLike) -> list[Block]:
return [nb]

def fillna(
self, value, limit=None, inplace: bool = False, downcast=None
self,
value,
limit=None,
inplace: bool = False,
downcast=None,
errors=lib.no_default,
) -> list[Block]:
"""
fillna on the block with the value. If we fail, then convert to
Expand All @@ -455,7 +460,7 @@ def fillna(
else:
return [self.copy()]

if self._can_hold_element(value):
if self._can_hold_element(value) or errors == "raise":
nb = self if inplace else self.copy()
putmask_inplace(nb.values, mask, value)
return nb._maybe_downcast([nb], downcast)
Expand Down Expand Up @@ -1673,9 +1678,35 @@ def getitem_block_index(self, slicer: slice) -> ExtensionBlock:
return type(self)(new_values, self._mgr_locs, ndim=self.ndim)

def fillna(
self, value, limit=None, inplace: bool = False, downcast=None
self,
value,
limit=None,
inplace: bool = False,
downcast=None,
errors=lib.no_default,
) -> list[Block]:
values = self.values.fillna(value=value, limit=limit)
try:
values = self.values.fillna(value=value, limit=limit)
except (ValueError, TypeError):
if errors is lib.no_default:
warnings.warn(
f"The default behavior of fillna with {self.dtype} is "
"deprecated. In a future version, when the specified value "
"cannot be held in an array, the value and array will be "
"coerced to a common dtype. This is consistent with the "
"behavior with numpy dtypes. To retain the old behavior, "
"pass `errors='raise'` to obj.fillna. To get the future "
"behavior, pass `errors='coerce'`.",
FutureWarning,
stacklevel=find_stack_level(),
)
errors = "raise"
if errors == "coerce":
blk = self.coerce_to_target_dtype(value)
return blk.fillna(value, limit=limit, inplace=True, downcast=downcast)
else:
raise

return [self.make_block_same_class(values=values)]

def diff(self, n: int, axis: int = 1) -> list[Block]:
Expand Down Expand Up @@ -1814,18 +1845,43 @@ def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Blo
return [self.make_block_same_class(new_values)]

def fillna(
self, value, limit=None, inplace: bool = False, downcast=None
self,
value,
limit=None,
inplace: bool = False,
downcast=None,
errors=lib.no_default,
) -> list[Block]:

if not self._can_hold_element(value) and self.dtype.kind != "m":
# We support filling a DatetimeTZ with a `value` whose timezone
# is different by coercing to object.
# TODO: don't special-case td64
return self.coerce_to_target_dtype(value).fillna(
value, limit, inplace, downcast
)
try:
new_values = self.values.fillna(value=value, limit=limit)
except (ValueError, TypeError) as err:
if isinstance(err, ValueError) and "Timezones don't match" not in str(err):
# TODO(2.0): remove catching ValueError at all since
# DTA raising here is deprecated
raise

if errors is lib.no_default and self.dtype.kind == "m":
# Deprecating special-casing of td64
warnings.warn(
f"The default behavior of fillna with {self.dtype} is "
"deprecated. In a future version, when the specified value "
"cannot be held in an array, the value and array will be "
"coerced to a common dtype. This is consistent with the "
"behavior with numpy dtypes. To retain the old behavior, "
"pass `errors='raise'` to obj.fillna. To get the future "
"behavior, pass `errors='coerce'`.",
FutureWarning,
stacklevel=find_stack_level(),
)
raise
elif errors == "coerce" or errors is lib.no_default:
# for non-td64, the default is already to coerce.
blk = self.coerce_to_target_dtype(value)
return blk.fillna(value, limit, inplace, downcast)
else:
raise

new_values = self.values.fillna(value=value, limit=limit)
return [self.make_block_same_class(values=new_values)]


Expand Down
9 changes: 7 additions & 2 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,9 +410,14 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T:

return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value)

def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
def fillna(self: T, value, limit, inplace: bool, downcast, errors) -> T:
return self.apply(
"fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
"fillna",
value=value,
limit=limit,
inplace=inplace,
downcast=downcast,
errors=errors,
)

def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
Expand Down
Loading