Skip to content

BUG/TST: Fillna limit Error Reporting #27074

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,7 @@ Missing

- Fixed misleading exception message in :meth:`Series.interpolate` if argument ``order`` is required, but omitted (:issue:`10633`, :issue:`24014`).
- Fixed class type displayed in exception message in :meth:`DataFrame.dropna` if invalid ``axis`` parameter passed (:issue:`25555`)
- Fixed not throwing ValueError when a DataFrame of full integers has fillna called with a negative limit value
-

MultiIndex
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ def fillna(self, value=None, method=None, limit=None):
from pandas.util._validators import validate_fillna_kwargs
from pandas.core.missing import pad_1d, backfill_1d

value, method = validate_fillna_kwargs(value, method)
value, method, limit = validate_fillna_kwargs(value, method, limit)

mask = self.isna()

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1756,8 +1756,8 @@ def fillna(self, value=None, method=None, limit=None):
-------
filled : Categorical with NA/NaN filled
"""
value, method = validate_fillna_kwargs(
value, method, validate_scalar_dict_value=False
value, method, limit = validate_fillna_kwargs(
value, method, limit, validate_scalar_dict_value=False
)

if value is None:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -767,7 +767,7 @@ def fillna(self, value=None, method=None, limit=None):
if isinstance(value, ABCSeries):
value = value.array

value, method = validate_fillna_kwargs(value, method)
value, method, limit = validate_fillna_kwargs(value, method, limit)

mask = self.isna()

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def isna(self):

def fillna(self, value=None, method=None, limit=None):
# TODO(_values_for_fillna): remove this
value, method = validate_fillna_kwargs(value, method)
value, method, limit = validate_fillna_kwargs(value, method, limit)

mask = self.isna()

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6025,7 +6025,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
3 NaN 3.0 NaN 4
"""
inplace = validate_bool_kwarg(inplace, 'inplace')
value, method = validate_fillna_kwargs(value, method)
value, method, limit = validate_fillna_kwargs(value, method, limit)

self._consolidate_inplace()

Expand Down
4 changes: 0 additions & 4 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,10 +352,6 @@ def fillna(self, value, limit=None, inplace=False, downcast=None):

mask = isna(self.values)
if limit is not None:
if not is_integer(limit):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is removing validation in this method right? Shouldn't we replace with a call to validate_fillna_kwargs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Digging into the fillna callstack, it first calls super().fillna() in core/frame.py, which then goes to core/generic.py where validate_fillna_kwargs is called (along with validate_bool_kwarg on inplace), before it then goes into core/internalls/blocks.py. Then in fillna in blocks.py it calls validate_bool_kwarg on inplace again, but originally never did the validate_fillna_kwargs call again. I'm not sure if the checks are just being repeated needlessly or not, first in frame and then in blocks. But at the start of the function validate_fillna_kwargs(value, None, limit) could be called I guess.

raise ValueError('Limit must be an integer')
if limit < 1:
raise ValueError('Limit must be greater than 0')
if self.ndim > 2:
raise NotImplementedError("number of dimensions for 'fillna' "
"is currently limited to 2")
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/frame/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,22 @@ def test_fillna_skip_certain_blocks(self):
# it works!
df.fillna(np.nan)

@pytest.mark.parametrize("type", [int, float])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think these need to be parametrized - the dtype of the calling object is unrelated to the change here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The dtype was exactly what was causing the issue though, this test failed before my change. It has to do with IntBlock and FloatBlock having _can_hold_na set to different values, causing a return in blocks.py's fillna method before it checked the validity of limit.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe np.random.uniform should be used here instead of np.random.rand which returns all zeros when cast to int, although there isn't much need for the change.

def test_fillna_positive_limit(self, type):
df = DataFrame(np.random.randn(10, 4)).astype(type)

msg = "Limit must be greater than 0"
with pytest.raises(ValueError, match=msg):
df.fillna(0, limit=-5)

@pytest.mark.parametrize("type", [int, float])
def test_fillna_integer_limit(self, type):
df = DataFrame(np.random.randn(10, 4)).astype(type)

msg = "Limit must be an integer"
with pytest.raises(ValueError, match=msg):
df.fillna(0, limit=0.5)

def test_fillna_inplace(self):
df = DataFrame(np.random.randn(10, 4))
df[1][:4] = np.nan
Expand Down
46 changes: 46 additions & 0 deletions pandas/tests/util/test_validate_fillna_kwargs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import pytest

from pandas.util._validators import validate_fillna_kwargs


def test_no_value_or_method():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this not tested before?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I couldn't find any mention of this specific function in any test files after doing a search for it. There may be higher level tests that check the behaviour of the fillna method for this though.


msg = "Must specify a fill 'value' or 'method'."

with pytest.raises(ValueError, match=msg):
validate_fillna_kwargs(None, None, None)


@pytest.mark.parametrize("value",
[0, {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4}])
@pytest.mark.parametrize("method", ['backfill', 'bfill', 'pad', 'ffill'])
def test_value_and_method(value, method):

msg = "Cannot specify both 'value' and 'method'."

with pytest.raises(ValueError, match=msg):
validate_fillna_kwargs(value, method, None)


@pytest.mark.parametrize("value", [(1, 2, 3), [1, 2, 3]])
def test_valid_value(value):

msg = ('"value" parameter must be a scalar or dict, but '
'you passed a "{0}"'.format(type(value).__name__))

with pytest.raises(TypeError, match=msg):
validate_fillna_kwargs(value, None, None)


def test_integer_limit():

msg = "Limit must be an integer"
with pytest.raises(ValueError, match=msg):
validate_fillna_kwargs(0, None, 0.5)


def test_positive_limit():

msg = "Limit must be greater than 0"
with pytest.raises(ValueError, match=msg):
validate_fillna_kwargs(5, None, -5)
13 changes: 10 additions & 3 deletions pandas/util/_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"""
import warnings

from pandas.core.dtypes.common import is_bool
from pandas.core.dtypes.common import is_bool, is_integer


def _check_arg_length(fname, args, max_fname_arg_count, compat_args):
Expand Down Expand Up @@ -322,7 +322,8 @@ def validate_axis_style_args(data, args, kwargs, arg_name, method_name):
return out


def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True):
def validate_fillna_kwargs(value, method, limit,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add limit to the parameters.

validate_scalar_dict_value=True):
"""Validate the keyword arguments to 'fillna'.

This checks that exactly one of 'value' and 'method' is specified.
Expand Down Expand Up @@ -355,4 +356,10 @@ def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True):
elif value is not None and method is not None:
raise ValueError("Cannot specify both 'value' and 'method'.")

return value, method
if limit is not None:
if not is_integer(limit):
raise ValueError('Limit must be an integer')
if limit < 1:
raise ValueError('Limit must be greater than 0')

return value, method, limit