-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG/TST: Fillna limit Error Reporting #27074
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -516,6 +516,22 @@ def test_fillna_skip_certain_blocks(self): | |
# it works! | ||
df.fillna(np.nan) | ||
|
||
@pytest.mark.parametrize("type", [int, float]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think these need to be parametrized - the dtype of the calling object is unrelated to the change here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The dtype was exactly what was causing the issue though, this test failed before my change. It has to do with IntBlock and FloatBlock having _can_hold_na set to different values, causing a return in blocks.py's fillna method before it checked the validity of limit. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe np.random.uniform should be used here instead of np.random.rand which returns all zeros when cast to int, although there isn't much need for the change. |
||
def test_fillna_positive_limit(self, type): | ||
df = DataFrame(np.random.randn(10, 4)).astype(type) | ||
|
||
msg = "Limit must be greater than 0" | ||
with pytest.raises(ValueError, match=msg): | ||
df.fillna(0, limit=-5) | ||
|
||
@pytest.mark.parametrize("type", [int, float]) | ||
def test_fillna_integer_limit(self, type): | ||
df = DataFrame(np.random.randn(10, 4)).astype(type) | ||
|
||
msg = "Limit must be an integer" | ||
with pytest.raises(ValueError, match=msg): | ||
df.fillna(0, limit=0.5) | ||
|
||
def test_fillna_inplace(self): | ||
df = DataFrame(np.random.randn(10, 4)) | ||
df[1][:4] = np.nan | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import pytest | ||
|
||
from pandas.util._validators import validate_fillna_kwargs | ||
|
||
|
||
def test_no_value_or_method(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was this not tested before? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I couldn't find any mention of this specific function in any test files after doing a search for it. There may be higher level tests that check the behaviour of the fillna method for this though. |
||
|
||
msg = "Must specify a fill 'value' or 'method'." | ||
|
||
with pytest.raises(ValueError, match=msg): | ||
validate_fillna_kwargs(None, None, None) | ||
|
||
|
||
def test_value_and_method(): | ||
|
||
msg = "Cannot specify both 'value' and 'method'." | ||
|
||
with pytest.raises(ValueError, match=msg): | ||
validate_fillna_kwargs(0, 'ffill', None) | ||
|
||
|
||
@pytest.mark.parametrize("value", [(1, 2, 3), [1, 2, 3]]) | ||
def test_valid_value(value): | ||
|
||
msg = ('"value" parameter must be a scalar or dict, but ' | ||
'you passed a "{0}"'.format(type(value).__name__)) | ||
|
||
with pytest.raises(TypeError, match=msg): | ||
validate_fillna_kwargs(value, None, None) | ||
|
||
|
||
def test_integer_limit(): | ||
|
||
msg = "Limit must be an integer" | ||
with pytest.raises(ValueError, match=msg): | ||
validate_fillna_kwargs(0, None, 0.5) | ||
|
||
|
||
def test_positive_limit(): | ||
|
||
msg = "Limit must be greater than 0" | ||
with pytest.raises(ValueError, match=msg): | ||
validate_fillna_kwargs(5, None, -5) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ | |
""" | ||
import warnings | ||
|
||
from pandas.core.dtypes.common import is_bool | ||
from pandas.core.dtypes.common import is_bool, is_integer | ||
|
||
|
||
def _check_arg_length(fname, args, max_fname_arg_count, compat_args): | ||
|
@@ -322,7 +322,8 @@ def validate_axis_style_args(data, args, kwargs, arg_name, method_name): | |
return out | ||
|
||
|
||
def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): | ||
def validate_fillna_kwargs(value, method, limit, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add |
||
validate_scalar_dict_value=True): | ||
"""Validate the keyword arguments to 'fillna'. | ||
|
||
This checks that exactly one of 'value' and 'method' is specified. | ||
|
@@ -355,4 +356,10 @@ def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): | |
elif value is not None and method is not None: | ||
raise ValueError("Cannot specify both 'value' and 'method'.") | ||
|
||
return value, method | ||
if limit is not None: | ||
if not is_integer(limit): | ||
raise ValueError('Limit must be an integer') | ||
if limit < 1: | ||
raise ValueError('Limit must be greater than 0') | ||
|
||
return value, method, limit |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is removing validation in this method right? Shouldn't we replace with a call to
validate_fillna_kwargs
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Digging into the fillna callstack, it first calls super().fillna() in core/frame.py, which then goes to core/generic.py where validate_fillna_kwargs is called (along with validate_bool_kwarg on
inplace
), before it then goes into core/internalls/blocks.py. Then in fillna in blocks.py it calls validate_bool_kwarg oninplace
again, but originally never did the validate_fillna_kwargs call again. I'm not sure if the checks are just being repeated needlessly or not, first in frame and then in blocks. But at the start of the function validate_fillna_kwargs(value, None, limit) could be called I guess.