From c9f54fdd060d20f5e3fe939617e1a018e322e559 Mon Sep 17 00:00:00 2001 From: TimJ Date: Thu, 27 Jun 2019 11:08:19 +0100 Subject: [PATCH 1/6] BUG/TST: Validating fillna limit keyword fillna method was not throwing an ValueError with a dataframe full of integers (no NAs) and limit keyword set to a negative number. Issue can be seen in GH27042. Begun with writing tests for checking the behaviour of fillna and values of limit in test/frame/test_missing.py for both negative and non-integer values of limit. Then wrote new test file tests/util/test_validate_fillna_kwargs.py, where the function would now also take limit as an argument and perform necassary checks in there. Made changes to util/_validators.py to correctly check values of limit as was already done in core/internal/blocks.py Updated calls to validate_fillna_kwargs in various files to include the limit argument. --- pandas/core/arrays/base.py | 2 +- pandas/core/arrays/categorical.py | 4 +- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/numpy_.py | 2 +- pandas/core/generic.py | 2 +- pandas/core/internals/blocks.py | 4 -- pandas/tests/frame/test_missing.py | 16 +++++++ .../tests/util/test_validate_fillna_kwargs.py | 42 +++++++++++++++++++ pandas/util/_validators.py | 13 ++++-- 9 files changed, 74 insertions(+), 13 deletions(-) create mode 100644 pandas/tests/util/test_validate_fillna_kwargs.py diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index d1dfb6b5e8599..3acdec2bcb5e5 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -445,7 +445,7 @@ def fillna(self, value=None, method=None, limit=None): from pandas.util._validators import validate_fillna_kwargs from pandas.core.missing import pad_1d, backfill_1d - value, method = validate_fillna_kwargs(value, method) + value, method, limit = validate_fillna_kwargs(value, method, limit) mask = self.isna() diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 155638aca5560..05a0153a4f9a8 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1756,8 +1756,8 @@ def fillna(self, value=None, method=None, limit=None): ------- filled : Categorical with NA/NaN filled """ - value, method = validate_fillna_kwargs( - value, method, validate_scalar_dict_value=False + value, method, limit = validate_fillna_kwargs( + value, method, limit, validate_scalar_dict_value=False ) if value is None: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ebf1f692ccde6..8391d746116c3 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -767,7 +767,7 @@ def fillna(self, value=None, method=None, limit=None): if isinstance(value, ABCSeries): value = value.array - value, method = validate_fillna_kwargs(value, method) + value, method, limit = validate_fillna_kwargs(value, method, limit) mask = self.isna() diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index f651f89fab834..149cf8600d6a3 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -254,7 +254,7 @@ def isna(self): def fillna(self, value=None, method=None, limit=None): # TODO(_values_for_fillna): remove this - value, method = validate_fillna_kwargs(value, method) + value, method, limit = validate_fillna_kwargs(value, method, limit) mask = self.isna() diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 992c83e66090e..caf60da8333db 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6025,7 +6025,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, 3 NaN 3.0 NaN 4 """ inplace = validate_bool_kwarg(inplace, 'inplace') - value, method = validate_fillna_kwargs(value, method) + value, method, limit = validate_fillna_kwargs(value, method, limit) self._consolidate_inplace() diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c6be56df7ae0c..b4c1aecbf1fbb 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -352,10 +352,6 @@ def fillna(self, value, limit=None, inplace=False, downcast=None): mask = isna(self.values) if limit is not None: - if not is_integer(limit): - raise ValueError('Limit must be an integer') - if limit < 1: - raise ValueError('Limit must be greater than 0') if self.ndim > 2: raise NotImplementedError("number of dimensions for 'fillna' " "is currently limited to 2") diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index a1dbeba364240..c72951ac4cdfa 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -516,6 +516,22 @@ def test_fillna_skip_certain_blocks(self): # it works! df.fillna(np.nan) + @pytest.mark.parametrize("type", [int, float]) + def test_fillna_positive_limit(self, type): + df = DataFrame(np.random.randn(10, 4)).astype(type) + + msg = "Limit must be greater than 0" + with pytest.raises(ValueError, match=msg): + df.fillna(0, limit=-5) + + @pytest.mark.parametrize("type", [int, float]) + def test_fillna_integer_limit(self, type): + df = DataFrame(np.random.randn(10, 4)).astype(type) + + msg = "Limit must be an integer" + with pytest.raises(ValueError, match=msg): + df.fillna(0, limit=0.5) + def test_fillna_inplace(self): df = DataFrame(np.random.randn(10, 4)) df[1][:4] = np.nan diff --git a/pandas/tests/util/test_validate_fillna_kwargs.py b/pandas/tests/util/test_validate_fillna_kwargs.py new file mode 100644 index 0000000000000..0155bdfe74351 --- /dev/null +++ b/pandas/tests/util/test_validate_fillna_kwargs.py @@ -0,0 +1,42 @@ +import pytest + +from pandas.util._validators import validate_fillna_kwargs + +def test_no_value_or_method(): + + msg = "Must specify a fill 'value' or 'method'." + + with pytest.raises(ValueError, match=msg): + validate_fillna_kwargs(None, None, None) + + +@pytest.mark.parametrize("value", [0, {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4}]) +@pytest.mark.parametrize("method", ['backfill', 'bfill', 'pad', 'ffill']) +def test_value_and_method(value, method): + + msg = "Cannot specify both 'value' and 'method'." + + with pytest.raises(ValueError, match=msg): + validate_fillna_kwargs(value, method, None) + +@pytest.mark.parametrize("value", [(1, 2, 3), [1, 2, 3]]) +def test_valid_value(value): + + msg = ('"value" parameter must be a scalar or dict, but ' + 'you passed a "{0}"'.format(type(value).__name__)) + + with pytest.raises(TypeError, match=msg): + validate_fillna_kwargs(value, None, None) + +def test_integer_limit(): + + msg = "Limit must be an integer" + with pytest.raises(ValueError, match=msg): + validate_fillna_kwargs(0,None, 0.5) +def test_positive_limit(): + + msg = "Limit must be greater than 0" + with pytest.raises(ValueError, match=msg): + validate_fillna_kwargs(5, None, -5) + + diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 41faaf68d7f40..d94570562fb37 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -4,7 +4,7 @@ """ import warnings -from pandas.core.dtypes.common import is_bool +from pandas.core.dtypes.common import is_bool, is_integer def _check_arg_length(fname, args, max_fname_arg_count, compat_args): @@ -322,7 +322,8 @@ def validate_axis_style_args(data, args, kwargs, arg_name, method_name): return out -def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): +def validate_fillna_kwargs(value, method, limit, + validate_scalar_dict_value=True): """Validate the keyword arguments to 'fillna'. This checks that exactly one of 'value' and 'method' is specified. @@ -355,4 +356,10 @@ def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): elif value is not None and method is not None: raise ValueError("Cannot specify both 'value' and 'method'.") - return value, method + if limit is not None: + if not is_integer(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') + + return value, method, limit From b39604bed1045e42806e68d51c6ac2ec6748e9b0 Mon Sep 17 00:00:00 2001 From: TimJ Date: Thu, 27 Jun 2019 11:30:10 +0100 Subject: [PATCH 2/6] Added Whatsnew Entry --- doc/source/whatsnew/v0.25.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 18a3785867714..719c8cb369832 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -685,6 +685,7 @@ Missing - Fixed misleading exception message in :meth:`Series.interpolate` if argument ``order`` is required, but omitted (:issue:`10633`, :issue:`24014`). - Fixed class type displayed in exception message in :meth:`DataFrame.dropna` if invalid ``axis`` parameter passed (:issue:`25555`) +- Fixed not throwing ValueError when a DataFrame of full integers has fillna called with a negative limit value - MultiIndex From 01df11f5838ce0f080e961338c9b89f8c2c319aa Mon Sep 17 00:00:00 2001 From: TimJ Date: Thu, 27 Jun 2019 11:39:30 +0100 Subject: [PATCH 3/6] Style changes --- pandas/tests/util/test_validate_fillna_kwargs.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/tests/util/test_validate_fillna_kwargs.py b/pandas/tests/util/test_validate_fillna_kwargs.py index 0155bdfe74351..bd9b8881f080a 100644 --- a/pandas/tests/util/test_validate_fillna_kwargs.py +++ b/pandas/tests/util/test_validate_fillna_kwargs.py @@ -2,23 +2,26 @@ from pandas.util._validators import validate_fillna_kwargs + def test_no_value_or_method(): msg = "Must specify a fill 'value' or 'method'." with pytest.raises(ValueError, match=msg): - validate_fillna_kwargs(None, None, None) + validate_fillna_kwargs(None, None, None) -@pytest.mark.parametrize("value", [0, {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4}]) +@pytest.mark.parametrize("value", + [0, {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4}]) @pytest.mark.parametrize("method", ['backfill', 'bfill', 'pad', 'ffill']) def test_value_and_method(value, method): msg = "Cannot specify both 'value' and 'method'." - + with pytest.raises(ValueError, match=msg): validate_fillna_kwargs(value, method, None) + @pytest.mark.parametrize("value", [(1, 2, 3), [1, 2, 3]]) def test_valid_value(value): @@ -28,15 +31,16 @@ def test_valid_value(value): with pytest.raises(TypeError, match=msg): validate_fillna_kwargs(value, None, None) + def test_integer_limit(): msg = "Limit must be an integer" with pytest.raises(ValueError, match=msg): - validate_fillna_kwargs(0,None, 0.5) + validate_fillna_kwargs(0, None, 0.5) + + def test_positive_limit(): msg = "Limit must be greater than 0" with pytest.raises(ValueError, match=msg): validate_fillna_kwargs(5, None, -5) - - From ee72cd77c52759396fdaf4dc933d24293fba5b68 Mon Sep 17 00:00:00 2001 From: TimJ Date: Thu, 27 Jun 2019 12:05:54 +0100 Subject: [PATCH 4/6] More stylistic changes --- .../tests/util/test_validate_fillna_kwargs.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/tests/util/test_validate_fillna_kwargs.py b/pandas/tests/util/test_validate_fillna_kwargs.py index bd9b8881f080a..b6280ae359efe 100644 --- a/pandas/tests/util/test_validate_fillna_kwargs.py +++ b/pandas/tests/util/test_validate_fillna_kwargs.py @@ -25,22 +25,22 @@ def test_value_and_method(value, method): @pytest.mark.parametrize("value", [(1, 2, 3), [1, 2, 3]]) def test_valid_value(value): - msg = ('"value" parameter must be a scalar or dict, but ' - 'you passed a "{0}"'.format(type(value).__name__)) + msg = ('"value" parameter must be a scalar or dict, but ' + 'you passed a "{0}"'.format(type(value).__name__)) - with pytest.raises(TypeError, match=msg): - validate_fillna_kwargs(value, None, None) + with pytest.raises(TypeError, match=msg): + validate_fillna_kwargs(value, None, None) def test_integer_limit(): - msg = "Limit must be an integer" - with pytest.raises(ValueError, match=msg): - validate_fillna_kwargs(0, None, 0.5) + msg = "Limit must be an integer" + with pytest.raises(ValueError, match=msg): + validate_fillna_kwargs(0, None, 0.5) def test_positive_limit(): - msg = "Limit must be greater than 0" - with pytest.raises(ValueError, match=msg): - validate_fillna_kwargs(5, None, -5) + msg = "Limit must be greater than 0" + with pytest.raises(ValueError, match=msg): + validate_fillna_kwargs(5, None, -5) From 5a27113f87eb38f5181feba7637a9f8bb9f8bc74 Mon Sep 17 00:00:00 2001 From: TimJ Date: Thu, 27 Jun 2019 13:01:27 +0100 Subject: [PATCH 5/6] Make whatsnew addition more consistent --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 719c8cb369832..2128ba8fc046d 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -685,7 +685,7 @@ Missing - Fixed misleading exception message in :meth:`Series.interpolate` if argument ``order`` is required, but omitted (:issue:`10633`, :issue:`24014`). - Fixed class type displayed in exception message in :meth:`DataFrame.dropna` if invalid ``axis`` parameter passed (:issue:`25555`) -- Fixed not throwing ValueError when a DataFrame of full integers has fillna called with a negative limit value +- Fixed not throwing ValueError in :meth:`DataFrame.fillna` when called with a negative ``limit`` parameter value (:issue:`27042`) - MultiIndex From c9c39c45e28ffe7be2d980e6d26ccf4858de2a4b Mon Sep 17 00:00:00 2001 From: TimJ Date: Thu, 27 Jun 2019 15:18:41 +0100 Subject: [PATCH 6/6] Removed unnecessary parametrization --- pandas/tests/util/test_validate_fillna_kwargs.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/tests/util/test_validate_fillna_kwargs.py b/pandas/tests/util/test_validate_fillna_kwargs.py index b6280ae359efe..8b8ea079ab0b5 100644 --- a/pandas/tests/util/test_validate_fillna_kwargs.py +++ b/pandas/tests/util/test_validate_fillna_kwargs.py @@ -11,15 +11,12 @@ def test_no_value_or_method(): validate_fillna_kwargs(None, None, None) -@pytest.mark.parametrize("value", - [0, {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4}]) -@pytest.mark.parametrize("method", ['backfill', 'bfill', 'pad', 'ffill']) -def test_value_and_method(value, method): +def test_value_and_method(): msg = "Cannot specify both 'value' and 'method'." with pytest.raises(ValueError, match=msg): - validate_fillna_kwargs(value, method, None) + validate_fillna_kwargs(0, 'ffill', None) @pytest.mark.parametrize("value", [(1, 2, 3), [1, 2, 3]])