From b207adad64e34bd7e80b8e1553f87c66b3d1ab14 Mon Sep 17 00:00:00 2001 From: WBare Date: Mon, 22 May 2017 15:22:55 -0400 Subject: [PATCH 1/5] BUG: Interpolate limit=n GH16282 --- doc/source/whatsnew/v0.21.0.txt | 2 + pandas/core/missing.py | 59 ++++++++++++++++------------- pandas/tests/series/test_missing.py | 18 +++++++++ 3 files changed, 52 insertions(+), 27 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 36dffc3d3378b..aeb6204fd2229 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -106,6 +106,8 @@ Reshaping Numeric ^^^^^^^ +- DataFrame.interpolate was not respecting limit_direction when using the default limit=None (unlimited). Specifically, it would always use limit_direction='forward' even when limit_direction was set otherwise. Now default limit=None will work with other directions. :issue:`16282` + diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 3010348423340..1efe491b8f389 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -160,35 +160,40 @@ def _interp_limit(invalid, fw_limit, bw_limit): start_nans = set(range(ys.first_valid_index())) end_nans = set(range(1 + ys.last_valid_index(), len(valid))) - # This is a list of the indexes in the series whose yvalue is currently - # NaN, but whose interpolated yvalue will be overwritten with NaN after - # computing the interpolation. For each index in this list, one of these - # conditions is true of the corresponding NaN in the yvalues: + # violate_limit is a list of the indexes in the series whose yvalue is currently + # NaN, and should still be NaN after the interpolation. Specifically: # - # a) It is one of a chain of NaNs at the beginning of the series, and - # either limit is not specified or limit_direction is 'forward'. - # b) It is one of a chain of NaNs at the end of the series, and limit is - # specified and limit_direction is 'backward' or 'both'. 
- # c) Limit is nonzero and it is further than limit from the nearest non-NaN - # value (with respect to the limit_direction setting). + # If limit_direction='forward' or None then the list will contain NaNs at + # the beginning of the series, and NaNs that are more than 'limit' away + # from the prior non-NaN. # - # The default behavior is to fill forward with no limit, ignoring NaNs at - # the beginning (see issues #9218 and #10420) - violate_limit = sorted(start_nans) - - if limit is not None: - if not is_integer(limit): - raise ValueError('Limit must be an integer') - if limit < 1: - raise ValueError('Limit must be greater than 0') - if limit_direction == 'forward': - violate_limit = sorted(start_nans | set(_interp_limit(invalid, - limit, 0))) - if limit_direction == 'backward': - violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0, - limit))) - if limit_direction == 'both': - violate_limit = sorted(_interp_limit(invalid, limit, limit)) + # If limit_direction='backward' then the list will contain NaNs at + # the end of the series, and NaNs that are more than 'limit' away + # from the subsequent non-NaN. + # + # If limit_direction='both' then the list will contain NaNs that + # are more than 'limit' away from any non-NaN. 
+ # + # If limit=None, then use default behavior of filling an unlimited number + # of NaNs in the direction specified by limit_direction + + # default limit is unlimited GH #16282 + if limit is None: + limit = len(xvalues) + elif not is_integer(limit): + raise ValueError('Limit must be an integer') + elif limit < 1: + raise ValueError('Limit must be greater than 0') + + # each possible limit_direction + if limit_direction == 'forward': + violate_limit = sorted(start_nans | + set(_interp_limit(invalid, limit, 0))) + elif limit_direction == 'backward': + violate_limit = sorted(end_nans | + set(_interp_limit(invalid, 0, limit))) + elif limit_direction == 'both': + violate_limit = sorted(_interp_limit(invalid, limit, limit)) xvalues = getattr(xvalues, 'values', xvalues) yvalues = getattr(yvalues, 'values', yvalues) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index c52c41877d5c0..8e73c17684a16 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -931,6 +931,24 @@ def test_interp_limit_forward(self): limit_direction='FORWARD') assert_series_equal(result, expected) + def test_interp_unlimited(self): + # these tests are for issue #16282: default limit=None is unlimited + s = Series([np.nan, 1., 3., np.nan, np.nan, np.nan, 11., np.nan]) + expected = Series([1., 1., 3., 5., 7., 9., 11., 11.]) + result = s.interpolate(method='linear', + limit_direction='both') + assert_series_equal(result, expected) + + expected = Series([np.nan, 1., 3., 5., 7., 9., 11., 11.]) + result = s.interpolate(method='linear', + limit_direction='forward') + assert_series_equal(result, expected) + + expected = Series([1., 1., 3., 5., 7., 9., 11., np.nan]) + result = s.interpolate(method='linear', + limit_direction='backward') + assert_series_equal(result, expected) + def test_interp_limit_bad_direction(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) From 843d427f52e407e2e18fbc623bb6d9db1c490055 Mon Sep 17 00:00:00 
2001 From: WBare Date: Mon, 22 May 2017 17:20:29 -0400 Subject: [PATCH 2/5] Fix: comment line over the 80 char limit --- pandas/core/missing.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 1efe491b8f389..51778684d68f5 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -160,8 +160,9 @@ def _interp_limit(invalid, fw_limit, bw_limit): start_nans = set(range(ys.first_valid_index())) end_nans = set(range(1 + ys.last_valid_index(), len(valid))) - # violate_limit is a list of the indexes in the series whose yvalue is currently - # NaN, and should still be NaN after the interpolation. Specifically: + # violate_limit is a list of the indexes in the series whose yvalue is + # currently NaN, and should still be NaN after the interpolation. + # Specifically: # # If limit_direction='forward' or None then the list will contain NaNs at # the beginning of the series, and NaNs that are more than 'limit' away From e465311ff5a8bc9a8d448803e4860d58a6bd8f3f Mon Sep 17 00:00:00 2001 From: WBare Date: Mon, 22 May 2017 17:34:40 -0400 Subject: [PATCH 3/5] Test: Added small test for code coverage --- pandas/tests/test_common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index d7dbaccb87ee8..77ef535e08964 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -16,6 +16,7 @@ def test_mut_exclusive(): com._mut_exclusive(a=1, b=2) assert com._mut_exclusive(a=1, b=None) == 1 assert com._mut_exclusive(major=None, major_axis=None) is None + assert com._mut_exclusive(a=None, b=2) == 2 def test_get_callable_name(): From 2f7c715cbf58e248faa6209d0370341242a1bd7e Mon Sep 17 00:00:00 2001 From: WBare Date: Tue, 23 May 2017 09:36:41 -0400 Subject: [PATCH 4/5] DOC: Moved whats new comment from 0.21.0 to 0.20.2 --- doc/source/whatsnew/v0.20.2.txt | 2 +- doc/source/whatsnew/v0.21.0.txt | 1 - 2 files changed, 1 insertion(+), 2 
deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index e0a8065d9a507..74a0212e9a4f7 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -77,7 +77,7 @@ Reshaping Numeric ^^^^^^^ - +- Bug in .interpolate(), where limit_direction was not respected when limit=None (default) was passed (:issue:`16282`) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index aeb6204fd2229..131ea14b2637b 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -106,7 +106,6 @@ Reshaping Numeric ^^^^^^^ -- DataFrame.interpolate was not respecting limit_direction when using the default limit=None (unlimited). Specifically, it would always use limit_direction='forward' even when limit_direction was set otherwise. Now default limit=None will work with other directions. :issue:`16282` From d3d9cb38f0d9297a568d33f5a2759eeb9bb06850 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 23 May 2017 10:58:33 -0500 Subject: [PATCH 5/5] Update v0.21.0.txt Removed extraneous newline --- doc/source/whatsnew/v0.21.0.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 131ea14b2637b..36dffc3d3378b 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -109,6 +109,5 @@ Numeric - Other ^^^^^