diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index e0a8065d9a507..74a0212e9a4f7 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -77,7 +77,7 @@ Reshaping Numeric ^^^^^^^ - +- Bug in ``.interpolate()``, where ``limit_direction`` was not respected when ``limit=None`` (default) was passed (:issue:`16282`) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 3010348423340..51778684d68f5 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -160,35 +160,41 @@ def _interp_limit(invalid, fw_limit, bw_limit): start_nans = set(range(ys.first_valid_index())) end_nans = set(range(1 + ys.last_valid_index(), len(valid))) - # This is a list of the indexes in the series whose yvalue is currently - # NaN, but whose interpolated yvalue will be overwritten with NaN after - # computing the interpolation. For each index in this list, one of these - # conditions is true of the corresponding NaN in the yvalues: + # violate_limit is a list of the indexes in the series whose yvalue is + # currently NaN, and should still be NaN after the interpolation. + # Specifically: # - # a) It is one of a chain of NaNs at the beginning of the series, and - # either limit is not specified or limit_direction is 'forward'. - # b) It is one of a chain of NaNs at the end of the series, and limit is - # specified and limit_direction is 'backward' or 'both'. - # c) Limit is nonzero and it is further than limit from the nearest non-NaN - # value (with respect to the limit_direction setting). + # If limit_direction='forward' or None then the list will contain NaNs at + # the beginning of the series, and NaNs that are more than 'limit' away + # from the prior non-NaN. 
# - # The default behavior is to fill forward with no limit, ignoring NaNs at - # the beginning (see issues #9218 and #10420) - violate_limit = sorted(start_nans) - - if limit is not None: - if not is_integer(limit): - raise ValueError('Limit must be an integer') - if limit < 1: - raise ValueError('Limit must be greater than 0') - if limit_direction == 'forward': - violate_limit = sorted(start_nans | set(_interp_limit(invalid, - limit, 0))) - if limit_direction == 'backward': - violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0, - limit))) - if limit_direction == 'both': - violate_limit = sorted(_interp_limit(invalid, limit, limit)) + # If limit_direction='backward' then the list will contain NaNs at + # the end of the series, and NaNs that are more than 'limit' away + # from the subsequent non-NaN. + # + # If limit_direction='both' then the list will contain NaNs that + # are more than 'limit' away from any non-NaN. + # + # If limit=None, then use default behavior of filling an unlimited number + # of NaNs in the direction specified by limit_direction + + # default limit is unlimited GH #16282 + if limit is None: + limit = len(xvalues) + elif not is_integer(limit): + raise ValueError('Limit must be an integer') + elif limit < 1: + raise ValueError('Limit must be greater than 0') + + # each possible limit_direction + if limit_direction == 'forward': + violate_limit = sorted(start_nans | + set(_interp_limit(invalid, limit, 0))) + elif limit_direction == 'backward': + violate_limit = sorted(end_nans | + set(_interp_limit(invalid, 0, limit))) + elif limit_direction == 'both': + violate_limit = sorted(_interp_limit(invalid, limit, limit)) xvalues = getattr(xvalues, 'values', xvalues) yvalues = getattr(yvalues, 'values', yvalues) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index c52c41877d5c0..8e73c17684a16 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -931,6 +931,24 
@@ def test_interp_limit_forward(self): limit_direction='FORWARD') assert_series_equal(result, expected) + def test_interp_unlimited(self): + # these tests are for issue #16282: default limit=None is unlimited + s = Series([np.nan, 1., 3., np.nan, np.nan, np.nan, 11., np.nan]) + expected = Series([1., 1., 3., 5., 7., 9., 11., 11.]) + result = s.interpolate(method='linear', + limit_direction='both') + assert_series_equal(result, expected) + + expected = Series([np.nan, 1., 3., 5., 7., 9., 11., 11.]) + result = s.interpolate(method='linear', + limit_direction='forward') + assert_series_equal(result, expected) + + expected = Series([1., 1., 3., 5., 7., 9., 11., np.nan]) + result = s.interpolate(method='linear', + limit_direction='backward') + assert_series_equal(result, expected) + def test_interp_limit_bad_direction(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index d7dbaccb87ee8..77ef535e08964 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -16,6 +16,7 @@ def test_mut_exclusive(): com._mut_exclusive(a=1, b=2) assert com._mut_exclusive(a=1, b=None) == 1 assert com._mut_exclusive(major=None, major_axis=None) is None + assert com._mut_exclusive(a=None, b=2) == 2 def test_get_callable_name():