diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 0a3a440ced54f..76c975952bc4d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -19,7 +19,11 @@ Check the :ref:`API Changes ` and :ref:`deprecations New features ~~~~~~~~~~~~ - +- DataFrame.interpolate() has a new setting: limit_direction='inside'. + This will cause the interpolation to fill missing values only when + the missing value is surrounded by valid values. It is useful when + a series needs to be interpolated, but must not expand into NaN + values that were outside the range of the original series. (GH16284) - Support for `PEP 519 -- Adding a file system path protocol `_ on most readers and writers (:issue:`13823`) - Added `__fspath__` method to :class`:pandas.HDFStore`, :class:`pandas.ExcelFile`, @@ -118,6 +122,11 @@ Reshaping Numeric ^^^^^^^ +- DataFrame.interpolate was not respecting limit_direction when + limit=None (unlimited). Specifically, it would always use + limit_direction='forward' even when specified otherwise. Now + default limit=None will work with other directions. (GH16282) + Categorical ^^^^^^^^^^^ diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 3010348423340..d4287b3c5f0a1 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -149,7 +149,7 @@ def _interp_limit(invalid, fw_limit, bw_limit): if invalid[max(0, x - fw_limit):x + bw_limit + 1].all(): yield x - valid_limit_directions = ['forward', 'backward', 'both'] + valid_limit_directions = ['forward', 'backward', 'both', 'inside'] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: raise ValueError('Invalid limit_direction: expecting one of %r, got ' @@ -172,23 +172,29 @@ def _interp_limit(invalid, fw_limit, bw_limit): # c) Limit is nonzero and it is further than limit from the nearest non-NaN # value (with respect to the limit_direction setting). 
# - # The default behavior is to fill forward with no limit, ignoring NaNs at - # the beginning (see issues #9218 and #10420) - violate_limit = sorted(start_nans) - - if limit is not None: - if not is_integer(limit): - raise ValueError('Limit must be an integer') - if limit < 1: - raise ValueError('Limit must be greater than 0') - if limit_direction == 'forward': - violate_limit = sorted(start_nans | set(_interp_limit(invalid, - limit, 0))) - if limit_direction == 'backward': - violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0, - limit))) - if limit_direction == 'both': - violate_limit = sorted(_interp_limit(invalid, limit, limit)) + # If limit is None, fill without limit in the direction specified by + # limit_direction; otherwise limit must be an integer greater than 0 + + # default limit is unlimited GH Issue:16282 + if limit is None: + limit = len(xvalues) + elif not is_integer(limit): + raise ValueError('Limit must be an integer') + elif limit < 1: + raise ValueError('Limit must be greater than 0') + + # each possible limit_direction + if limit_direction == 'forward': + violate_limit = sorted(start_nans | + set(_interp_limit(invalid, limit, 0))) + elif limit_direction == 'backward': + violate_limit = sorted(end_nans | + set(_interp_limit(invalid, 0, limit))) + elif limit_direction == 'both': + violate_limit = sorted(_interp_limit(invalid, limit, limit)) + elif limit_direction == 'inside': + violate_limit = sorted(start_nans | end_nans | + set(_interp_limit(invalid, limit, limit))) xvalues = getattr(xvalues, 'values', xvalues) yvalues = getattr(yvalues, 'values', yvalues) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index c52c41877d5c0..c13c38b8062df 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -931,6 +931,40 @@ def test_interp_limit_forward(self): limit_direction='FORWARD') assert_series_equal(result, expected) + def test_interp_limit_inside(self): + # 
these tests are for issue #16307 'inside' direction + s = Series([np.nan, 1, 3, np.nan, np.nan, np.nan, 11, np.nan]) + expected = Series([np.nan, 1, 3, 5, np.nan, 9, 11, np.nan]) + result = s.interpolate(method='linear', limit=1, + limit_direction='inside') + assert_series_equal(result, expected) + result = s.interpolate(method='linear', limit=1, + limit_direction='INSIDE') + assert_series_equal(result, expected) + + def test_interp_unlimited(self): + # these tests are for issue #16282 default Limit=None is unlimited + s = Series([np.nan, 1., 3., np.nan, np.nan, np.nan, 11., np.nan]) + expected = Series([np.nan, 1., 3., 5., 7., 9., 11., np.nan]) + result = s.interpolate(method='linear', + limit_direction='inside') + assert_series_equal(result, expected) + + expected = Series([1., 1., 3., 5., 7., 9., 11., 11.]) + result = s.interpolate(method='linear', + limit_direction='both') + assert_series_equal(result, expected) + + expected = Series([np.nan, 1., 3., 5., 7., 9., 11., 11.]) + result = s.interpolate(method='linear', + limit_direction='forward') + assert_series_equal(result, expected) + + expected = Series([1., 1., 3., 5., 7., 9., 11., np.nan]) + result = s.interpolate(method='linear', + limit_direction='backward') + assert_series_equal(result, expected) + def test_interp_limit_bad_direction(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11])