From e2d2c4b451a976e1766e1fff92003cfc5e229bf2 Mon Sep 17 00:00:00 2001 From: WBare Date: Tue, 9 May 2017 14:05:21 -0400 Subject: [PATCH 1/5] Fix interpolate -limit Add interpolate limit_direction='inside' --- doc/source/whatsnew/v0.21.0.txt | 11 ++++++++++ pandas/core/missing.py | 37 +++++++++++++++++---------------- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 36dffc3d3378b..d1a6cc93ffe0d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -20,6 +20,12 @@ Check the :ref:`API Changes ` and :ref:`deprecations New features ~~~~~~~~~~~~ +- DataFrame.interpolate() has a new setting: limit_direction='inside'. + This will cause the interpolation to fill missing values only when + the missing value is surounded by valid values. It is useful when + a series needs to be interpolated, but must not expand into NaN + values that were outside the range of the original series. (GH16284) + .. _whatsnew_0210.enhancements.other: @@ -107,6 +113,11 @@ Reshaping Numeric ^^^^^^^ +- DataFrame.interpolate was not respecting limit_direction when + limit=None (unlimited). Specifically, it would always use + limit_direction='forward' even when specified otherwise. Now + default limit=None will work with other directions. 
(GH16282) + Other diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 3010348423340..5798b7e0a86b7 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -149,7 +149,7 @@ def _interp_limit(invalid, fw_limit, bw_limit): if invalid[max(0, x - fw_limit):x + bw_limit + 1].all(): yield x - valid_limit_directions = ['forward', 'backward', 'both'] + valid_limit_directions = ['forward', 'backward', 'both', 'inside'] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: raise ValueError('Invalid limit_direction: expecting one of %r, got ' @@ -172,23 +172,24 @@ def _interp_limit(invalid, fw_limit, bw_limit): # c) Limit is nonzero and it is further than limit from the nearest non-NaN # value (with respect to the limit_direction setting). # - # The default behavior is to fill forward with no limit, ignoring NaNs at - # the beginning (see issues #9218 and #10420) - violate_limit = sorted(start_nans) - - if limit is not None: - if not is_integer(limit): - raise ValueError('Limit must be an integer') - if limit < 1: - raise ValueError('Limit must be greater than 0') - if limit_direction == 'forward': - violate_limit = sorted(start_nans | set(_interp_limit(invalid, - limit, 0))) - if limit_direction == 'backward': - violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0, - limit))) - if limit_direction == 'both': - violate_limit = sorted(_interp_limit(invalid, limit, limit)) + # If Limit is not an integer greater than 0, then use default behavior + # of filling without limit in the direction specified by limit_direction + + if not (is_integer(limit) and limit > 0): + limit = len(xvalues) + + # each possible limit_direction + if limit_direction == 'forward': + violate_limit = sorted(start_nans | + set(_interp_limit(invalid, limit, 0))) + elif limit_direction == 'backward': + violate_limit = sorted(end_nans | + set(_interp_limit(invalid, 0, limit))) + elif limit_direction == 'both': + violate_limit = 
sorted(_interp_limit(invalid, limit, limit)) + elif limit_direction == 'inside': + violate_limit = sorted(start_nans | end_nans | + set(_interp_limit(invalid, limit, limit))) xvalues = getattr(xvalues, 'values', xvalues) yvalues = getattr(yvalues, 'values', yvalues) From 8c08696f80b1936b95af0db376ca9672af7f4b2c Mon Sep 17 00:00:00 2001 From: WBare Date: Wed, 17 May 2017 13:34:23 -0400 Subject: [PATCH 2/5] Prior change incorrectly eliminated -limit type check --- pandas/core/missing.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 5798b7e0a86b7..d4287b3c5f0a1 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -175,8 +175,13 @@ def _interp_limit(invalid, fw_limit, bw_limit): # If Limit is not an integer greater than 0, then use default behavior # of filling without limit in the direction specified by limit_direction - if not (is_integer(limit) and limit > 0): + # default limit is unlimited GH Issue:16282 + if limit is None: limit = len(xvalues) + elif not is_integer(limit): + raise ValueError('Limit must be an integer') + elif limit < 1: + raise ValueError('Limit must be greater than 0') # each possible limit_direction if limit_direction == 'forward': From 50e3f712e8631940e7d7de402d1b3f0479d0f735 Mon Sep 17 00:00:00 2001 From: WBare Date: Wed, 17 May 2017 13:35:41 -0400 Subject: [PATCH 3/5] TST: Add test for -limit_direction and -limit=None default --- pandas/tests/series/test_missing.py | 37 +++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index c52c41877d5c0..24cb2e2235c2e 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -931,6 +931,42 @@ def test_interp_limit_forward(self): limit_direction='FORWARD') assert_series_equal(result, expected) + def test_interp_limit_inside(self): + # these test are for issue #16307 'inside' 
direction + s = Series([np.nan, 1, 3, np.nan, np.nan, np.nan, 11, np.nan]) + expected = Series([np.nan, 1, 3, 5, np.nan, 9, 11, np.nan]) + result = s.interpolate(method='linear', limit=1, + limit_direction='inside') + assert_series_equal(result, expected) + result = s.interpolate(method='linear', limit=1, + limit_direction='INSIDE') + assert_series_equal(result, expected) + + def test_interp_unlimited(self): + # these test are for issue #16282 default Limit=None is unlimited + s = Series([np.nan, 1., 3., np.nan, np.nan, np.nan, 11., np.nan]) + expected = Series([np.nan, 1., 3., 5., 7., 9., 11., np.nan]) + result = s.interpolate(method='linear', + limit_direction='inside') + assert_series_equal(result, expected) + + expected = Series([1., 1., 3., 5., 7., 9., 11., 11.]) + result = s.interpolate(method='linear', + limit_direction='both') + assert_series_equal(result, expected) + + expected = Series([np.nan, 1., 3., 5., 7., 9., 11., 11.]) + result = s.interpolate(method='linear', + limit_direction='forward') + assert_series_equal(result, expected) + + expected = Series([1., 1., 3., 5., 7., 9., 11., np.nan]) + result = s.interpolate(method='linear', + limit_direction='backward') + assert_series_equal(result, expected) + + + def test_interp_limit_bad_direction(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) @@ -941,6 +977,7 @@ def test_interp_limit_bad_direction(self): pytest.raises(ValueError, s.interpolate, method='linear', limit_direction='abc') + def test_interp_limit_direction(self): # These tests are for issue #9218 -- fill NaNs in both directions. 
s = Series([1, 3, np.nan, np.nan, np.nan, 11]) From 47daeaf7132f55524418eff8b1b55111e6742585 Mon Sep 17 00:00:00 2001 From: WBare Date: Wed, 17 May 2017 13:42:34 -0400 Subject: [PATCH 4/5] Style: Remove repeated blank lines around new tests --- pandas/tests/series/test_missing.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 24cb2e2235c2e..c13c38b8062df 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -965,8 +965,6 @@ def test_interp_unlimited(self): limit_direction='backward') assert_series_equal(result, expected) - - def test_interp_limit_bad_direction(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) @@ -977,7 +975,6 @@ def test_interp_limit_bad_direction(self): pytest.raises(ValueError, s.interpolate, method='linear', limit_direction='abc') - def test_interp_limit_direction(self): # These tests are for issue #9218 -- fill NaNs in both directions. s = Series([1, 3, np.nan, np.nan, np.nan, 11]) From aa929858a77fd3af5e4f77777cb45c4ae7993ed4 Mon Sep 17 00:00:00 2001 From: WBare Date: Sun, 21 May 2017 16:11:31 -0400 Subject: [PATCH 5/5] Fix spelling error --- doc/source/whatsnew/v0.21.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 33a7e222c11af..76c975952bc4d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -21,7 +21,7 @@ New features ~~~~~~~~~~~~ - DataFrame.interpolate() has a new setting: limit_direction='inside'. This will cause the interpolation to fill missing values only when - the missing value is surounded by valid values. It is useful when + the missing value is surrounded by valid values. It is useful when a series needs to be interpolated, but must not expand into NaN values that were outside the range of the original series. 
(GH16284) - Support for `PEP 519 -- Adding a file system path protocol