Skip to content

BUG: Interpolate limit=n GH16282 #16429

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 23, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.20.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ Reshaping

Numeric
^^^^^^^

- Bug in .interpolate(), where limit_direction was not respected when limit=None (default) was passed (:issue:16282)



Expand Down
60 changes: 33 additions & 27 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,35 +160,41 @@ def _interp_limit(invalid, fw_limit, bw_limit):
start_nans = set(range(ys.first_valid_index()))
end_nans = set(range(1 + ys.last_valid_index(), len(valid)))

# This is a list of the indexes in the series whose yvalue is currently
# NaN, but whose interpolated yvalue will be overwritten with NaN after
# computing the interpolation. For each index in this list, one of these
# conditions is true of the corresponding NaN in the yvalues:
# violate_limit is a list of the indexes in the series whose yvalue is
# currently NaN, and should still be NaN after the interpolation.
# Specifically:
#
# a) It is one of a chain of NaNs at the beginning of the series, and
# either limit is not specified or limit_direction is 'forward'.
# b) It is one of a chain of NaNs at the end of the series, and limit is
# specified and limit_direction is 'backward' or 'both'.
# c) Limit is nonzero and it is further than limit from the nearest non-NaN
# value (with respect to the limit_direction setting).
# If limit_direction='forward' or None then the list will contain NaNs at
# the beginning of the series, and NaNs that are more than 'limit' away
# from the prior non-NaN.
#
# The default behavior is to fill forward with no limit, ignoring NaNs at
# the beginning (see issues #9218 and #10420)
violate_limit = sorted(start_nans)

if limit is not None:
if not is_integer(limit):
raise ValueError('Limit must be an integer')
if limit < 1:
raise ValueError('Limit must be greater than 0')
if limit_direction == 'forward':
violate_limit = sorted(start_nans | set(_interp_limit(invalid,
limit, 0)))
if limit_direction == 'backward':
violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0,
limit)))
if limit_direction == 'both':
violate_limit = sorted(_interp_limit(invalid, limit, limit))
# If limit_direction='backward' then the list will contain NaNs at
# the end of the series, and NaNs that are more than 'limit' away
# from the subsequent non-NaN.
#
# If limit_direction='both' then the list will contain NaNs that
# are more than 'limit' away from any non-NaN.
#
# If limit=None, then use default behavior of filling an unlimited number
# of NaNs in the direction specified by limit_direction

# default limit is unlimited GH #16282
if limit is None:
limit = len(xvalues)
elif not is_integer(limit):
raise ValueError('Limit must be an integer')
elif limit < 1:
raise ValueError('Limit must be greater than 0')

# each possible limit_direction
if limit_direction == 'forward':
violate_limit = sorted(start_nans |
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TomAugspurger not sure if this is where the perf issue is (maybe because not this IS hit in all code).

these can be done via Index set ops

IOW

start_nans = Index(range(.....))

violate_limit = start_nans.union(Index(_interp_limit(invalid, limit, 0))).sort_values()

for example

set(_interp_limit(invalid, limit, 0)))
elif limit_direction == 'backward':
violate_limit = sorted(end_nans |
set(_interp_limit(invalid, 0, limit)))
elif limit_direction == 'both':
violate_limit = sorted(_interp_limit(invalid, limit, limit))

xvalues = getattr(xvalues, 'values', xvalues)
yvalues = getattr(yvalues, 'values', yvalues)
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/series/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -931,6 +931,24 @@ def test_interp_limit_forward(self):
limit_direction='FORWARD')
assert_series_equal(result, expected)

def test_interp_unlimited(self):
# these test are for issue #16282 default Limit=None is unlimited
s = Series([np.nan, 1., 3., np.nan, np.nan, np.nan, 11., np.nan])
expected = Series([1., 1., 3., 5., 7., 9., 11., 11.])
result = s.interpolate(method='linear',
limit_direction='both')
assert_series_equal(result, expected)

expected = Series([np.nan, 1., 3., 5., 7., 9., 11., 11.])
result = s.interpolate(method='linear',
limit_direction='forward')
assert_series_equal(result, expected)

expected = Series([1., 1., 3., 5., 7., 9., 11., np.nan])
result = s.interpolate(method='linear',
limit_direction='backward')
assert_series_equal(result, expected)

def test_interp_limit_bad_direction(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def test_mut_exclusive():
com._mut_exclusive(a=1, b=2)
assert com._mut_exclusive(a=1, b=None) == 1
assert com._mut_exclusive(major=None, major_axis=None) is None
assert com._mut_exclusive(a=None, b=2) == 2
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this change intentional? If not I can remove it just before merging.



def test_get_callable_name():
Expand Down