Skip to content

ENH: support Akima 1D interpolation #11496

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
5 changes: 5 additions & 0 deletions doc/source/whatsnew/v0.17.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Enhancements
objects for the ``filepath_or_buffer`` argument. (:issue:`11033`)
- ``DataFrame`` now uses the fields of a ``namedtuple`` as columns, if columns are not supplied (:issue:`11181`)
- Improve the error message displayed in :func:`pandas.io.gbq.to_gbq` when the DataFrame does not match the schema of the destination table (:issue:`11359`)
- Akima 1D interpolation is now supported (:issue:`7588`)

.. _whatsnew_0171.api:

Expand Down Expand Up @@ -118,3 +119,7 @@ Bug Fixes
- Bug in ``to_excel`` with openpyxl 2.2+ and merging (:issue:`11408`)

- Bug in ``DataFrame.to_dict()`` produces a ``np.datetime64`` object instead of ``Timestamp`` when only datetime is present in data (:issue:`11327`)

- Bug in ``Panel.fillna()`` does not fill across axis 0 (:issue:`8251`)

- Bug in ``Panel.fillna()`` loses index names (:issue:`3570`)
5 changes: 2 additions & 3 deletions pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pandas.core.algorithms import factorize
from pandas.core.base import PandasObject, PandasDelegate
import pandas.core.common as com
from pandas.core.missing import interpolate_2d
from pandas.core.missing import pad
from pandas.util.decorators import cache_readonly, deprecate_kwarg

from pandas.core.common import (ABCSeries, ABCIndexClass, ABCPeriodIndex, ABCCategoricalIndex,
Expand Down Expand Up @@ -1313,8 +1313,7 @@ def fillna(self, value=None, method=None, limit=None):
if method is not None:

values = self.to_dense().reshape(-1, len(self))
values = interpolate_2d(
values, method, 0, None, value).astype(self.categories.dtype)[0]
values = pad(values, method, 0, None, value).astype(self.categories.dtype)[0]
values = _get_codes_for_values(values, self.categories)

else:
Expand Down
32 changes: 10 additions & 22 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2774,39 +2774,27 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
# set the default here, so functions examining the signature
# can detect if something was set (e.g. in groupby) (GH9221)
if axis is None:
axis = 0
axis = self._stat_axis_name
axis = self._get_axis_number(axis)
method = mis._clean_fill_method(method)

from pandas import DataFrame
if value is None:
if method is None:
raise ValueError('must specify a fill method or value')
if self._is_mixed_type and axis == 1:
if self._is_mixed_type:
if (self.ndim > 2) and (axis == 0):
raise NotImplementedError('cannot fill across axis 0 for mixed dtypes')
if inplace:
raise NotImplementedError()
result = self.T.fillna(method=method, limit=limit).T

# need to downcast here because of all of the transposes
result._data = result._data.downcast()

return result

# > 3d
if self.ndim > 3:
raise NotImplementedError(
'Cannot fillna with a method for > 3dims'
)
raise NotImplementedError('cannot fill inplace for mixed dtypes')
elif (self.ndim == 2) and (axis == 1):
result = self.T.fillna(method=method, limit=limit).T

# 3d
elif self.ndim == 3:
# need to downcast here because of all of the transposes
result._data = result._data.downcast()

# fill in 2d chunks
result = dict([(col, s.fillna(method=method, value=value))
for col, s in compat.iteritems(self)])
return self._constructor.from_dict(result).__finalize__(self)
return result

# 2d or less
method = mis._clean_fill_method(method)
new_data = self._data.interpolate(method=method,
axis=axis,
Expand Down
20 changes: 10 additions & 10 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -910,12 +910,12 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
values = self.values if inplace else self.values.copy()
values, _, fill_value, _ = self._try_coerce_args(values, fill_value)
values = self._try_operate(values)
values = mis.interpolate_2d(values,
method=method,
axis=axis,
limit=limit,
fill_value=fill_value,
dtype=self.dtype)
values = mis.pad(values,
method=method,
axis=axis,
limit=limit,
fill_value=fill_value,
dtype=self.dtype)
values = self._try_coerce_result(values)

blocks = [self.make_block(values,
Expand Down Expand Up @@ -950,8 +950,8 @@ def func(x):

# process a 1-d slice, returning it
# should the axis argument be handled below in apply_along_axis?
# i.e. not an arg to mis.interpolate_1d
return mis.interpolate_1d(index, x, method=method, limit=limit,
# i.e. not an arg to mis.interpolate
return mis.interpolate(index, x, method=method, limit=limit,
limit_direction=limit_direction,
fill_value=fill_value,
bounds_error=False, **kwargs)
Expand Down Expand Up @@ -2358,7 +2358,7 @@ def make_block_same_class(self, values, placement,
def interpolate(self, method='pad', axis=0, inplace=False,
limit=None, fill_value=None, **kwargs):

values = mis.interpolate_2d(
values = mis.pad(
self.values.to_dense(), method, axis, limit, fill_value)
return self.make_block_same_class(values=values,
placement=self.mgr_locs)
Expand Down Expand Up @@ -3774,7 +3774,7 @@ def reindex(self, new_axis, indexer=None, method=None, fill_value=None,

# fill if needed
if method is not None or limit is not None:
new_values = mis.interpolate_2d(new_values, method=method,
new_values = mis.pad(new_values, method=method,
limit=limit, fill_value=fill_value)

if self._block.is_sparse:
Expand Down
55 changes: 44 additions & 11 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def _clean_interp_method(method, **kwargs):
valid = ['linear', 'time', 'index', 'values', 'nearest', 'zero', 'slinear',
'quadratic', 'cubic', 'barycentric', 'polynomial',
'krogh', 'piecewise_polynomial',
'pchip', 'spline']
'pchip', 'spline', 'akima']
if method in ('spline', 'polynomial') and order is None:
raise ValueError("You must specify the order of the spline or "
"polynomial.")
Expand All @@ -49,9 +49,9 @@ def _clean_interp_method(method, **kwargs):
return method


def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
limit_direction='forward',
fill_value=None, bounds_error=False, order=None, **kwargs):
def interpolate(xvalues, yvalues, method='linear', limit=None,
limit_direction='forward',
fill_value=None, bounds_error=False, order=None, **kwargs):
"""
Logic for the 1-d interpolation. The result should be 1-d, inputs
xvalues and yvalues will each be 1-d arrays of the same length.
Expand Down Expand Up @@ -144,7 +144,7 @@ def _interp_limit(invalid, fw_limit, bw_limit):

sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic',
'barycentric', 'krogh', 'spline', 'polynomial',
'piecewise_polynomial', 'pchip']
'piecewise_polynomial', 'pchip', 'akima']
if method in sp_methods:
inds = np.asarray(xvalues)
# hack for DatetimeIndex, #1646
Expand All @@ -156,6 +156,8 @@ def _interp_limit(invalid, fw_limit, bw_limit):
bounds_error=bounds_error, order=order, **kwargs)
result[violate_limit] = np.nan
return result
else:
raise ValueError('interpolation method not found')


def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None,
Expand Down Expand Up @@ -214,20 +216,51 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None,
y = y.copy()
if not new_x.flags.writeable:
new_x = new_x.copy()
method = alt_methods[method]
new_y = method(x, y, new_x, **kwargs)
if method == 'akima':
try:
interpolator = interpolate.Akima1DInterpolator(x, y)
except AttributeError:
raise ImportError("Your version of scipy does not support "
"Akima interpolation" )
new_y = interpolator(new_x)
else:
method = alt_methods[method]
new_y = method(x, y, new_x, **kwargs)
return new_y


def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None):
""" perform an actual interpolation of values, values will be make 2-d if
needed fills inplace, returns the result
def pad(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None):
"""
Perform an actual interpolation of values. 1-d values will be made 2-d temporarily.
Returns the result
"""

ndim = values.ndim
shape = values.shape

func = partial(pad, method=method, limit=limit, fill_value=fill_value, dtype=dtype)

if ndim > 2:
if ndim == 3:
if axis == 0:
for n in range(shape[1]):
values[:,n] = func(values[:,n], axis=1)
else:
for n in range(shape[0]):
values[n] = func(values[n], axis=(1 if axis == 1 else 0))
else:
if axis == 0:
for n in range(shape[1]):
values[:,n] = func(values[:,n], axis=0)
else:
for n in range(shape[0]):
values[n] = func(values[n], axis=axis-1)

return values

transf = (lambda x: x) if axis == 0 else (lambda x: x.T)

# reshape a 1 dim if needed
ndim = values.ndim
if values.ndim == 1:
if axis != 0: # pragma: no cover
raise AssertionError("cannot interpolate on a ndim == 1 with "
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1167,6 +1167,15 @@ def test_interp_alt_scipy(self):
expected.ix[5,'A'] = 6.125
assert_frame_equal(result, expected)

try:
from scipy.interpolate import Akima1DInterpolator
except ImportError:
raise nose.SkipTest('scipy.interpolate.Akima1DInterpolator missing')
result = df.interpolate(method='akima')
expected.ix[2,'A'] = 3
expected.ix[5,'A'] = 6
assert_frame_equal(result, expected)

def test_interp_rowwise(self):
df = DataFrame({0: [1, 2, np.nan, 4],
1: [2, 3, 4, np.nan],
Expand Down
77 changes: 76 additions & 1 deletion pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1454,20 +1454,95 @@ def test_fillna(self):
assert_frame_equal(filled['ItemA'],
panel['ItemA'].fillna(method='backfill'))

# Fill forward.
filled = self.panel.fillna(method='ffill')
assert_frame_equal(filled['ItemA'],
self.panel['ItemA'].fillna(method='ffill'))

# With limit.
filled = self.panel.fillna(method='backfill', limit=1)
assert_frame_equal(filled['ItemA'],
self.panel['ItemA'].fillna(method='backfill', limit=1))

# With downcast.
rounded = self.panel.apply(lambda x: x.apply(np.round))
filled = rounded.fillna(method='backfill', downcast='infer')
assert_frame_equal(filled['ItemA'],
rounded['ItemA'].fillna(method='backfill', downcast='infer'))

# Now explicitly request axis 1.
filled = self.panel.fillna(method='backfill', axis=1)
assert_frame_equal(filled['ItemA'],
self.panel['ItemA'].fillna(method='backfill', axis=0))

# Fill along axis 2, equivalent to filling along axis 1 of each
# DataFrame.
filled = self.panel.fillna(method='backfill', axis=2)
assert_frame_equal(filled['ItemA'],
self.panel['ItemA'].fillna(method='backfill', axis=1))

# Fill an empty panel.
empty = self.panel.reindex(items=[])
filled = empty.fillna(0)
assert_panel_equal(filled, empty)

# either method or value must be specified
self.assertRaises(ValueError, self.panel.fillna)
# method and value can not both be specified
self.assertRaises(ValueError, self.panel.fillna, 5, method='ffill')

# can't pass list or tuple, only scalar
self.assertRaises(TypeError, self.panel.fillna, [1, 2])
self.assertRaises(TypeError, self.panel.fillna, (1, 2))

# limit not implemented when only value is specified
p = Panel(np.random.randn(3,4,5))
p.iloc[0:2,0:2,0:2] = np.nan
self.assertRaises(NotImplementedError, lambda : p.fillna(999,limit=1))
self.assertRaises(NotImplementedError, lambda : p.fillna(999, limit=1))

def test_fillna_axis_0(self):
# GH 8395

# Forward fill along axis 0, interpolating values across DataFrames.
filled = self.panel.fillna(method='ffill', axis=0)
nan_indexes = self.panel['ItemB']['C'].index[
self.panel['ItemB']['C'].apply(np.isnan)]

# Values from ItemA are filled into ItemB.
assert_series_equal(filled['ItemB']['C'][nan_indexes],
self.panel['ItemA']['C'][nan_indexes])

# Backfill along axis 0.
filled = self.panel.fillna(method='backfill', axis=0)

# The test data lacks values that can be backfilled on axis 0.
assert_panel_equal(filled, self.panel)

# Reverse the panel and backfill along axis 0, to properly test
# backfill.
reverse_panel = self.panel.reindex_axis(reversed(self.panel.axes[0]))
filled = reverse_panel.fillna(method='bfill', axis=0)
nan_indexes = reverse_panel['ItemB']['C'].index[
reverse_panel['ItemB']['C'].apply(np.isnan)]
assert_series_equal(filled['ItemB']['C'][nan_indexes],
reverse_panel['ItemA']['C'][nan_indexes])

# Fill along axis 0 with limit.
filled = self.panel.fillna(method='ffill', axis=0, limit=1)
a_nan = self.panel['ItemA']['C'].index[
self.panel['ItemA']['C'].apply(np.isnan)]
b_nan = self.panel['ItemB']['C'].index[
self.panel['ItemB']['C'].apply(np.isnan)]

# Cells that are nan in ItemB but not in ItemA remain unfilled in
# ItemC.
self.assertTrue(
filled['ItemC']['C'][b_nan.diff(a_nan)].apply(np.isnan).all())

# filling across axis 0 is not implemented for mixed dtypes
panel = self.panel.copy()
panel['str'] = 'foo'
self.assertRaises(NotImplementedError, lambda : panel.fillna(method='ffill', axis=0))

def test_ffill_bfill(self):
assert_panel_equal(self.panel.ffill(),
Expand Down
Loading