Skip to content

Solve missing interpolation method (cubicspline) #33670

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ Other enhancements
``df.to_csv(path, compression={'method': 'gzip', 'compresslevel': 1})``
(:issue:`33196`)
- :meth:`~pandas.core.groupby.GroupBy.transform` has gained ``engine`` and ``engine_kwargs`` arguments that support executing functions with ``Numba`` (:issue:`32854`)
- :meth:`~pandas.core.resample.Resampler.interpolate` now supports SciPy interpolation method :class:`scipy.interpolate.CubicSpline` as method ``cubicspline`` (:issue:`33670`)
-

.. ---------------------------------------------------------------------------
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6671,9 +6671,9 @@ def replace(
values of the index. Both 'polynomial' and 'spline' require that
you also specify an `order` (int), e.g.
``df.interpolate(method='polynomial', order=5)``.
* 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima':
Wrappers around the SciPy interpolation methods of similar
names. See `Notes`.
* 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima',
'cubicspline': Wrappers around the SciPy interpolation methods of
similar names. See `Notes`.
* 'from_derivatives': Refers to
`scipy.interpolate.BPoly.from_derivatives` which
replaces 'piecewise_polynomial' interpolation method in
Expand Down
95 changes: 86 additions & 9 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def clean_interp_method(method, **kwargs):
"akima",
"spline",
"from_derivatives",
"cubicspline",
]
if method in ("spline", "polynomial") and order is None:
raise ValueError("You must specify the order of the spline or polynomial.")
Expand Down Expand Up @@ -293,6 +294,7 @@ def interpolate_1d(
"piecewise_polynomial",
"pchip",
"akima",
"cubicspline",
]

if method in sp_methods:
Expand Down Expand Up @@ -341,14 +343,11 @@ def _interpolate_scipy_wrapper(
x, new_x = x._values.astype("i8"), new_x.astype("i8")

if method == "pchip":
try:
alt_methods["pchip"] = interpolate.pchip_interpolate
except AttributeError as err:
raise ImportError(
"Your version of Scipy does not support PCHIP interpolation."
) from err
alt_methods["pchip"] = interpolate.pchip_interpolate
elif method == "akima":
alt_methods["akima"] = _akima_interpolate
elif method == "cubicspline":
alt_methods["cubicspline"] = _cubicspline_interpolate

interp1d_methods = [
"nearest",
Expand Down Expand Up @@ -406,7 +405,7 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False):
der : int or list
How many derivatives to extract; None for all potentially nonzero
derivatives (that is a number equal to the number of points), or a
list of derivatives to extract. This numberincludes the function
list of derivatives to extract. This number includes the function
value as 0th derivative.
extrapolate : bool, optional
Whether to extrapolate to out-of-bounds points based on first and last
Expand Down Expand Up @@ -446,8 +445,7 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0):
A 1-D array of real values. `yi`'s length along the interpolation
axis must be equal to the length of `xi`. If N-D array, use axis
parameter to select correct axis.
x : scalar or array_like
Of length M.
x : scalar or array_like of length M.
der : int or list, optional
How many derivatives to extract; None for all potentially
nonzero derivatives (that is a number equal to the number
Expand Down Expand Up @@ -478,6 +476,85 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0):
return [P(x, nu) for nu in der]


def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolate=None):
"""
Convenience function for cubic spline data interpolator.

See `scipy.interpolate.CubicSpline` for details.

Parameters
----------
xi : array_like, shape (n,)
1-d array containing values of the independent variable.
Values must be real, finite and in strictly increasing order.
yi : array_like
Array containing values of the dependent variable. It can have
arbitrary number of dimensions, but the length along ``axis``
(see below) must match the length of ``x``. Values must be finite.
x : scalar or array_like, shape (m,)
axis : int, optional
Axis along which `y` is assumed to be varying. Meaning that for
``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``.
Default is 0.
bc_type : string or 2-tuple, optional
Boundary condition type. Two additional equations, given by the
boundary conditions, are required to determine all coefficients of
polynomials on each segment [2]_.
If `bc_type` is a string, then the specified condition will be applied
at both ends of a spline. Available conditions are:
* 'not-a-knot' (default): The first and second segment at a curve end
are the same polynomial. It is a good default when there is no
information on boundary conditions.
* 'periodic': The interpolated functions is assumed to be periodic
of period ``x[-1] - x[0]``. The first and last value of `y` must be
identical: ``y[0] == y[-1]``. This boundary condition will result in
``y'[0] == y'[-1]`` and ``y''[0] == y''[-1]``.
* 'clamped': The first derivative at curves ends are zero. Assuming
a 1D `y`, ``bc_type=((1, 0.0), (1, 0.0))`` is the same condition.
* 'natural': The second derivative at curve ends are zero. Assuming
a 1D `y`, ``bc_type=((2, 0.0), (2, 0.0))`` is the same condition.
If `bc_type` is a 2-tuple, the first and the second value will be
applied at the curve start and end respectively. The tuple values can
be one of the previously mentioned strings (except 'periodic') or a
tuple `(order, deriv_values)` allowing to specify arbitrary
derivatives at curve ends:
* `order`: the derivative order, 1 or 2.
* `deriv_value`: array_like containing derivative values, shape must
be the same as `y`, excluding ``axis`` dimension. For example, if
`y` is 1D, then `deriv_value` must be a scalar. If `y` is 3D with
the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D
and have the shape (n0, n1).
extrapolate : {bool, 'periodic', None}, optional
If bool, determines whether to extrapolate to out-of-bounds points
based on first and last intervals, or to return NaNs. If 'periodic',
periodic extrapolation is used. If None (default), ``extrapolate`` is
set to 'periodic' for ``bc_type='periodic'`` and to True otherwise.

See Also
--------
scipy.interpolate.CubicHermiteSpline

Returns
-------
y : scalar or array_like
The result, of shape (m,)

References
----------
.. [1] `Cubic Spline Interpolation
<https://en.wikiversity.org/wiki/Cubic_Spline_Interpolation>`_
on Wikiversity.
.. [2] Carl de Boor, "A Practical Guide to Splines", Springer-Verlag, 1978.
"""
from scipy import interpolate

P = interpolate.CubicSpline(
xi, yi, axis=axis, bc_type=bc_type, extrapolate=extrapolate
)

return P(x)


def interpolate_2d(
values, method="pad", axis=0, limit=None, fill_value=None, dtype=None
):
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/series/methods/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
"from_derivatives",
"pchip",
"akima",
"cubicspline",
]
)
def nontemporal_method(request):
Expand Down Expand Up @@ -55,6 +56,7 @@ def nontemporal_method(request):
"from_derivatives",
"pchip",
"akima",
"cubicspline",
]
)
def interp_methods_ind(request):
Expand Down Expand Up @@ -97,6 +99,22 @@ def test_interpolate_time_raises_for_non_timeseries(self):
with pytest.raises(ValueError, match=msg):
non_ts.interpolate(method="time")

@td.skip_if_no_scipy
def test_interpolate_cubicspline(self):
    # Quarter-step labels placed between the original integer labels;
    # reindexing onto them creates the gaps that interpolate() fills.
    extra_points = Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])

    ser = Series([10, 11, 12, 13])
    new_index = ser.index.union(extra_points).astype(float)

    expected_index = Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0])
    expected = Series(
        [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
        index=expected_index,
    )

    # interpolate at new_index, then keep the span covered by ``expected``
    reindexed = ser.reindex(new_index)
    result = reindexed.interpolate(method="cubicspline")[1:3]

    tm.assert_series_equal(result, expected)

@td.skip_if_no_scipy
def test_interpolate_pchip(self):

Expand Down