From ffc0958bfce80d47486bb3a2a33f18f96756ae2e Mon Sep 17 00:00:00 2001 From: khyox Date: Sun, 19 Apr 2020 23:15:21 -0700 Subject: [PATCH 1/6] Solve missing interpolation method (cubicspline) As of commit 8bb2cc1, the scipy.interpolate.CubicSpline method is referenced in the pandas documentation (see pandas/core/generic.py), but it is not wrapped by any interpolation method. This commit solves this by adding the corresponding wrapper. SciPy's CubicSpline is a cubic spline data interpolator that allows explicit control of the boundary conditions for the interval. Changes to be committed: modified: ../../../core/missing.py modified: test_interpolate.py --- pandas/core/missing.py | 93 ++++++++++++++++++- .../tests/series/methods/test_interpolate.py | 18 ++++ 2 files changed, 108 insertions(+), 3 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index c46aed999f45a..6503fcc793696 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -112,6 +112,7 @@ def clean_interp_method(method, **kwargs): "akima", "spline", "from_derivatives", + "cubicspline", ] if method in ("spline", "polynomial") and order is None: raise ValueError("You must specify the order of the spline or polynomial.") @@ -293,6 +294,7 @@ def interpolate_1d( "piecewise_polynomial", "pchip", "akima", + "cubicspline", ] if method in sp_methods: @@ -349,6 +351,13 @@ def _interpolate_scipy_wrapper( ) from err elif method == "akima": alt_methods["akima"] = _akima_interpolate + elif method == "cubicspline": + try: + alt_methods["cubicspline"] = _cubicspline_interpolate + except AttributeError as err: + raise ImportError( + "Your version of Scipy does not support CubicSpline."
+ ) from err interp1d_methods = [ "nearest", @@ -406,7 +415,7 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False): der : int or list How many derivatives to extract; None for all potentially nonzero derivatives (that is a number equal to the number of points), or a - list of derivatives to extract. This numberincludes the function + list of derivatives to extract. This number includes the function value as 0th derivative. extrapolate : bool, optional Whether to extrapolate to ouf-of-bounds points based on first and last @@ -446,8 +455,7 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0): A 1-D array of real values. `yi`'s length along the interpolation axis must be equal to the length of `xi`. If N-D array, use axis parameter to select correct axis. - x : scalar or array_like - Of length M. + x : scalar or array_like of length M. der : int or list, optional How many derivatives to extract; None for all potentially nonzero derivatives (that is a number equal to the number @@ -478,6 +486,85 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0): return [P(x, nu) for nu in der] +def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolate=None): + """ + Convenience function for cubic spline data interpolator. + + See `scipy.interpolate.CubicSpline` for details. + + Parameters + ---------- + xi : array_like, shape (n,) + 1-d array containing values of the independent variable. + Values must be real, finite and in strictly increasing order. + yi : array_like + Array containing values of the dependent variable. It can have + arbitrary number of dimensions, but the length along ``axis`` + (see below) must match the length of ``xi``. Values must be finite. + x : scalar or array_like, shape (m,) + axis : int, optional + Axis along which `yi` is assumed to be varying. Meaning that for + ``xi[i]`` the corresponding values are ``np.take(yi, i, axis=axis)``. + Default is 0. + bc_type : string or 2-tuple, optional + Boundary condition type.
Two additional equations, given by the + boundary conditions, are required to determine all coefficients of + polynomials on each segment [2]_. + If `bc_type` is a string, then the specified condition will be applied + at both ends of a spline. Available conditions are: + * 'not-a-knot' (default): The first and second segment at a curve end + are the same polynomial. It is a good default when there is no + information on boundary conditions. + * 'periodic': The interpolated function is assumed to be periodic + of period ``x[-1] - x[0]``. The first and last value of `y` must be + identical: ``y[0] == y[-1]``. This boundary condition will result in + ``y'[0] == y'[-1]`` and ``y''[0] == y''[-1]``. + * 'clamped': The first derivatives at curve ends are zero. Assuming + a 1D `y`, ``bc_type=((1, 0.0), (1, 0.0))`` is the same condition. + * 'natural': The second derivatives at curve ends are zero. Assuming + a 1D `y`, ``bc_type=((2, 0.0), (2, 0.0))`` is the same condition. + If `bc_type` is a 2-tuple, the first and the second value will be + applied at the curve start and end respectively. The tuple values can + be one of the previously mentioned strings (except 'periodic') or a + tuple `(order, deriv_value)` which allows specifying arbitrary + derivatives at curve ends: + * `order`: the derivative order, 1 or 2. + * `deriv_value`: array_like containing derivative values, shape must + be the same as `y`, excluding ``axis`` dimension. For example, if + `y` is 1D, then `deriv_value` must be a scalar. If `y` is 3D with + the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D + and have the shape (n0, n1). + extrapolate : {bool, 'periodic', None}, optional + If bool, determines whether to extrapolate to out-of-bounds points + based on first and last intervals, or to return NaNs. If 'periodic', + periodic extrapolation is used. If None (default), ``extrapolate`` is + set to 'periodic' for ``bc_type='periodic'`` and to True otherwise.
+ + See Also + -------- + scipy.interpolate.CubicHermiteSpline + + Returns + ------- + y : scalar or array_like + The result, of shape (m,) + + References + ---------- + .. [1] `Cubic Spline Interpolation + <https://en.wikiversity.org/wiki/Cubic_Spline_Interpolation>`_ + on Wikiversity. + .. [2] Carl de Boor, "A Practical Guide to Splines", Springer-Verlag, 1978. + """ + from scipy import interpolate + + P = interpolate.CubicSpline( + xi, yi, axis=axis, bc_type=bc_type, extrapolate=extrapolate + ) + + return P(x) + + def interpolate_2d( values, method="pad", axis=0, limit=None, fill_value=None, dtype=None ): diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index 6844225a81a8f..ee3ea10ae099d 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -26,6 +26,7 @@ "from_derivatives", "pchip", "akima", + "cubicspline", ] ) def nontemporal_method(request): @@ -55,6 +56,7 @@ def nontemporal_method(request): "from_derivatives", "pchip", "akima", + "cubicspline", ] ) def interp_methods_ind(request): @@ -97,6 +99,22 @@ def test_interpolate_time_raises_for_non_timeseries(self): with pytest.raises(ValueError, match=msg): non_ts.interpolate(method="time") + @td.skip_if_no_scipy + def test_interpolate_cubicspline(self): + + ser = Series([10, 11, 12, 13]) + + expected = Series( + [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], + index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), + ) + # interpolate at new_index + new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( + float + ) + interp_s = ser.reindex(new_index).interpolate(method="cubicspline") + tm.assert_series_equal(interp_s[1:3], expected) + @td.skip_if_no_scipy def test_interpolate_pchip(self): From 487fa7fb9d6e445991b9626f10aac01c4294ebed Mon Sep 17 00:00:00 2001 From: khyox Date: Mon, 20 Apr 2020 20:12:38 -0700 Subject: [PATCH 2/6] Remove try/except blocks (remnant of old SciPy releases support) Changes
to be committed: modified: pandas/core/missing.py --- pandas/core/missing.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 6503fcc793696..2acaa808d8324 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -343,21 +343,11 @@ def _interpolate_scipy_wrapper( x, new_x = x._values.astype("i8"), new_x.astype("i8") if method == "pchip": - try: - alt_methods["pchip"] = interpolate.pchip_interpolate - except AttributeError as err: - raise ImportError( - "Your version of Scipy does not support PCHIP interpolation." - ) from err + alt_methods["pchip"] = interpolate.pchip_interpolate elif method == "akima": alt_methods["akima"] = _akima_interpolate elif method == "cubicspline": - try: - alt_methods["cubicspline"] = _cubicspline_interpolate - except AttributeError as err: - raise ImportError( - "Your version of Scipy does not support CubicSpline." - ) from err + alt_methods["cubicspline"] = _cubicspline_interpolate interp1d_methods = [ "nearest", From 7213e495b3296383cd899534528a19598a843bab Mon Sep 17 00:00:00 2001 From: khyox Date: Mon, 20 Apr 2020 23:00:31 -0700 Subject: [PATCH 3/6] Add whatsnew entry and update the docs Changes to be committed: modified: doc/source/whatsnew/v1.1.0.rst modified: pandas/core/generic.py --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/generic.py | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 07849702c646d..a98c2de29aeba 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -99,6 +99,7 @@ Other enhancements ``df.to_csv(path, compression={'method': 'gzip', 'compresslevel': 1}`` (:issue:`33196`) - :meth:`~pandas.core.groupby.GroupBy.transform` has gained ``engine`` and ``engine_kwargs`` arguments that supports executing functions with ``Numba`` (:issue:`32854`) +- 
:meth:`~pandas.core.resample.Resampler.interpolate` now supports SciPy interpolation method :class:`scipy.interpolate.CubicSpline` as method ``cubicspline`` - .. --------------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2f35a5b6f9a7e..6a9ad2a9f6393 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -591,7 +591,7 @@ def swapaxes(self: FrameOrSeries, axis1, axis2, copy=True) -> FrameOrSeries: # ignore needed because of NDFrame constructor is different than # DataFrame/Series constructors. - return self._constructor(new_values, *new_axes).__finalize__( # type: ignore + return self._constructor(new_values, *new_axes).__finalize__( self, method="swapaxes" ) @@ -4039,7 +4039,7 @@ def add_prefix(self: FrameOrSeries, prefix: str) -> FrameOrSeries: f = functools.partial("{prefix}{}".format, prefix=prefix) mapper = {self._info_axis_name: f} - return self.rename(**mapper) # type: ignore + return self.rename(**mapper) def add_suffix(self: FrameOrSeries, suffix: str) -> FrameOrSeries: """ @@ -4098,7 +4098,7 @@ def add_suffix(self: FrameOrSeries, suffix: str) -> FrameOrSeries: f = functools.partial("{}{suffix}".format, suffix=suffix) mapper = {self._info_axis_name: f} - return self.rename(**mapper) # type: ignore + return self.rename(**mapper) def sort_values( self, @@ -6671,9 +6671,9 @@ def replace( values of the index. Both 'polynomial' and 'spline' require that you also specify an `order` (int), e.g. ``df.interpolate(method='polynomial', order=5)``. - * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima': - Wrappers around the SciPy interpolation methods of similar - names. See `Notes`. + * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima', + 'cubicspline': Wrappers around the SciPy interpolation methods of + similar names. See `Notes`. 
* 'from_derivatives': Refers to `scipy.interpolate.BPoly.from_derivatives` which replaces 'piecewise_polynomial' interpolation method in From 1bdf55e0b742c4c07e3af4264a1acaa6b8baf3a8 Mon Sep 17 00:00:00 2001 From: khyox Date: Wed, 22 Apr 2020 22:54:00 -0700 Subject: [PATCH 4/6] Add PR number to the whatsnew entry On branch CubicSpline modified: doc/source/whatsnew/v1.1.0.rst --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index a98c2de29aeba..cd1cb0b64f74a 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -99,7 +99,7 @@ Other enhancements ``df.to_csv(path, compression={'method': 'gzip', 'compresslevel': 1}`` (:issue:`33196`) - :meth:`~pandas.core.groupby.GroupBy.transform` has gained ``engine`` and ``engine_kwargs`` arguments that supports executing functions with ``Numba`` (:issue:`32854`) -- :meth:`~pandas.core.resample.Resampler.interpolate` now supports SciPy interpolation method :class:`scipy.interpolate.CubicSpline` as method ``cubicspline`` +- :meth:`~pandas.core.resample.Resampler.interpolate` now supports SciPy interpolation method :class:`scipy.interpolate.CubicSpline` as method ``cubicspline`` (:issue:`33670`) - .. 
--------------------------------------------------------------------------- From 9c489711e2cd030ac8149d86d765e52debc06b92 Mon Sep 17 00:00:00 2001 From: khyox Date: Wed, 22 Apr 2020 23:47:44 -0700 Subject: [PATCH 5/6] Test: Clean final assert expression On branch CubicSpline modified: pandas/tests/series/methods/test_interpolate.py --- pandas/tests/series/methods/test_interpolate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index ee3ea10ae099d..b26cb21bc5f3d 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -112,8 +112,8 @@ def test_interpolate_cubicspline(self): new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( float ) - interp_s = ser.reindex(new_index).interpolate(method="cubicspline") - tm.assert_series_equal(interp_s[1:3], expected) + result = ser.reindex(new_index).interpolate(method="cubicspline")[1:3] + tm.assert_series_equal(result, expected) @td.skip_if_no_scipy def test_interpolate_pchip(self): From 3fee8c081e5f3c7723ea401b1aafd2bdad935f6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Manuel=20Mart=C3=AD?= Date: Thu, 23 Apr 2020 07:41:46 -0700 Subject: [PATCH 6/6] Restore ignore types for mypy ver 0.730 used in CI Not needed for mypy version 0.740 --- pandas/core/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6a9ad2a9f6393..9b4854d8024a8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -591,7 +591,7 @@ def swapaxes(self: FrameOrSeries, axis1, axis2, copy=True) -> FrameOrSeries: # ignore needed because of NDFrame constructor is different than # DataFrame/Series constructors. 
- return self._constructor(new_values, *new_axes).__finalize__( + return self._constructor(new_values, *new_axes).__finalize__( # type: ignore self, method="swapaxes" ) @@ -4039,7 +4039,7 @@ def add_prefix(self: FrameOrSeries, prefix: str) -> FrameOrSeries: f = functools.partial("{prefix}{}".format, prefix=prefix) mapper = {self._info_axis_name: f} - return self.rename(**mapper) + return self.rename(**mapper) # type: ignore def add_suffix(self: FrameOrSeries, suffix: str) -> FrameOrSeries: """ @@ -4098,7 +4098,7 @@ def add_suffix(self: FrameOrSeries, suffix: str) -> FrameOrSeries: f = functools.partial("{}{suffix}".format, suffix=suffix) mapper = {self._info_axis_name: f} - return self.rename(**mapper) + return self.rename(**mapper) # type: ignore def sort_values( self,