From a63dc31083514c32df279523d3c01254ef6b8151 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A1bor=20Lipt=C3=A1k?= Date: Fri, 8 Apr 2016 20:31:08 -0400 Subject: [PATCH] Implement Akima1DInterpolator --- ci/requirements-3.5_OSX.run | 1 - doc/source/conf.py | 1 + doc/source/missing_data.rst | 11 +++-- doc/source/whatsnew/v0.18.1.txt | 1 + pandas/core/generic.py | 8 +++- pandas/core/missing.py | 71 +++++++++++++++++++++++++++++---- pandas/tests/test_generic.py | 24 ++++++++++- 7 files changed, 103 insertions(+), 14 deletions(-) diff --git a/ci/requirements-3.5_OSX.run b/ci/requirements-3.5_OSX.run index 578f79243c0c0..ffa291ab7ff77 100644 --- a/ci/requirements-3.5_OSX.run +++ b/ci/requirements-3.5_OSX.run @@ -4,7 +4,6 @@ openpyxl xlsxwriter xlrd xlwt -scipy numexpr pytables html5lib diff --git a/doc/source/conf.py b/doc/source/conf.py index 709d9b32984c0..87510d13ee484 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -292,6 +292,7 @@ 'matplotlib': ('http://matplotlib.org/', None), 'python': ('http://docs.python.org/3', None), 'numpy': ('http://docs.scipy.org/doc/numpy', None), + 'scipy': ('http://docs.scipy.org/doc/scipy', None), 'py': ('http://pylib.readthedocs.org/en/latest/', None) } import glob diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index fcc8ac896b9f0..3ede97a902696 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -392,9 +392,12 @@ The ``method`` argument gives access to fancier interpolation methods. If you have scipy_ installed, you can set pass the name of a 1-d interpolation routine to ``method``. You'll want to consult the full scipy interpolation documentation_ and reference guide_ for details. The appropriate interpolation method will depend on the type of data you are working with. -For example, if you are dealing with a time series that is growing at an increasing rate, -``method='quadratic'`` may be appropriate. 
If you have values approximating a cumulative -distribution function, then ``method='pchip'`` should work well. + +* If you are dealing with a time series that is growing at an increasing rate, + ``method='quadratic'`` may be appropriate. +* If you have values approximating a cumulative distribution function, + then ``method='pchip'`` should work well. +* To fill missing values with the goal of smooth plotting, use ``method='akima'``. .. warning:: @@ -406,6 +409,8 @@ distribution function, then ``method='pchip'`` should work well. df.interpolate(method='pchip') + df.interpolate(method='akima') + When interpolating via a polynomial or spline approximation, you must also specify the degree or order of the approximation: diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index 8781c76118aa6..ba0cea94e1323 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -58,6 +58,7 @@ Other Enhancements - ``pd.read_csv()`` now supports opening ZIP files that contains a single CSV, via extension inference or explict ``compression='zip'`` (:issue:`12175`) - ``pd.read_csv()`` now supports opening files using xz compression, via extension inference or explicit ``compression='xz'`` is specified; ``xz`` compressions is also supported by ``DataFrame.to_csv`` in the same way (:issue:`11852`) - ``pd.read_msgpack()`` now always gives writeable ndarrays even when compression is used (:issue:`12359`). +- ``interpolate()`` now supports ``method='akima'`` (:issue:`7588`). - ``Index.take`` now handles ``allow_fill`` and ``fill_value`` consistently (:issue:`12631`) .. 
ipython:: python diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e450ac7e0cdc1..30252f7068424 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3451,7 +3451,8 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, ---------- method : {'linear', 'time', 'index', 'values', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh', - 'polynomial', 'spline' 'piecewise_polynomial', 'pchip'} + 'polynomial', 'spline', 'piecewise_polynomial', 'pchip', + 'akima'} * 'linear': ignore the index and treat the values as equally spaced. This is the only method supported on MultiIndexes. @@ -3465,13 +3466,16 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, require that you also specify an `order` (int), e.g. df.interpolate(method='polynomial', order=4). These use the actual numerical values of the index. - * 'krogh', 'piecewise_polynomial', 'spline', and 'pchip' are all + * 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima' are all wrappers around the scipy interpolation methods of similar names. These use the actual numerical values of the index. See the scipy documentation for more on their behavior `here `__ # noqa `and here `__ # noqa + .. 
versionadded:: 0.18.1 + Added support for the 'akima' method + axis : {0, 1}, default 0 * 0: fill column-by-column * 1: fill row-by-row diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 7ca96ef7b602e..dd78979a9da7c 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -82,7 +82,7 @@ def clean_interp_method(method, **kwargs): order = kwargs.get('order') valid = ['linear', 'time', 'index', 'values', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'polynomial', 'krogh', - 'piecewise_polynomial', 'pchip', 'spline'] + 'piecewise_polynomial', 'pchip', 'akima', 'spline'] if method in ('spline', 'polynomial') and order is None: raise ValueError("You must specify the order of the spline or " "polynomial.") @@ -188,7 +188,7 @@ def _interp_limit(invalid, fw_limit, bw_limit): sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh', 'spline', 'polynomial', - 'piecewise_polynomial', 'pchip'] + 'piecewise_polynomial', 'pchip', 'akima'] if method in sp_methods: inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 @@ -232,12 +232,19 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, # GH 5975, scipy.interp1d can't hande datetime64s x, new_x = x._values.astype('i8'), new_x.astype('i8') - try: - alt_methods['pchip'] = interpolate.pchip_interpolate - except AttributeError: - if method == 'pchip': - raise ImportError("Your version of scipy does not support " + if method == 'pchip': + try: + alt_methods['pchip'] = interpolate.pchip_interpolate + except AttributeError: + raise ImportError("Your version of Scipy does not support " "PCHIP interpolation.") + elif method == 'akima': + try: + from scipy.interpolate import Akima1DInterpolator # noqa + alt_methods['akima'] = _akima_interpolate + except ImportError: + raise ImportError("Your version of Scipy does not support " + "Akima interpolation.") interp1d_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial'] 
@@ -267,6 +274,56 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, return new_y +def _akima_interpolate(xi, yi, x, der=0, axis=0): + """ + Convenience function for akima interpolation. + xi and yi are arrays of values used to approximate some function f, + with ``yi = f(xi)``. + + See `Akima1DInterpolator` for details. + + Parameters + ---------- + xi : array_like + A sorted list of x-coordinates, of length N. + yi : array_like + A 1-D array of real values. `yi`'s length along the interpolation + axis must be equal to the length of `xi`. If N-D array, use axis + parameter to select correct axis. + x : scalar or array_like + Of length M. + der : int or list, optional + How many derivatives to extract; None for all potentially + nonzero derivatives (that is a number equal to the number + of points), or a list of derivatives to extract. This number + includes the function value as 0th derivative. + axis : int, optional + Axis in the yi array corresponding to the x-coordinate values. 
+ + See Also + -------- + scipy.interpolate.Akima1DInterpolator + + Returns + ------- + y : scalar or array_like + The result, of length R or length M or M by R, + + """ + from scipy import interpolate + try: + P = interpolate.Akima1DInterpolator(xi, yi, axis=axis) + except TypeError: + # Scipy earlier than 0.17.0 missing axis + P = interpolate.Akima1DInterpolator(xi, yi) + if der == 0: + return P(x) + elif interpolate._isscalar(der): + return P(x, der=der) + else: + return [P(x, nu) for nu in der] + + def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None, dtype=None): """ perform an actual interpolation of values, values will be make 2-d if diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 7c31e71bbaf05..46678a72688aa 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -34,6 +34,13 @@ def _skip_if_no_pchip(): except ImportError: raise nose.SkipTest('scipy.interpolate.pchip missing') + +def _skip_if_no_akima(): + try: + from scipy.interpolate import Akima1DInterpolator # noqa + except ImportError: + raise nose.SkipTest('scipy.interpolate.Akima1DInterpolator missing') + # ---------------------------------------------------------------------- # Generic types test cases @@ -734,7 +741,7 @@ def test_interpolate(self): non_ts[0] = np.NaN self.assertRaises(ValueError, non_ts.interpolate, method='time') - def test_interp_regression(self): + def test_interpolate_pchip(self): tm._skip_if_no_scipy() _skip_if_no_pchip() @@ -747,6 +754,21 @@ def test_interp_regression(self): # does not blow up, GH5977 interp_s[49:51] + def test_interpolate_akima(self): + tm._skip_if_no_scipy() + _skip_if_no_akima() + + ser = Series([10, 11, 12, 13]) + + expected = Series([11.00, 11.25, 11.50, 11.75, + 12.00, 12.25, 12.50, 12.75, 13.00], + index=Index([1.0, 1.25, 1.5, 1.75, + 2.0, 2.25, 2.5, 2.75, 3.0])) + # interpolate at new_index + new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])) + interp_s 
= ser.reindex(new_index).interpolate(method='akima') + assert_series_equal(interp_s[1:3], expected) + def test_interpolate_corners(self): s = Series([np.nan, np.nan]) assert_series_equal(s.interpolate(), s)