From d227f8abd074119e67d0768f5456429cd2de95e3 Mon Sep 17 00:00:00 2001 From: TrigonaMinima Date: Thu, 21 Feb 2019 11:34:44 +0530 Subject: [PATCH] BUG: pd.Series.interpolate non-numeric index column (21662) --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/core/generic.py | 20 +++++++-- pandas/tests/series/test_missing.py | 68 +++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index ccf5c43280765..98fb2af19be64 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -222,7 +222,7 @@ Numeric - Bug in :meth:`to_numeric` in which numbers were being coerced to float, even though ``errors`` was not ``coerce`` (:issue:`24910`) - Bug in error messages in :meth:`DataFrame.corr` and :meth:`Series.corr`. Added the possibility of using a callable. (:issue:`25729`) - Bug in :meth:`Series.divmod` and :meth:`Series.rdivmod` which would raise an (incorrect) ``ValueError`` rather than return a pair of :class:`Series` objects as result (:issue:`25557`) -- +- Raises a helpful exception when a non-numeric index is passed to :meth:`Series.interpolate` with a method that requires a numeric index. 
(:issue:`21662`) - - diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1717b00664f92..de237d32235ca 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -25,10 +25,10 @@ from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask from pandas.core.dtypes.common import ( ensure_int64, ensure_object, is_bool, is_bool_dtype, - is_datetime64_any_dtype, is_datetime64tz_dtype, is_dict_like, - is_extension_array_dtype, is_integer, is_list_like, is_number, - is_numeric_dtype, is_object_dtype, is_period_arraylike, is_re_compilable, - is_scalar, is_timedelta64_dtype, pandas_dtype) + is_datetime64_any_dtype, is_datetime64_dtype, is_datetime64tz_dtype, + is_dict_like, is_extension_array_dtype, is_integer, is_list_like, + is_number, is_numeric_dtype, is_object_dtype, is_period_arraylike, + is_re_compilable, is_scalar, is_timedelta64_dtype, pandas_dtype) from pandas.core.dtypes.generic import ABCDataFrame, ABCPanel, ABCSeries from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import isna, notna @@ -6863,6 +6863,18 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, index = np.arange(len(_maybe_transposed_self._get_axis(alt_ax))) else: index = _maybe_transposed_self._get_axis(alt_ax) + methods = {"index", "values", "nearest", "time"} + is_numeric_or_datetime = ( + is_numeric_dtype(index) or + is_datetime64_dtype(index) or + is_timedelta64_dtype(index) + ) + if method not in methods and not is_numeric_or_datetime: + raise ValueError( + "Index column must be numeric or datetime type when " + "using {method} method other than linear. 
" + "Try setting a numeric or datetime index column before " + "interpolating.".format(method=method)) if isna(index).any(): raise NotImplementedError("Interpolation with NaNs in the index " diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 2163914f915b2..403fdb383d81a 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -870,6 +870,22 @@ def nontemporal_method(request): return method, kwargs +@pytest.fixture(params=['linear', 'slinear', 'zero', 'quadratic', 'cubic', + 'barycentric', 'krogh', 'polynomial', 'spline', + 'piecewise_polynomial', 'from_derivatives', 'pchip', + 'akima', ]) +def interp_methods_ind(request): + """ Fixture that returns a (method name, required kwargs) pair to + be tested for various Index types. + + This fixture does not include methods - 'time', 'index', 'nearest', + 'values' as a parameterization + """ + method = request.param + kwargs = dict(order=1) if method in ('spline', 'polynomial') else dict() + return method, kwargs + + class TestSeriesInterpolateData(): def test_interpolate(self, datetime_series, string_series): ts = Series(np.arange(len(datetime_series), dtype=float), @@ -1397,3 +1413,55 @@ def test_nonzero_warning(self): ser = pd.Series([1, 0, 3, 4]) with tm.assert_produces_warning(FutureWarning): ser.nonzero() + + @pytest.mark.parametrize( + "ind", + [ + ['a', 'b', 'c', 'd'], + pd.period_range(start="2019-01-01", periods=4), + pd.interval_range(start=0, end=4), + ]) + def test_interp_non_timedelta_index(self, interp_methods_ind, ind): + # gh 21662 + df = pd.DataFrame([0, 1, np.nan, 3], index=ind) + + method, kwargs = interp_methods_ind + if method == "pchip": + _skip_if_no_pchip() + + if method == "linear": + result = df[0].interpolate(**kwargs) + expected = pd.Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind) + assert_series_equal(result, expected) + else: + expected_error = ( + "Index column must be numeric or datetime type when " + "using 
{method} method other than linear. " + "Try setting a numeric or datetime index column before " + "interpolating.".format(method=method)) + with pytest.raises(ValueError, match=expected_error): + df[0].interpolate(method=method, **kwargs) + + def test_interpolate_timedelta_index(self, interp_methods_ind): + """ + Tests for a timedelta index; only linear and pchip are asserted. + Note that all methods except time, index, nearest and values + are parametrized here; unsupported ones are skipped. + """ + # gh 21662 + ind = pd.timedelta_range(start=1, periods=4) + df = pd.DataFrame([0, 1, np.nan, 3], index=ind) + + method, kwargs = interp_methods_ind + if method == "pchip": + _skip_if_no_pchip() + + if method in {"linear", "pchip"}: + result = df[0].interpolate(method=method, **kwargs) + expected = pd.Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind) + assert_series_equal(result, expected) + else: + pytest.skip( + "This interpolation method is not supported for " + "Timedelta Index yet." + )