diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6183638ab587e..65b34fe157b71 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6890,11 +6890,6 @@ def interpolate( axis = self._get_axis_number(axis) index = self._get_axis(axis) - if isinstance(self.index, MultiIndex) and method != "linear": - raise ValueError( - "Only `method=linear` interpolation is supported on MultiIndexes." - ) - # for the methods backfill, bfill, pad, ffill limit_direction and limit_area # are being ignored, see gh-26796 for more information if method in ["backfill", "bfill", "pad", "ffill"]: @@ -6913,39 +6908,14 @@ def interpolate( else: df = self.T - if self.ndim == 2 and np.all(self.dtypes == np.dtype(object)): + if np.all(self.dtypes == np.dtype(object)): raise TypeError( "Cannot interpolate with all object-dtype columns " "in the DataFrame. Try setting at least one " "column to a numeric dtype." ) - if method == "linear": - # prior default - index = np.arange(len(df.index)) - else: - methods = {"index", "values", "nearest", "time"} - is_numeric_or_datetime = ( - is_numeric_dtype(index.dtype) - or is_datetime64_any_dtype(index.dtype) - or is_timedelta64_dtype(index.dtype) - ) - if method not in methods and not is_numeric_or_datetime: - raise ValueError( - "Index column must be numeric or datetime type when " - f"using {method} method other than linear. " - "Try setting a numeric or datetime index column before " - "interpolating." - ) - - if isna(index).any(): - raise NotImplementedError( - "Interpolation with NaNs in the index " - "has not been implemented. Try filling " - "those NaNs before interpolating." - ) - data = df._mgr - new_data = data.interpolate( + new_data = df._mgr.interpolate( method=method, axis=self._info_axis_number, index=index, diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e2a778f729470..31b38941c0153 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1104,13 +1104,10 @@ def interpolate( coerce=coerce, downcast=downcast, ) - # validate the interp method - m = missing.clean_interp_method(method, **kwargs) assert index is not None # for mypy - return self._interpolate( - method=m, + method=method, index=index, axis=axis, limit=limit, @@ -1178,6 +1175,9 @@ def _interpolate( inplace = validate_bool_kwarg(inplace, "inplace") data = self.values if inplace else self.values.copy() + # validate the interp method and get xvalues + method, xvalues = missing.clean_interp_method(method, index, **kwargs) + # only deal with floats if not self.is_float: if not self.is_integer: @@ -1187,11 +1187,6 @@ def _interpolate( if fill_value is None: fill_value = self.fill_value - if method in ("krogh", "piecewise_polynomial", "pchip"): - if not index.is_monotonic: - raise ValueError( - f"{method} interpolation requires that the index be monotonic." - ) # process 1-d slices in the axis direction def func(yvalues: np.ndarray) -> np.ndarray: @@ -1200,7 +1195,7 @@ def func(yvalues: np.ndarray) -> np.ndarray: # should the axis argument be handled below in apply_along_axis? # i.e. not an arg to missing.interpolate_1d return missing.interpolate_1d( - xvalues=index, + xvalues=xvalues, yvalues=yvalues, method=method, limit=limit, diff --git a/pandas/core/missing.py b/pandas/core/missing.py index d8671616f944e..cff663912acbe 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -2,7 +2,7 @@ Routines for filling missing data. """ -from typing import Any, List, Optional, Set, Union +from typing import TYPE_CHECKING, Any, List, Optional, Set, Tuple, Union import numpy as np @@ -12,16 +12,22 @@ from pandas.core.dtypes.cast import infer_dtype_from_array from pandas.core.dtypes.common import ( ensure_float64, + is_datetime64_any_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_integer_dtype, + is_numeric_dtype, is_numeric_v_string_like, is_scalar, is_timedelta64_dtype, needs_i8_conversion, ) +from pandas.core.dtypes.generic import ABCMultiIndex from pandas.core.dtypes.missing import isna +if TYPE_CHECKING: + from pandas import Index + def mask_missing(arr, values_to_mask): """ @@ -94,7 +100,16 @@ def clean_fill_method(method, allow_nearest=False): return method -def clean_interp_method(method: str, **kwargs) -> str: +def clean_interp_method( + method: str, index: "Index", **kwargs +) -> Tuple[str, np.ndarray]: + """ + Validate Index and order keyword for interpolation methods. + + Returns + ------- + tuple of str, np.ndarray + """ order = kwargs.get("order") valid = [ "linear", @@ -120,8 +135,49 @@ def clean_interp_method(method: str, **kwargs) -> str: raise ValueError("You must specify the order of the spline or polynomial.") if method not in valid: raise ValueError(f"method must be one of {valid}. Got '{method}' instead.") + if method in ("krogh", "piecewise_polynomial", "pchip"): + if not index.is_monotonic: + raise ValueError( + f"{method} interpolation requires that the index be monotonic." + ) + elif method == "time": + if not getattr(index, "is_all_dates", None): + raise ValueError( + "time-weighted interpolation only works " + "on Series or DataFrames with a DatetimeIndex" + ) + method = "values" - return method + if method == "linear": + xvalues = np.arange(len(index)) + else: + if isinstance(index, ABCMultiIndex): + raise ValueError( + "Only `method=linear` interpolation is supported on MultiIndexes." + ) + + methods = {"index", "values", "nearest", "time"} + is_numeric_or_datetime = ( + is_numeric_dtype(index.dtype) + or is_datetime64_any_dtype(index.dtype) + or is_timedelta64_dtype(index.dtype) + ) + if method not in methods and not is_numeric_or_datetime: + raise ValueError( + "Index column must be numeric or datetime type when " + f"using {method} method other than linear. " + "Try setting a numeric or datetime index column before " + "interpolating." + ) + if isna(index).any(): + raise NotImplementedError( + "Interpolation with NaNs in the index " + "has not been implemented. Try filling " + "those NaNs before interpolating." + ) + xvalues = index.values + + return method, xvalues def find_valid_index(values, how: str): @@ -195,16 +251,6 @@ def interpolate_1d( if valid.all(): return yvalues - if method == "time": - if not getattr(xvalues, "is_all_dates", None): - # if not issubclass(xvalues.dtype.type, np.datetime64): - raise ValueError( - "time-weighted interpolation only works " - "on Series or DataFrames with a " - "DatetimeIndex" - ) - method = "values" - valid_limit_directions = ["forward", "backward", "both"] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: diff --git a/pandas/core/series.py b/pandas/core/series.py index 71ffdcbd40fe7..ad9bf8c00f0ac 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -40,6 +40,7 @@ from pandas.core.dtypes.cast import ( convert_dtypes, maybe_cast_to_extension_array, + maybe_downcast_to_dtype, validate_numeric_casting, ) from pandas.core.dtypes.common import ( @@ -92,6 +93,7 @@ from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexing import check_bool_indexer from pandas.core.internals import SingleBlockManager +import pandas.core.missing as missing from pandas.core.sorting import ensure_key_mapped from pandas.core.strings import StringMethods from pandas.core.tools.datetimes import to_datetime @@ -2237,6 +2239,58 @@ def quantile(self, q=0.5, interpolation="linear"): # scalar return result.iloc[0] + @doc(NDFrame.interpolate) + def interpolate( + self, + method: str = "linear", + axis: Axis = 0, + limit: Optional[int] = None, + inplace: bool = False, + limit_direction: str = "forward", + limit_area: Optional[str] = None, + downcast: Optional[str] = None, + **kwargs, + ) -> Optional["Series"]: + inplace = validate_bool_kwarg(inplace, "inplace") + + axis = self._get_axis_number(axis) + + # for the methods backfill, bfill, pad, ffill limit_direction and limit_area + # are being ignored, see gh-26796 for more information + if method in ["backfill", "bfill", "pad", "ffill"]: + return self.fillna( + method=method, + axis=axis, + inplace=inplace, + limit=limit, + downcast=downcast, + ) + + # validate the interp method and get xvalues + method, xvalues = missing.clean_interp_method(method, self.index, **kwargs) + + arr = missing.interpolate_1d( + xvalues, + self.values, + method=method, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + bounds_error=False, + **kwargs, + ) + + if downcast is not None: + arr = maybe_downcast_to_dtype(arr, dtype=downcast) + + result = self._constructor(arr, index=self.index, fastpath=True) + + if inplace: + self._update_inplace(result) + return None + else: + return result.__finalize__(self, method="interpolate") + def corr(self, other, method="pearson", min_periods=None) -> float: """ Compute correlation with `other` Series, excluding missing values.