Skip to content

WIP: avoid internals for Series.interpolate #34628

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 2 additions & 32 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6890,11 +6890,6 @@ def interpolate(
axis = self._get_axis_number(axis)
index = self._get_axis(axis)

if isinstance(self.index, MultiIndex) and method != "linear":
raise ValueError(
"Only `method=linear` interpolation is supported on MultiIndexes."
)

# For the methods backfill, bfill, pad and ffill, limit_direction and
# limit_area are ignored; see gh-26796 for more information.
if method in ["backfill", "bfill", "pad", "ffill"]:
Expand All @@ -6913,39 +6908,14 @@ def interpolate(
else:
df = self.T

if self.ndim == 2 and np.all(self.dtypes == np.dtype(object)):
if np.all(self.dtypes == np.dtype(object)):
raise TypeError(
"Cannot interpolate with all object-dtype columns "
"in the DataFrame. Try setting at least one "
"column to a numeric dtype."
)

if method == "linear":
# prior default
index = np.arange(len(df.index))
else:
methods = {"index", "values", "nearest", "time"}
is_numeric_or_datetime = (
is_numeric_dtype(index.dtype)
or is_datetime64_any_dtype(index.dtype)
or is_timedelta64_dtype(index.dtype)
)
if method not in methods and not is_numeric_or_datetime:
raise ValueError(
"Index column must be numeric or datetime type when "
f"using {method} method other than linear. "
"Try setting a numeric or datetime index column before "
"interpolating."
)

if isna(index).any():
raise NotImplementedError(
"Interpolation with NaNs in the index "
"has not been implemented. Try filling "
"those NaNs before interpolating."
)
data = df._mgr
new_data = data.interpolate(
new_data = df._mgr.interpolate(
method=method,
axis=self._info_axis_number,
index=index,
Expand Down
15 changes: 5 additions & 10 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1104,13 +1104,10 @@ def interpolate(
coerce=coerce,
downcast=downcast,
)
# validate the interp method
m = missing.clean_interp_method(method, **kwargs)

assert index is not None # for mypy

return self._interpolate(
method=m,
method=method,
index=index,
axis=axis,
limit=limit,
Expand Down Expand Up @@ -1178,6 +1175,9 @@ def _interpolate(
inplace = validate_bool_kwarg(inplace, "inplace")
data = self.values if inplace else self.values.copy()

# validate the interp method and get xvalues
method, xvalues = missing.clean_interp_method(method, index, **kwargs)

# only deal with floats
if not self.is_float:
if not self.is_integer:
Expand All @@ -1187,11 +1187,6 @@ def _interpolate(
if fill_value is None:
fill_value = self.fill_value

if method in ("krogh", "piecewise_polynomial", "pchip"):
if not index.is_monotonic:
raise ValueError(
f"{method} interpolation requires that the index be monotonic."
)
# process 1-d slices in the axis direction

def func(yvalues: np.ndarray) -> np.ndarray:
Expand All @@ -1200,7 +1195,7 @@ def func(yvalues: np.ndarray) -> np.ndarray:
# should the axis argument be handled below in apply_along_axis?
# i.e. not an arg to missing.interpolate_1d
return missing.interpolate_1d(
xvalues=index,
xvalues=xvalues,
yvalues=yvalues,
method=method,
limit=limit,
Expand Down
72 changes: 59 additions & 13 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Routines for filling missing data.
"""

from typing import Any, List, Optional, Set, Union
from typing import TYPE_CHECKING, Any, List, Optional, Set, Tuple, Union

import numpy as np

Expand All @@ -12,16 +12,22 @@
from pandas.core.dtypes.cast import infer_dtype_from_array
from pandas.core.dtypes.common import (
ensure_float64,
is_datetime64_any_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_integer_dtype,
is_numeric_dtype,
is_numeric_v_string_like,
is_scalar,
is_timedelta64_dtype,
needs_i8_conversion,
)
from pandas.core.dtypes.generic import ABCMultiIndex
from pandas.core.dtypes.missing import isna

if TYPE_CHECKING:
from pandas import Index


def mask_missing(arr, values_to_mask):
"""
Expand Down Expand Up @@ -94,7 +100,16 @@ def clean_fill_method(method, allow_nearest=False):
return method


def clean_interp_method(method: str, **kwargs) -> str:
def clean_interp_method(
method: str, index: "Index", **kwargs
) -> Tuple[str, np.ndarray]:
"""
Validate Index and order keyword for interpolation methods.

Returns
-------
tuple of str, np.ndarray
"""
order = kwargs.get("order")
valid = [
"linear",
Expand All @@ -120,8 +135,49 @@ def clean_interp_method(method: str, **kwargs) -> str:
raise ValueError("You must specify the order of the spline or polynomial.")
if method not in valid:
raise ValueError(f"method must be one of {valid}. Got '{method}' instead.")
if method in ("krogh", "piecewise_polynomial", "pchip"):
if not index.is_monotonic:
raise ValueError(
f"{method} interpolation requires that the index be monotonic."
)
elif method == "time":
if not getattr(index, "is_all_dates", None):
raise ValueError(
"time-weighted interpolation only works "
"on Series or DataFrames with a DatetimeIndex"
)
method = "values"

return method
if method == "linear":
xvalues = np.arange(len(index))
else:
if isinstance(index, ABCMultiIndex):
raise ValueError(
"Only `method=linear` interpolation is supported on MultiIndexes."
)

methods = {"index", "values", "nearest", "time"}
is_numeric_or_datetime = (
is_numeric_dtype(index.dtype)
or is_datetime64_any_dtype(index.dtype)
or is_timedelta64_dtype(index.dtype)
)
if method not in methods and not is_numeric_or_datetime:
raise ValueError(
"Index column must be numeric or datetime type when "
f"using {method} method other than linear. "
"Try setting a numeric or datetime index column before "
"interpolating."
)
if isna(index).any():
raise NotImplementedError(
"Interpolation with NaNs in the index "
"has not been implemented. Try filling "
"those NaNs before interpolating."
)
xvalues = index.values

return method, xvalues


def find_valid_index(values, how: str):
Expand Down Expand Up @@ -195,16 +251,6 @@ def interpolate_1d(
if valid.all():
return yvalues

if method == "time":
if not getattr(xvalues, "is_all_dates", None):
# if not issubclass(xvalues.dtype.type, np.datetime64):
raise ValueError(
"time-weighted interpolation only works "
"on Series or DataFrames with a "
"DatetimeIndex"
)
method = "values"

valid_limit_directions = ["forward", "backward", "both"]
limit_direction = limit_direction.lower()
if limit_direction not in valid_limit_directions:
Expand Down
54 changes: 54 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from pandas.core.dtypes.cast import (
convert_dtypes,
maybe_cast_to_extension_array,
maybe_downcast_to_dtype,
validate_numeric_casting,
)
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -92,6 +93,7 @@
from pandas.core.indexes.timedeltas import TimedeltaIndex
from pandas.core.indexing import check_bool_indexer
from pandas.core.internals import SingleBlockManager
import pandas.core.missing as missing
from pandas.core.sorting import ensure_key_mapped
from pandas.core.strings import StringMethods
from pandas.core.tools.datetimes import to_datetime
Expand Down Expand Up @@ -2237,6 +2239,58 @@ def quantile(self, q=0.5, interpolation="linear"):
# scalar
return result.iloc[0]

@doc(NDFrame.interpolate)
def interpolate(
    self,
    method: str = "linear",
    axis: Axis = 0,
    limit: Optional[int] = None,
    inplace: bool = False,
    limit_direction: str = "forward",
    limit_area: Optional[str] = None,
    downcast: Optional[str] = None,
    **kwargs,
) -> Optional["Series"]:
    # NOTE: intentionally no docstring here; the ``doc`` decorator above is
    # handed ``NDFrame.interpolate`` as the documentation source.
    inplace = validate_bool_kwarg(inplace, "inplace")
    axis = self._get_axis_number(axis)

    # Plain fill methods are delegated to ``fillna``. ``limit_direction`` and
    # ``limit_area`` are ignored on this path, see gh-26796.
    fill_methods = ("backfill", "bfill", "pad", "ffill")
    if method in fill_methods:
        return self.fillna(
            method=method,
            axis=axis,
            inplace=inplace,
            limit=limit,
            downcast=downcast,
        )

    # Validate the requested method against this Series' index and obtain
    # the (possibly renamed) method together with the x-values to use.
    method, xvalues = missing.clean_interp_method(method, self.index, **kwargs)

    # Interpolate directly on the underlying values; ``bounds_error`` is
    # always passed as False here.
    interpolated = missing.interpolate_1d(
        xvalues,
        self.values,
        method=method,
        limit=limit,
        limit_direction=limit_direction,
        limit_area=limit_area,
        bounds_error=False,
        **kwargs,
    )

    # Downcasting only happens when the caller asked for it.
    if downcast is not None:
        interpolated = maybe_downcast_to_dtype(interpolated, dtype=downcast)

    new_series = self._constructor(interpolated, index=self.index, fastpath=True)

    if not inplace:
        return new_series.__finalize__(self, method="interpolate")

    # In-place: update this object with the result and return nothing.
    self._update_inplace(new_series)
    return None

def corr(self, other, method="pearson", min_periods=None) -> float:
"""
Compute correlation with `other` Series, excluding missing values.
Expand Down