Skip to content

ENH: EA.interpolate #53659

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jun 28, 2023
1 change: 1 addition & 0 deletions doc/source/reference/extensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ objects.
api.extensions.ExtensionArray.factorize
api.extensions.ExtensionArray.fillna
api.extensions.ExtensionArray.insert
api.extensions.ExtensionArray.interpolate
api.extensions.ExtensionArray.isin
api.extensions.ExtensionArray.isna
api.extensions.ExtensionArray.ravel
Expand Down
20 changes: 20 additions & 0 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,26 @@ def closed(self) -> bool:

# Arguments for fillna()
FillnaOptions = Literal["backfill", "bfill", "ffill", "pad"]
# Arguments for interpolate(): the names accepted as the ``method`` argument.
# The fill-style names ("backfill", "bfill", "ffill", "pad") are not listed
# here; they belong to FillnaOptions, and call sites that accept both spell
# the annotation as ``FillnaOptions | InterpolateOptions``.
InterpolateOptions = Literal[
    "linear",
    "time",
    "index",
    "values",
    "nearest",
    "zero",
    "slinear",
    "quadratic",
    "cubic",
    "barycentric",
    "polynomial",
    "krogh",
    "piecewise_polynomial",
    "spline",
    "pchip",
    "akima",
    "cubicspline",
    "from_derivatives",
]

# internals
Manager = Union[
Expand Down
26 changes: 26 additions & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
AxisInt,
Dtype,
FillnaOptions,
InterpolateOptions,
NumpySorter,
NumpyValueArrayLike,
PositionalIndexer,
Expand All @@ -90,6 +91,8 @@
npt,
)

from pandas import Index

_extension_array_shared_docs: dict[str, str] = {}


Expand Down Expand Up @@ -118,6 +121,7 @@ class ExtensionArray:
fillna
equals
insert
interpolate
isin
isna
ravel
Expand Down Expand Up @@ -155,6 +159,7 @@ class ExtensionArray:
* take
* copy
* _concat_same_type
* interpolate

A default repr displaying the type, (truncated) data, length,
and dtype is provided. It can be customized or replaced by
Expand Down Expand Up @@ -753,6 +758,27 @@ def argmax(self, skipna: bool = True) -> int:
raise NotImplementedError
return nargminmax(self, "argmax")

def interpolate(
    self,
    *,
    method: InterpolateOptions,
    axis: int,
    index: Index | None,
    limit,
    limit_direction,
    limit_area,
    fill_value,
    inplace: bool,
    **kwargs,
) -> Self:
    """
    Interpolation hook for extension arrays.

    All arguments are keyword-only; see NDFrame.interpolate.__doc__ for
    their meaning. This implementation does no work and always raises.

    Raises
    ------
    NotImplementedError
        Always; the message names the concrete array class.
    """
    # NB: we return type(self) even if inplace=True
    cls_name = type(self).__name__
    raise NotImplementedError(f"{cls_name} does not implement interpolate")

def fillna(
self,
value: object | ArrayLike | None = None,
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
Dtype,
DtypeObj,
F,
InterpolateOptions,
NpDtype,
PositionalIndexer2D,
PositionalIndexerTuple,
Expand Down Expand Up @@ -2233,7 +2234,7 @@ def copy(self, order: str = "C") -> Self:
def interpolate(
self,
*,
method,
method: InterpolateOptions,
axis: int,
index: Index | None,
limit,
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from pandas._typing import (
AxisInt,
Dtype,
InterpolateOptions,
NpDtype,
Scalar,
Self,
Expand Down Expand Up @@ -227,7 +228,7 @@ def _values_for_factorize(self) -> tuple[np.ndarray, float | None]:
def interpolate(
self,
*,
method,
method: InterpolateOptions,
axis: int,
index: Index | None,
limit,
Expand Down
23 changes: 2 additions & 21 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
IgnoreRaise,
IndexKeyFunc,
IndexLabel,
InterpolateOptions,
IntervalClosedType,
JSONSerializable,
Level,
Expand Down Expand Up @@ -7658,27 +7659,7 @@ def replace(
@final
def interpolate(
self,
method: Literal[
"linear",
"time",
"index",
"values",
"pad",
"nearest",
"zero",
"slinear",
"quadratic",
"cubic",
"barycentric",
"polynomial",
"krogh",
"piecewise_polynomial",
"spline",
"pchip",
"akima",
"cubicspline",
"from_derivatives",
] = "linear",
method: InterpolateOptions = "linear",
*,
axis: Axis = 0,
limit: int | None = None,
Expand Down
111 changes: 44 additions & 67 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
F,
FillnaOptions,
IgnoreRaise,
InterpolateOptions,
QuantileInterpolation,
Self,
Shape,
Expand Down Expand Up @@ -1345,7 +1346,7 @@ def fillna(
def interpolate(
self,
*,
method: FillnaOptions = "pad",
method: FillnaOptions | InterpolateOptions = "pad",
axis: AxisInt = 0,
index: Index | None = None,
inplace: bool = False,
Expand All @@ -1365,17 +1366,8 @@ def interpolate(
return [self.copy(deep=False)]
return [self] if inplace else [self.copy()]

try:
m = missing.clean_fill_method(method)
except ValueError:
m = None
# error: Non-overlapping equality check (left operand type:
# "Literal['backfill', 'bfill', 'ffill', 'pad']", right
# operand type: "Literal['asfreq']")
if method == "asfreq": # type: ignore[comparison-overlap]
# clean_fill_method used to allow this
raise
if m is None and self.dtype == _dtype_obj:
# TODO(3.0): this case will not be reachable once GH#53638 is enforced
if not _interp_method_is_pad_or_backfill(method) and self.dtype == _dtype_obj:
# only deal with floats
# bc we already checked that can_hold_na, we don't have int dtype here
# test_interp_basic checks that we make a copy here
Expand Down Expand Up @@ -1407,10 +1399,11 @@ def interpolate(
else:
refs = self.refs

# Dispatch to the PandasArray method.
# We know self.array_values is a PandasArray bc EABlock overrides
new_values = cast(PandasArray, self.array_values).interpolate(
method=method,
# Dispatch to the EA method.
new_values = self.array_values.interpolate(
# error: Argument "method" to "interpolate" of "ExtensionArray" has
# incompatible type [...]
method=method, # type: ignore[arg-type]
axis=axis,
index=index,
limit=limit,
Expand All @@ -1420,7 +1413,7 @@ def interpolate(
inplace=arr_inplace,
**kwargs,
)
data = new_values._ndarray
data = extract_array(new_values, extract_numpy=True)

nb = self.make_block_same_class(data, refs=refs)
return nb._maybe_downcast([nb], downcast, using_cow)
Expand Down Expand Up @@ -1841,7 +1834,8 @@ def values_for_json(self) -> np.ndarray:
def interpolate(
self,
*,
method: FillnaOptions = "pad",
method: FillnaOptions | InterpolateOptions = "pad",
index: Index | None = None,
axis: int = 0,
inplace: bool = False,
limit: int | None = None,
Expand All @@ -1850,11 +1844,28 @@ def interpolate(
**kwargs,
):
values = self.values
if values.ndim == 2 and axis == 0:
# NDArrayBackedExtensionArray.fillna assumes axis=1
new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T

if not _interp_method_is_pad_or_backfill(method):
method = cast(InterpolateOptions, method)
return super().interpolate(
method=method,
index=index,
axis=axis,
inplace=inplace,
limit=limit,
fill_value=fill_value,
using_cow=using_cow,
**kwargs,
)
else:
new_values = values.fillna(value=fill_value, method=method, limit=limit)
method = cast(FillnaOptions, method)
if values.ndim == 2 and axis == 0:
# NDArrayBackedExtensionArray.fillna assumes axis=1
new_values = values.T.fillna(
value=fill_value, method=method, limit=limit
).T
else:
new_values = values.fillna(value=fill_value, method=method, limit=limit)
return self.make_block_same_class(new_values)


Expand Down Expand Up @@ -2248,51 +2259,6 @@ class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
def values_for_json(self) -> np.ndarray:
    """Return the ``_ndarray`` attribute of ``self.values``."""
    block_values = self.values
    return block_values._ndarray

def interpolate(
self,
*,
method: FillnaOptions = "pad",
index: Index | None = None,
axis: int = 0,
inplace: bool = False,
limit: int | None = None,
fill_value=None,
using_cow: bool = False,
**kwargs,
):
values = self.values

# error: Non-overlapping equality check (left operand type:
# "Literal['backfill', 'bfill', 'ffill', 'pad']", right operand type:
# "Literal['linear']") [comparison-overlap]
if method == "linear": # type: ignore[comparison-overlap]
# TODO: GH#50950 implement for arbitrary EAs
refs = None
arr_inplace = inplace
if using_cow:
if inplace and not self.refs.has_reference():
refs = self.refs
else:
arr_inplace = False

new_values = self.values.interpolate(
method=method,
index=index,
axis=axis,
inplace=arr_inplace,
limit=limit,
fill_value=fill_value,
**kwargs,
)
return self.make_block_same_class(new_values, refs=refs)

elif values.ndim == 2 and axis == 0:
# NDArrayBackedExtensionArray.fillna assumes axis=1
new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T
else:
new_values = values.fillna(value=fill_value, method=method, limit=limit)
return self.make_block_same_class(new_values)


class DatetimeTZBlock(DatetimeLikeBlock):
"""implement a datetime64 block with a tz attribute"""
Expand Down Expand Up @@ -2606,3 +2572,14 @@ def external_values(values: ArrayLike) -> ArrayLike:
# TODO(CoW) we should also mark our ExtensionArrays as read-only

return values


def _interp_method_is_pad_or_backfill(method: str) -> bool:
    """
    Report whether ``method`` is accepted by ``missing.clean_fill_method``.

    Returns True when ``missing.clean_fill_method(method)`` succeeds with a
    non-None result, and False when it raises ValueError. The one exception
    is ``method == "asfreq"``, for which the ValueError is re-raised.
    """
    try:
        cleaned = missing.clean_fill_method(method)
    except ValueError:
        if method == "asfreq":
            # clean_fill_method used to allow this
            raise
        return False
    return cleaned is not None
4 changes: 2 additions & 2 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
AxisInt,
Frequency,
IndexLabel,
QuantileInterpolation,
InterpolateOptions,
T,
TimedeltaConvertibleTypes,
TimeGrouperOrigin,
Expand Down Expand Up @@ -834,7 +834,7 @@ def fillna(self, method, limit: int | None = None):

def interpolate(
self,
method: QuantileInterpolation = "linear",
method: InterpolateOptions = "linear",
*,
axis: Axis = 0,
limit: int | None = None,
Expand Down