From e82d4e890b2d3bb37264b4a8e7b9321a4bedd6e6 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 3 Dec 2021 16:24:18 +0100 Subject: [PATCH 1/4] [ArrayManager] Array version of interpolate logic --- pandas/core/internals/array_manager.py | 10 +++- pandas/core/missing.py | 71 +++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 1cd9fe65407ba..9a377f35dce7a 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -89,6 +89,7 @@ new_block, to_native_types, ) +from pandas.core.missing import interpolate_array if TYPE_CHECKING: from pandas import Float64Index @@ -367,8 +368,13 @@ def diff(self: T, n: int, axis: int) -> T: axis = 0 return self.apply(algos.diff, n=n, axis=axis) - def interpolate(self: T, **kwargs) -> T: - return self.apply_with_block("interpolate", swap_axis=False, **kwargs) + def interpolate(self: T, axis: int = 0, **kwargs) -> T: + if axis == 0: + return self.apply(interpolate_array, **kwargs) + else: + return self.apply_with_block( + "interpolate", swap_axis=False, axis=axis, **kwargs + ) def shift(self: T, periods: int, axis: int, fill_value) -> T: if fill_value is lib.no_default: diff --git a/pandas/core/missing.py b/pandas/core/missing.py index ede0878f15caa..a75960768a4fe 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -26,8 +26,13 @@ npt, ) from pandas.compat._optional import import_optional_dependency +from pandas.util._validators import validate_bool_kwarg -from pandas.core.dtypes.cast import infer_dtype_from +from pandas.core.dtypes.cast import ( + infer_dtype_from, + maybe_downcast_to_dtype, + soft_convert_objects, +) from pandas.core.dtypes.common import ( is_array_like, is_numeric_v_string_like, @@ -41,6 +46,7 @@ if TYPE_CHECKING: from pandas import Index + from pandas.core.arrays import ExtensionArray def check_value_size(value, mask: np.ndarray, length: int): @@ -973,3 +979,66 @@ def _rolling_window(a: npt.NDArray[np.bool_], window: int) -> npt.NDArray[np.boo shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) strides = a.strides + (a.strides[-1],) return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) + + +def _maybe_downcast(arr: np.ndarray, downcast=None): + if arr.dtype == np.dtype(object): + if downcast is None: + arr = soft_convert_objects(arr, datetime=True, numeric=False) + + if downcast: + arr = maybe_downcast_to_dtype(arr, downcast) + return arr + + +def interpolate_array( + arr: np.ndarray | ExtensionArray, + method: str = "pad", + axis: int = 0, + index: Index | None = None, + inplace: bool = False, + limit: int | None = None, + limit_direction: str = "forward", + limit_area: str | None = None, + fill_value: Any | None = None, + coerce: bool = False, + downcast: str | None = None, + **kwargs, +) -> np.ndarray | ExtensionArray: + + inplace = validate_bool_kwarg(inplace, "inplace") + + # first check for extensionarrays + if not isinstance(arr, np.ndarray): + return arr.fillna(value=fill_value, method=method, limit=limit) + + if arr.dtype.kind in ["b", "i", "u"]: + # those dtypes can never hold NAs + # If there are no NAs, then interpolate is a no-op + return arr if inplace else arr.copy() + + try: + m = clean_fill_method(method) + except ValueError: + m = None + if m is None and arr.dtype.kind != "f": + # only deal with floats + # bc we already checked that can_hold_na, we dont have int dtype here + # TODO: make a copy if not inplace? + return arr + + data = arr if inplace else arr.copy() + + interp_values = interpolate_array_2d( + data, + method=method, + axis=axis, + index=index, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + fill_value=fill_value, + **kwargs, + ) + + return _maybe_downcast(interp_values, downcast) From 7fa41338e26ea25b7fbf595bedd54c550b2d2615 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 6 Dec 2021 09:21:53 +0100 Subject: [PATCH 2/4] update now interpolate_array_2d is inplace --- pandas/core/missing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index afba0727d6413..2ce8d0b32e820 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -1031,7 +1031,7 @@ def interpolate_array( data = arr if inplace else arr.copy() - interp_values = interpolate_array_2d( + interpolate_array_2d( data, method=method, axis=axis, @@ -1043,4 +1043,4 @@ def interpolate_array( **kwargs, ) - return _maybe_downcast(interp_values, downcast) + return _maybe_downcast(data, downcast) From 2499c5e68dd4417c69fb556db3a979f91d73ca65 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 6 Dec 2021 09:24:01 +0100 Subject: [PATCH 3/4] update typing --- pandas/core/missing.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 2ce8d0b32e820..01dde23f4de9b 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -46,7 +46,6 @@ if TYPE_CHECKING: from pandas import Index - from pandas.core.arrays import ExtensionArray def check_value_size(value, mask: np.ndarray, length: int): @@ -994,7 +993,7 @@ def _maybe_downcast(arr: np.ndarray, downcast=None): def interpolate_array( - arr: np.ndarray | ExtensionArray, + arr: ArrayLike, method: str = "pad", axis: int = 0, index: Index | None = None, @@ -1006,7 +1005,7 @@ def interpolate_array( coerce: bool = False, downcast: str | None = None, **kwargs, -) -> np.ndarray | ExtensionArray: +) -> ArrayLike: inplace = validate_bool_kwarg(inplace, "inplace") From 44ebf701cf2cef332b04ece29f30eda55f03d664 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 6 Dec 2021 11:40:16 +0100 Subject: [PATCH 4/4] update typing --- pandas/core/missing.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 01dde23f4de9b..9d328975501a9 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -23,6 +23,7 @@ ArrayLike, Axis, F, + FillnaOptions, npt, ) from pandas.compat._optional import import_optional_dependency @@ -983,18 +984,17 @@ def _rolling_window(a: npt.NDArray[np.bool_], window: int) -> npt.NDArray[np.boo def _maybe_downcast(arr: np.ndarray, downcast=None): - if arr.dtype == np.dtype(object): - if downcast is None: - arr = soft_convert_objects(arr, datetime=True, numeric=False) + if arr.dtype == np.dtype(object) and downcast is None: + return soft_convert_objects(arr, datetime=True, numeric=False) if downcast: - arr = maybe_downcast_to_dtype(arr, downcast) + return maybe_downcast_to_dtype(arr, downcast) return arr def interpolate_array( arr: ArrayLike, - method: str = "pad", + method: FillnaOptions = "pad", axis: int = 0, index: Index | None = None, inplace: bool = False,