diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index adc1806523d6e..503c3c37790fd 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -225,6 +225,7 @@ Missing ^^^^^^^ - Bug in :meth:`SeriesGroupBy.transform` now correctly handles missing values for `dropna=False` (:issue:`35014`) +- Bug in :meth:`Series.interpolate` where the keyword arguments ``limit_area`` and ``limit_direction`` had no effect when using methods ``pad`` and ``backfill`` (:issue:`31048`) - MultiIndex diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index a28b341669918..9c856e1da9f3d 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1693,9 +1693,9 @@ def fillna(self, value=None, method=None, limit=None): # TODO: dispatch when self.categories is EA-dtype values = np.asarray(self).reshape(-1, len(self)) - values = interpolate_2d(values, method, 0, None, value).astype( - self.categories.dtype - )[0] + values = interpolate_2d( + values, method=method, axis=0, limit=None, fill_value=value, + ).astype(self.categories.dtype)[0] codes = _get_codes_for_values(values, self.categories) else: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c62be4f767f00..1d4569d4d0f53 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1127,6 +1127,7 @@ def interpolate( axis=axis, inplace=inplace, limit=limit, + limit_area=limit_area, fill_value=fill_value, coerce=coerce, downcast=downcast, @@ -1155,6 +1156,7 @@ def _interpolate_with_fill( axis: int = 0, inplace: bool = False, limit: Optional[int] = None, + limit_area: Optional[str] = None, fill_value: Optional[Any] = None, coerce: bool = False, downcast: Optional[str] = None, @@ -1181,6 +1183,7 @@ def _interpolate_with_fill( method=method, axis=axis, limit=limit, + limit_area=limit_area, fill_value=fill_value, dtype=self.dtype, ) diff --git a/pandas/core/missing.py 
b/pandas/core/missing.py index 7802c5cbdbfb3..ca9c225118792 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -7,6 +7,7 @@ import numpy as np from pandas._libs import algos, lib +from pandas._typing import Axis, Dtype, Hashable from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import infer_dtype_from_array @@ -543,12 +544,62 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat def interpolate_2d( - values, method="pad", axis=0, limit=None, fill_value=None, dtype=None + values, + method: str = "pad", + axis: Axis = 0, + limit: Optional[int] = None, + limit_area: Optional[str] = None, + fill_value: Optional[Hashable] = None, + dtype: Optional[Dtype] = None, ): """ Perform an actual interpolation of values, values will be make 2-d if needed fills inplace, returns the result. """ + + # `limit_area` is not supported by `pad_2d` and `backfill_2d`. Hence, the + # following code block does a recursive call and applies the interpolation + # and `limit_area` logic along a certain axis. 
+ if limit_area is not None: + + def func(values): + invalid = isna(values) + + if not invalid.any(): + return values + + if not invalid.all(): + first = find_valid_index(values, "first") + last = find_valid_index(values, "last") + + values = interpolate_2d( + values, + method=method, + limit=limit, + fill_value=fill_value, + dtype=dtype, + ) + + if limit_area == "inside": + invalid[first : last + 1] = False + elif limit_area == "outside": + invalid[:first] = False + invalid[last + 1 :] = False + + values[invalid] = np.nan + else: + values = interpolate_2d( + values, + method=method, + limit=limit, + fill_value=fill_value, + dtype=dtype, + ) + return values + + values = np.apply_along_axis(func, axis, values) + return values + orig_values = values transf = (lambda x: x) if axis == 0 else (lambda x: x.T) diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index c4b10e0ccdc3e..ae9908d81e6cb 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -450,6 +450,66 @@ def test_interp_limit_direction_raises(self, method, limit_direction, expected): with pytest.raises(ValueError, match=msg): s.interpolate(method=method, limit_direction=limit_direction) + def test_interp_limit_area_with_pad(self): + # Test for issue #26796 + s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) + + expected = Series([np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan]) + result = s.interpolate(method="pad", limit_area="inside") + tm.assert_series_equal(result, expected) + + expected = Series( + [np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan] + ) + result = s.interpolate(method="pad", limit_area="inside", limit=1) + tm.assert_series_equal(result, expected) + + expected = Series([np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0]) + result = s.interpolate(method="pad", limit_area="outside") + tm.assert_series_equal(result, 
expected) + + expected = Series( + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan] + ) + result = s.interpolate(method="pad", limit_area="outside", limit=1) + tm.assert_series_equal(result, expected) + + # Test for all NaNs + s = Series([np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]) + expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]) + result = s.interpolate(method="pad", limit_area="outside", limit=1) + tm.assert_series_equal(result, expected) + + # Test for no NaNs + s = Series([1, 2, 3, 4]) + expected = Series([1, 2, 3, 4]) + result = s.interpolate(method="pad", limit_area="outside", limit=1) + tm.assert_series_equal(result, expected) + + def test_interp_limit_area_with_backfill(self): + # Test for issue #26796 + s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) + + expected = Series([np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan]) + result = s.interpolate(method="bfill", limit_area="inside") + tm.assert_series_equal(result, expected) + + expected = Series( + [np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan] + ) + result = s.interpolate(method="bfill", limit_area="inside", limit=1) + tm.assert_series_equal(result, expected) + + expected = Series([3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan]) + result = s.interpolate(method="bfill", limit_area="outside") + tm.assert_series_equal(result, expected) + + expected = Series( + [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan] + ) + result = s.interpolate(method="bfill", limit_area="outside", limit=1) + tm.assert_series_equal(result, expected) + def test_interp_limit_direction(self): # These tests are for issue #9218 -- fill NaNs in both directions. s = Series([1, 3, np.nan, np.nan, np.nan, 11])