Skip to content

CLN: enforce deprecation of NDFrame.interpolate with ffill/bfill/pad/backfill methods #57869

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

35 changes: 4 additions & 31 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7630,7 +7630,6 @@ def interpolate(
* 'time': Works on daily and higher resolution data to interpolate
given length of interval.
* 'index', 'values': use the actual numerical values of the index.
* 'pad': Fill in NaNs using existing values.
* 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
'barycentric', 'polynomial': Passed to
`scipy.interpolate.interp1d`, whereas 'spline' is passed to
Expand All @@ -7654,23 +7653,9 @@ def interpolate(
0.
inplace : bool, default False
Update the data in place if possible.
limit_direction : {{'forward', 'backward', 'both'}}, Optional
limit_direction : {{'forward', 'backward', 'both'}}, optional, default 'forward'
Consecutive NaNs will be filled in this direction.

If limit is specified:
* If 'method' is 'pad' or 'ffill', 'limit_direction' must be 'forward'.
* If 'method' is 'backfill' or 'bfill', 'limit_direction' must be
'backwards'.

If 'limit' is not specified:
* If 'method' is 'backfill' or 'bfill', the default is 'backward'
* else the default is 'forward'

raises ValueError if `limit_direction` is 'forward' or 'both' and
method is 'backfill' or 'bfill'.
raises ValueError if `limit_direction` is 'backward' or 'both' and
method is 'pad' or 'ffill'.

limit_area : {{`None`, 'inside', 'outside'}}, default None
If limit is specified, consecutive NaNs will be filled with this
restriction.
Expand Down Expand Up @@ -7804,29 +7789,17 @@ def interpolate(
raise ValueError("'method' should be a string, not None.")

fillna_methods = ["ffill", "bfill", "pad", "backfill"]
if method.lower() in fillna_methods:
# GH#53581
warnings.warn(
f"{type(self).__name__}.interpolate with method={method} is "
"deprecated and will raise in a future version. "
"Use obj.ffill() or obj.bfill() instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
obj, should_transpose = self, False
else:
if method.lower() not in fillna_methods:
obj, should_transpose = (self.T, True) if axis == 1 else (self, False)
# GH#53631
if np.any(obj.dtypes == object):
raise TypeError(
f"{type(self).__name__} cannot interpolate with object dtype."
)

if method in fillna_methods and "fill_value" in kwargs:
else:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this case isn't needed either anymore either. The obj._mgr.interpolate call below should raise on invalid arguments. (Also note the # TODO(3.0): remove this case comment below which you can address in this PR)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks, I moved this check for the bad methods below and removed the useless check (the one with the note # TODO(3.0): remove this case)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to be clear, here's the diff of the change I think you can fully clean up (I think we don't need any logic dealing with checking for a fillna_method):

--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7797,30 +7797,11 @@ class NDFrame(PandasObject, indexing.IndexingMixin):
         if not isinstance(method, str):
             raise ValueError("'method' should be a string, not None.")
 
-        fillna_methods = ["ffill", "bfill", "pad", "backfill"]
-        if method.lower() in fillna_methods:
-            # GH#53581
-            warnings.warn(
-                f"{type(self).__name__}.interpolate with method={method} is "
-                "deprecated and will raise in a future version. "
-                "Use obj.ffill() or obj.bfill() instead.",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
-            obj, should_transpose = self, False
-        else:
-            obj, should_transpose = (self.T, True) if axis == 1 else (self, False)
-                    f"{type(self).__name__} cannot interpolate with object dtype."
-                )
-
-        if method in fillna_methods and "fill_value" in kwargs:
-            raise ValueError(
-                "'fill_value' is not a valid keyword for "
-                f"{type(self).__name__}.interpolate with method from "
-                f"{fillna_methods}"
+        obj, should_transpose = (self.T, True) if axis == 1 else (self, False)
+        # GH#53631
+        if np.any(obj.dtypes == object):
+            raise TypeError(
+                f"{type(self).__name__} cannot interpolate with object dtype."
             )
 
         if isinstance(obj.index, MultiIndex) and method != "linear":
@@ -7830,34 +7811,16 @@ class NDFrame(PandasObject, indexing.IndexingMixin):
 
         limit_direction = missing.infer_limit_direction(limit_direction, method)
 
-        if method.lower() in fillna_methods:
-            # TODO(3.0): remove this case
-            # TODO: warn/raise on limit_direction or kwargs which are ignored?
-            #  as of 2023-06-26 no tests get here with either
-            if not self._mgr.is_single_block and axis == 1:
-                # GH#53898
-                if inplace:
-                    raise NotImplementedError()
-                obj, axis, should_transpose = self.T, 1 - axis, True
-
-            new_data = obj._mgr.pad_or_backfill(
-                method=method,
-                axis=self._get_block_manager_axis(axis),
-                limit=limit,
-                limit_area=limit_area,
-                inplace=inplace,
-            )
-        else:
-            index = missing.get_interp_index(method, obj.index)
-            new_data = obj._mgr.interpolate(
-                method=method,
-                index=index,
-                limit=limit,
-                limit_direction=limit_direction,
-                limit_area=limit_area,
-                inplace=inplace,
-                **kwargs,
-            )
+        index = missing.get_interp_index(method, obj.index)
+        new_data = obj._mgr.interpolate(
+            method=method,
+            index=index,
+            limit=limit,
+            limit_direction=limit_direction,
+            limit_area=limit_area,
+            inplace=inplace,
+            **kwargs,
+        )

Copy link
Contributor Author

@natmokval natmokval Mar 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahhh, I think I understand now. I removed any logic dealing with checking for a fillna_methods from NDFrame.interpolate.
I think we need to check if method is valid in interpolate in class Block and to do the same check in get_interp_index. I corrected these methods and updated error messages in tests.

raise ValueError(
"'fill_value' is not a valid keyword for "
f"{type(self).__name__}.interpolate with method from "
f"{fillna_methods}"
f"{type(self).__name__} cannot interpolate with method={method}."
)

if isinstance(obj.index, MultiIndex) and method != "linear":
Expand Down
34 changes: 17 additions & 17 deletions pandas/tests/copy_view/test_interp_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,18 @@ def test_interpolate_no_op(method):
df = DataFrame({"a": [1, 2]})
df_orig = df.copy()

warn = None
if method == "pad":
warn = FutureWarning
msg = "DataFrame.interpolate with method=pad is deprecated"
with tm.assert_produces_warning(warn, match=msg):
msg = f"DataFrame cannot interpolate with method={method}"
with pytest.raises(ValueError, match=msg):
df.interpolate(method=method)
else:
result = df.interpolate(method=method)
assert np.shares_memory(get_array(result, "a"), get_array(df, "a"))

assert np.shares_memory(get_array(result, "a"), get_array(df, "a"))
result.iloc[0, 0] = 100

result.iloc[0, 0] = 100

assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))
tm.assert_frame_equal(df, df_orig)
assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))
tm.assert_frame_equal(df, df_orig)


@pytest.mark.parametrize("func", ["ffill", "bfill"])
Expand Down Expand Up @@ -122,8 +121,8 @@ def test_interpolate_cannot_with_object_dtype():
def test_interpolate_object_convert_no_op():
df = DataFrame({"a": ["a", "b", "c"], "b": 1})
arr_a = get_array(df, "a")
msg = "DataFrame.interpolate with method=pad is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
msg = "DataFrame cannot interpolate with method=pad"
with pytest.raises(ValueError, match=msg):
df.interpolate(method="pad", inplace=True)

# Now CoW makes a copy, it should not!
Expand All @@ -134,8 +133,8 @@ def test_interpolate_object_convert_no_op():
def test_interpolate_object_convert_copies():
df = DataFrame({"a": [1, np.nan, 2.5], "b": 1})
arr_a = get_array(df, "a")
msg = "DataFrame.interpolate with method=pad is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
msg = "DataFrame cannot interpolate with method=pad"
with pytest.raises(ValueError, match=msg):
df.interpolate(method="pad", inplace=True, downcast="infer")

assert df._mgr._has_no_reference(0)
Expand All @@ -147,12 +146,13 @@ def test_interpolate_downcast_reference_triggers_copy():
df_orig = df.copy()
arr_a = get_array(df, "a")
view = df[:]
msg = "DataFrame.interpolate with method=pad is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):

msg = "DataFrame cannot interpolate with method=pad"
with pytest.raises(ValueError, match=msg):
df.interpolate(method="pad", inplace=True, downcast="infer")
assert df._mgr._has_no_reference(0)
assert not np.shares_memory(arr_a, get_array(df, "a"))

assert df._mgr._has_no_reference(0)
assert not np.shares_memory(arr_a, get_array(df, "a"))
tm.assert_frame_equal(df_orig, view)


Expand Down
9 changes: 3 additions & 6 deletions pandas/tests/frame/methods/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,12 +398,9 @@ def test_interp_fillna_methods(self, axis, multiblock, method):
df["D"] = np.nan
df["E"] = 1.0

method2 = method if method != "pad" else "ffill"
expected = getattr(df, method2)(axis=axis)
msg = f"DataFrame.interpolate with method={method} is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.interpolate(method=method, axis=axis)
tm.assert_frame_equal(result, expected)
msg = f"DataFrame cannot interpolate with method={method}"
with pytest.raises(ValueError, match=msg):
df.interpolate(method=method, axis=axis)

def test_interpolate_empty_df(self):
# GH#53199
Expand Down
104 changes: 28 additions & 76 deletions pandas/tests/series/methods/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,11 +359,9 @@ def test_interp_invalid_method_and_value(self):
# GH#36624
ser = Series([1, 3, np.nan, 12, np.nan, 25])

msg = "'fill_value' is not a valid keyword for Series.interpolate"
msg2 = "Series.interpolate with method=pad"
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(FutureWarning, match=msg2):
ser.interpolate(fill_value=3, method="pad")
msg2 = "Series cannot interpolate with method=pad"
with pytest.raises(ValueError, match=msg2):
ser.interpolate(fill_value=3, method="pad")

def test_interp_limit_forward(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
Expand Down Expand Up @@ -455,107 +453,70 @@ def test_interp_limit_area(self):
s.interpolate(method="linear", limit_area="abc")

@pytest.mark.parametrize(
"method, limit_direction, expected",
[
("pad", "backward", "forward"),
("ffill", "backward", "forward"),
("backfill", "forward", "backward"),
("bfill", "forward", "backward"),
("pad", "both", "forward"),
("ffill", "both", "forward"),
("backfill", "both", "backward"),
("bfill", "both", "backward"),
],
)
def test_interp_limit_direction_raises(self, method, limit_direction, expected):
# https://github.com/pandas-dev/pandas/pull/34746
s = Series([1, 2, 3])

msg = f"`limit_direction` must be '{expected}' for method `{method}`"
msg2 = "Series.interpolate with method="
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(FutureWarning, match=msg2):
s.interpolate(method=method, limit_direction=limit_direction)

@pytest.mark.parametrize(
"data, expected_data, kwargs",
"data, kwargs",
(
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan],
{"method": "pad", "limit_area": "inside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan],
{"method": "pad", "limit_area": "inside", "limit": 1},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0],
{"method": "pad", "limit_area": "outside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan],
{"method": "pad", "limit_area": "outside", "limit": 1},
),
(
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
{"method": "pad", "limit_area": "outside", "limit": 1},
),
(
range(5),
range(5),
{"method": "pad", "limit_area": "outside", "limit": 1},
),
),
)
def test_interp_limit_area_with_pad(self, data, expected_data, kwargs):
def test_interp_limit_area_with_pad(self, data, kwargs):
# GH26796

s = Series(data)
expected = Series(expected_data)
msg = "Series.interpolate with method=pad"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.interpolate(**kwargs)
tm.assert_series_equal(result, expected)
msg = "Series cannot interpolate with method=pad"
with pytest.raises(ValueError, match=msg):
s.interpolate(**kwargs)

@pytest.mark.parametrize(
"data, expected_data, kwargs",
"data, kwargs",
(
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan],
{"method": "bfill", "limit_area": "inside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan],
{"method": "bfill", "limit_area": "inside", "limit": 1},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
{"method": "bfill", "limit_area": "outside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
{"method": "bfill", "limit_area": "outside", "limit": 1},
),
),
)
def test_interp_limit_area_with_backfill(self, data, expected_data, kwargs):
def test_interp_limit_area_with_backfill(self, data, kwargs):
# GH26796

s = Series(data)
expected = Series(expected_data)
msg = "Series.interpolate with method=bfill"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.interpolate(**kwargs)
tm.assert_series_equal(result, expected)

msg = "Series cannot interpolate with method=bfill"
with pytest.raises(ValueError, match=msg):
s.interpolate(**kwargs)

def test_interp_limit_direction(self):
# These tests are for issue #9218 -- fill NaNs in both directions.
Expand Down Expand Up @@ -650,37 +611,28 @@ def test_interp_datetime64(self, method, tz_naive_fixture):
df = Series(
[1, np.nan, 3], index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture)
)
warn = None if method == "nearest" else FutureWarning
msg = "Series.interpolate with method=pad is deprecated"
with tm.assert_produces_warning(warn, match=msg):
result = df.interpolate(method=method)
if warn is not None:
# check the "use ffill instead" is equivalent
alt = df.ffill()
tm.assert_series_equal(result, alt)

expected = Series(
[1.0, 1.0, 3.0],
index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture),
)
tm.assert_series_equal(result, expected)
if method == "nearest":
result = df.interpolate(method=method)
expected = Series(
[1.0, 1.0, 3.0],
index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture),
)
tm.assert_series_equal(result, expected)
else:
msg = "Series cannot interpolate with method=pad"
with pytest.raises(ValueError, match=msg):
df.interpolate(method=method)

def test_interp_pad_datetime64tz_values(self):
# GH#27628 missing.interpolate_2d should handle datetimetz values
dti = date_range("2015-04-05", periods=3, tz="US/Central")
ser = Series(dti)
ser[1] = pd.NaT

msg = "Series.interpolate with method=pad is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.interpolate(method="pad")
# check the "use ffill instead" is equivalent
alt = ser.ffill()
tm.assert_series_equal(result, alt)

expected = Series(dti)
expected[1] = expected[0]
tm.assert_series_equal(result, expected)
msg = "Series cannot interpolate with method=pad"
with pytest.raises(ValueError, match=msg):
ser.interpolate(method="pad")

def test_interp_limit_no_nans(self):
# GH 7173
Expand Down
Loading