Skip to content

Commit b405904

Browse files
authored
fix bfill, ffill and pad when calling with df.interpolate with column… (#33959)
1 parent 4ba7f9a commit b405904

File tree

3 files changed

+44
-12
lines changed

3 files changed

+44
-12
lines changed

doc/source/whatsnew/v1.1.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -893,6 +893,8 @@ Missing
893893
- Bug in :meth:`replace` when argument ``to_replace`` is of type dict/list and is used on a :class:`Series` containing ``<NA>`` was raising a ``TypeError``. The method now handles this by ignoring ``<NA>`` values when doing the comparison for the replacement (:issue:`32621`)
894894
- Bug in :meth:`~Series.any` and :meth:`~Series.all` incorrectly returning ``<NA>`` for all ``False`` or all ``True`` values using the nullable boolean dtype and with ``skipna=False`` (:issue:`33253`)
895895
- Clarified documentation on interpolate with ``method="akima"``. The ``der`` parameter must be scalar or None (:issue:`33426`)
896+
- :meth:`DataFrame.interpolate` now uses the correct axis convention. Previously, interpolating along columns led to interpolation along indices and vice versa. Furthermore, interpolating with the methods ``pad``, ``ffill``, ``bfill`` and ``backfill`` is now identical to using these methods with :meth:`fillna` (:issue:`12918`, :issue:`29146`)
897+
- Bug in :meth:`DataFrame.interpolate` when called on a DataFrame with column names of string type was throwing a ValueError. The method is now independent of the type of column names (:issue:`33956`)
896898

897899
MultiIndex
898900
^^^^^^^^^^

pandas/core/generic.py

+20-8
Original file line numberDiff line numberDiff line change
@@ -6875,30 +6875,42 @@ def interpolate(
68756875
inplace = validate_bool_kwarg(inplace, "inplace")
68766876

68776877
axis = self._get_axis_number(axis)
6878+
index = self._get_axis(axis)
6879+
6880+
if isinstance(self.index, MultiIndex) and method != "linear":
6881+
raise ValueError(
6882+
"Only `method=linear` interpolation is supported on MultiIndexes."
6883+
)
6884+
6885+
# for the methods backfill, bfill, pad, ffill limit_direction and limit_area
6886+
# are being ignored, see gh-26796 for more information
6887+
if method in ["backfill", "bfill", "pad", "ffill"]:
6888+
return self.fillna(
6889+
method=method,
6890+
axis=axis,
6891+
inplace=inplace,
6892+
limit=limit,
6893+
downcast=downcast,
6894+
)
68786895

6896+
# Currently we need this to call the axis correctly inside the various
6897+
# interpolation methods
68796898
if axis == 0:
68806899
df = self
68816900
else:
68826901
df = self.T
68836902

6884-
if isinstance(df.index, MultiIndex) and method != "linear":
6885-
raise ValueError(
6886-
"Only `method=linear` interpolation is supported on MultiIndexes."
6887-
)
6888-
6889-
if df.ndim == 2 and np.all(df.dtypes == np.dtype(object)):
6903+
if self.ndim == 2 and np.all(self.dtypes == np.dtype(object)):
68906904
raise TypeError(
68916905
"Cannot interpolate with all object-dtype columns "
68926906
"in the DataFrame. Try setting at least one "
68936907
"column to a numeric dtype."
68946908
)
68956909

6896-
# create/use the index
68976910
if method == "linear":
68986911
# prior default
68996912
index = np.arange(len(df.index))
69006913
else:
6901-
index = df.index
69026914
methods = {"index", "values", "nearest", "time"}
69036915
is_numeric_or_datetime = (
69046916
is_numeric_dtype(index.dtype)

pandas/tests/frame/methods/test_interpolate.py

+22-4
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ def test_interp_leading_nans(self, check_scipy):
202202
result = df.interpolate(method="polynomial", order=1)
203203
tm.assert_frame_equal(result, expected)
204204

205-
def test_interp_raise_on_only_mixed(self):
205+
def test_interp_raise_on_only_mixed(self, axis):
206206
df = DataFrame(
207207
{
208208
"A": [1, 2, np.nan, 4],
@@ -212,8 +212,13 @@ def test_interp_raise_on_only_mixed(self):
212212
"E": [1, 2, 3, 4],
213213
}
214214
)
215-
with pytest.raises(TypeError):
216-
df.interpolate(axis=1)
215+
msg = (
216+
"Cannot interpolate with all object-dtype columns "
217+
"in the DataFrame. Try setting at least one "
218+
"column to a numeric dtype."
219+
)
220+
with pytest.raises(TypeError, match=msg):
221+
df.astype("object").interpolate(axis=axis)
217222

218223
def test_interp_raise_on_all_object_dtype(self):
219224
# GH 22985
@@ -272,7 +277,6 @@ def test_interp_ignore_all_good(self):
272277
result = df[["B", "D"]].interpolate(downcast=None)
273278
tm.assert_frame_equal(result, df[["B", "D"]])
274279

275-
@pytest.mark.parametrize("axis", [0, 1])
276280
def test_interp_time_inplace_axis(self, axis):
277281
# GH 9687
278282
periods = 5
@@ -296,3 +300,17 @@ def test_interp_string_axis(self, axis_name, axis_number):
296300
result = df.interpolate(method="linear", axis=axis_name)
297301
expected = df.interpolate(method="linear", axis=axis_number)
298302
tm.assert_frame_equal(result, expected)
303+
304+
@pytest.mark.parametrize("method", ["ffill", "bfill", "pad"])
305+
def test_interp_fillna_methods(self, axis, method):
306+
# GH 12918
307+
df = DataFrame(
308+
{
309+
"A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0],
310+
"B": [2.0, 4.0, 6.0, np.nan, 8.0, 10.0],
311+
"C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0],
312+
}
313+
)
314+
expected = df.fillna(axis=axis, method=method)
315+
result = df.interpolate(method=method, axis=axis)
316+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)