Skip to content

fix bfill, ffill and pad when calling with df.interpolate with column… #33959

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 2, 2020
27 changes: 19 additions & 8 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6881,30 +6881,41 @@ def interpolate(
inplace = validate_bool_kwarg(inplace, "inplace")

axis = self._get_axis_number(axis)
index = self._get_axis(axis)

if isinstance(self.index, MultiIndex) and method != "linear":
raise ValueError(
"Only `method=linear` interpolation is supported on MultiIndexes."
)

if method in ["backfill", "bfill", "pad", "ffill"]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comment here noting that, for these methods, `limit_direction` and `limit_area` are ignored, and include a link to #26796?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

return self.fillna(
method=method,
axis=axis,
inplace=inplace,
limit=limit,
downcast=downcast,
)

# todo: change interpolation so that no transposing is necessary
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use uppercase `TODO` in this comment.

# Currently we need this to call the axis correctly inside the various
# interpolation methods
if axis == 0:
df = self
else:
df = self.T

if isinstance(df.index, MultiIndex) and method != "linear":
raise ValueError(
"Only `method=linear` interpolation is supported on MultiIndexes."
)

if df.ndim == 2 and np.all(df.dtypes == np.dtype(object)):
if self.ndim == 2 and np.all(self.dtypes == np.dtype(object)):
raise TypeError(
"Cannot interpolate with all object-dtype columns "
"in the DataFrame. Try setting at least one "
"column to a numeric dtype."
)

# create/use the index
if method == "linear":
# prior default
index = np.arange(len(df.index))
else:
index = df.index
methods = {"index", "values", "nearest", "time"}
is_numeric_or_datetime = (
is_numeric_dtype(index.dtype)
Expand Down
47 changes: 45 additions & 2 deletions pandas/tests/frame/methods/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,8 @@ def test_interp_leading_nans(self, check_scipy):
result = df.interpolate(method="polynomial", order=1)
tm.assert_frame_equal(result, expected)

def test_interp_raise_on_only_mixed(self):
@pytest.mark.parametrize("axis", [0, 1])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the `axis` fixture instead of parametrizing here (i.e. just accept `axis` as the test argument).

def test_interp_raise_on_only_mixed(self, axis):
df = DataFrame(
{
"A": [1, 2, np.nan, 4],
Expand All @@ -213,7 +214,7 @@ def test_interp_raise_on_only_mixed(self):
}
)
with pytest.raises(TypeError):
df.interpolate(axis=1)
df.astype("object").interpolate(axis=axis)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this changed?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check the error message here as well (e.g. pass `match=...` to `pytest.raises`).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


def test_interp_raise_on_all_object_dtype(self):
# GH 22985
Expand Down Expand Up @@ -296,3 +297,45 @@ def test_interp_string_axis(self, axis_name, axis_number):
result = df.interpolate(method="linear", axis=axis_name)
expected = df.interpolate(method="linear", axis=axis_number)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("axis", [0, 1])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the `axis` fixture here as well.

def test_interp_ffill(self, axis):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you parametrize these tests over the method, e.g. with `@pytest.mark.parametrize("method", ["ffill", "bfill", "pad"])`?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

# GH 33956
df = DataFrame(
{
"A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0],
"B": [2.0, 4.0, 6.0, np.nan, 8.0, 10.0],
"C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0],
}
)
expected = df.ffill(axis=axis)
result = df.interpolate(method="ffill", axis=axis)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("axis", [0, 1])
def test_interp_bfill(self, axis):
    """Check interpolate(method="bfill") against DataFrame.bfill on ``axis``."""
    # GH 33956
    columns = {
        "A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0],
        "B": [2.0, 4.0, 6.0, np.nan, 8.0, 10.0],
        "C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0],
    }
    frame = DataFrame(columns)

    # Reference result comes from the dedicated back-fill method.
    want = frame.bfill(axis=axis)
    got = frame.interpolate(method="bfill", axis=axis)

    tm.assert_frame_equal(got, want)

@pytest.mark.parametrize("axis", [0, 1])
def test_interp_pad(self, axis):
    """Check interpolate(method="pad") against fillna(method="pad") on ``axis``."""
    # GH 33956
    columns = {
        "A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0],
        "B": [2.0, 4.0, 6.0, np.nan, 8.0, 10.0],
        "C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0],
    }
    frame = DataFrame(columns)

    # Reference result comes from fillna using the same fill method.
    want = frame.fillna(method="pad", axis=axis)
    got = frame.interpolate(method="pad", axis=axis)

    tm.assert_frame_equal(got, want)