Skip to content

fix bfill, ffill and pad when calling with df.interpolate with column… #33959

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 2, 2020
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ Backwards incompatible API changes
- Combining a ``Categorical`` with integer categories and which contains missing values
with a float dtype column in operations such as :func:`concat` or :meth:`~DataFrame.append`
will now result in a float column instead of an object dtyped column (:issue:`33607`)
- :meth:`DataFrame.interpolate` uses the correct axis convention. Previously, interpolating along columns led to interpolation along indices and vice versa.

``MultiIndex.get_indexer`` interprets `method` argument differently
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down
29 changes: 21 additions & 8 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6881,30 +6881,43 @@ def interpolate(
inplace = validate_bool_kwarg(inplace, "inplace")

axis = self._get_axis_number(axis)
index = self._get_axis(axis)

if isinstance(self.index, MultiIndex) and method != "linear":
raise ValueError(
"Only `method=linear` interpolation is supported on MultiIndexes."
)

# For the methods backfill, bfill, pad and ffill, limit_direction and
# limit_area are ignored; see GH 26796 for more information.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

list as gh-26796

if method in ["backfill", "bfill", "pad", "ffill"]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a comment here that for these methods, limit_direction and limit_area are being ignored and include a link to #26796

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

return self.fillna(
method=method,
axis=axis,
inplace=inplace,
limit=limit,
downcast=downcast,
)

# TODO: change interpolation so that no transposing is necessary
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO

# Currently we need this to call the axis correctly inside the various
# interpolation methods
if axis == 0:
df = self
else:
df = self.T

if isinstance(df.index, MultiIndex) and method != "linear":
raise ValueError(
"Only `method=linear` interpolation is supported on MultiIndexes."
)

if df.ndim == 2 and np.all(df.dtypes == np.dtype(object)):
if self.ndim == 2 and np.all(self.dtypes == np.dtype(object)):
raise TypeError(
"Cannot interpolate with all object-dtype columns "
"in the DataFrame. Try setting at least one "
"column to a numeric dtype."
)

# create/use the index
if method == "linear":
# prior default
index = np.arange(len(df.index))
else:
index = df.index
methods = {"index", "values", "nearest", "time"}
is_numeric_or_datetime = (
is_numeric_dtype(index.dtype)
Expand Down
20 changes: 18 additions & 2 deletions pandas/tests/frame/methods/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,8 @@ def test_interp_leading_nans(self, check_scipy):
result = df.interpolate(method="polynomial", order=1)
tm.assert_frame_equal(result, expected)

def test_interp_raise_on_only_mixed(self):
@pytest.mark.parametrize("axis", [0, 1])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use the axis fixture (e.g. just pass axis as the arg)

def test_interp_raise_on_only_mixed(self, axis):
df = DataFrame(
{
"A": [1, 2, np.nan, 4],
Expand All @@ -213,7 +214,7 @@ def test_interp_raise_on_only_mixed(self):
}
)
with pytest.raises(TypeError):
df.interpolate(axis=1)
df.astype("object").interpolate(axis=axis)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this changed?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

check the error message here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


def test_interp_raise_on_all_object_dtype(self):
# GH 22985
Expand Down Expand Up @@ -296,3 +297,18 @@ def test_interp_string_axis(self, axis_name, axis_number):
result = df.interpolate(method="linear", axis=axis_name)
expected = df.interpolate(method="linear", axis=axis_number)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("method", ["ffill", "bfill", "pad"])
@pytest.mark.parametrize("axis", [0, 1])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

axis fixture

def test_interp_fillna_methods(self, axis, method):
# GH 33956
df = DataFrame(
{
"A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0],
"B": [2.0, 4.0, 6.0, np.nan, 8.0, 10.0],
"C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0],
}
)
expected = df.fillna(axis=axis, method=method)
result = df.interpolate(method=method, axis=axis)
tm.assert_frame_equal(result, expected)