-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
fix bfill, ffill and pad when calling with df.interpolate with column… #33959
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
5ef4eba
4f3bcbc
fda8be0
94a738f
5b21ba5
1280b32
01fec55
baeae0c
502f827
42895db
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6881,30 +6881,41 @@ def interpolate( | |
inplace = validate_bool_kwarg(inplace, "inplace") | ||
|
||
axis = self._get_axis_number(axis) | ||
index = self._get_axis(axis) | ||
|
||
if isinstance(self.index, MultiIndex) and method != "linear": | ||
raise ValueError( | ||
"Only `method=linear` interpolation is supported on MultiIndexes." | ||
) | ||
|
||
if method in ["backfill", "bfill", "pad", "ffill"]: | ||
return self.fillna( | ||
method=method, | ||
axis=axis, | ||
inplace=inplace, | ||
limit=limit, | ||
downcast=downcast, | ||
) | ||
|
||
# todo: change interpolation so that no transposing is necessary | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. TODO |
||
# Currently we need this to call the axis correctly inside the various | ||
# interpolation methods | ||
if axis == 0: | ||
df = self | ||
else: | ||
df = self.T | ||
|
||
if isinstance(df.index, MultiIndex) and method != "linear": | ||
raise ValueError( | ||
"Only `method=linear` interpolation is supported on MultiIndexes." | ||
) | ||
|
||
if df.ndim == 2 and np.all(df.dtypes == np.dtype(object)): | ||
if self.ndim == 2 and np.all(self.dtypes == np.dtype(object)): | ||
raise TypeError( | ||
"Cannot interpolate with all object-dtype columns " | ||
"in the DataFrame. Try setting at least one " | ||
"column to a numeric dtype." | ||
) | ||
|
||
# create/use the index | ||
if method == "linear": | ||
# prior default | ||
index = np.arange(len(df.index)) | ||
else: | ||
index = df.index | ||
methods = {"index", "values", "nearest", "time"} | ||
is_numeric_or_datetime = ( | ||
is_numeric_dtype(index.dtype) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -202,7 +202,8 @@ def test_interp_leading_nans(self, check_scipy): | |
result = df.interpolate(method="polynomial", order=1) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
def test_interp_raise_on_only_mixed(self): | ||
@pytest.mark.parametrize("axis", [0, 1]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Use the `axis` fixture (e.g. just pass `axis` as the arg). |
||
def test_interp_raise_on_only_mixed(self, axis): | ||
df = DataFrame( | ||
{ | ||
"A": [1, 2, np.nan, 4], | ||
|
@@ -213,7 +214,7 @@ def test_interp_raise_on_only_mixed(self): | |
} | ||
) | ||
with pytest.raises(TypeError): | ||
df.interpolate(axis=1) | ||
df.astype("object").interpolate(axis=axis) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this changed? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Check the error message here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
|
||
def test_interp_raise_on_all_object_dtype(self): | ||
# GH 22985 | ||
|
@@ -296,3 +297,45 @@ def test_interp_string_axis(self, axis_name, axis_number): | |
result = df.interpolate(method="linear", axis=axis_name) | ||
expected = df.interpolate(method="linear", axis=axis_number) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
@pytest.mark.parametrize("axis", [0, 1]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Use the `axis` fixture. |
||
def test_interp_ffill(self, axis): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you parameterise these tests: `@pytest.mark.parametrize("method", ["ffill", "bfill", "pad"])`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
# GH 33956 | ||
CloseChoice marked this conversation as resolved.
Show resolved
Hide resolved
|
||
df = DataFrame( | ||
{ | ||
"A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0], | ||
"B": [2.0, 4.0, 6.0, np.nan, 8.0, 10.0], | ||
"C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0], | ||
} | ||
) | ||
expected = df.ffill(axis=axis) | ||
result = df.interpolate(method="ffill", axis=axis) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
@pytest.mark.parametrize("axis", [0, 1]) | ||
def test_interp_bfill(self, axis): | ||
# GH 33956 | ||
df = DataFrame( | ||
{ | ||
"A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0], | ||
"B": [2.0, 4.0, 6.0, np.nan, 8.0, 10.0], | ||
"C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0], | ||
} | ||
) | ||
expected = df.bfill(axis=axis) | ||
result = df.interpolate(method="bfill", axis=axis) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
@pytest.mark.parametrize("axis", [0, 1]) | ||
def test_interp_pad(self, axis): | ||
# GH 33956 | ||
df = DataFrame( | ||
{ | ||
"A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0], | ||
"B": [2.0, 4.0, 6.0, np.nan, 8.0, 10.0], | ||
"C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0], | ||
} | ||
) | ||
expected = df.fillna(method="pad", axis=axis) | ||
result = df.interpolate(method="pad", axis=axis) | ||
tm.assert_frame_equal(result, expected) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a comment here noting that, for these methods, `limit_direction` and `limit_area` are ignored, and include a link to #26796?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done