Skip to content

DEPR: resample/groupby.pad/backfill in favor of ffill/bfill #45076

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,10 @@ Other Deprecations
- Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`)
- Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`)
- Deprecated downcasting column-by-column in :meth:`DataFrame.where` with integer-dtypes (:issue:`44597`)
-
- Deprecated :meth:`.GroupBy.pad` in favor of :meth:`.GroupBy.ffill` (:issue:`33396`)
- Deprecated :meth:`.GroupBy.backfill` in favor of :meth:`.GroupBy.bfill` (:issue:`33396`)
- Deprecated :meth:`.Resampler.pad` in favor of :meth:`.Resampler.ffill` (:issue:`33396`)
- Deprecated :meth:`.Resampler.backfill` in favor of :meth:`.Resampler.bfill` (:issue:`33396`)

.. ---------------------------------------------------------------------------

Expand Down
38 changes: 28 additions & 10 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2547,7 +2547,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:

@final
@Substitution(name="groupby")
def pad(self, limit=None):
def ffill(self, limit=None):
"""
Forward fill the values.

Expand All @@ -2563,18 +2563,27 @@ def pad(self, limit=None):

See Also
--------
Series.pad: Returns Series with minimum number of char in object.
DataFrame.pad: Object with missing values filled or None if inplace=True.
Series.ffill: Forward fill the missing values in the Series.
DataFrame.ffill: Object with missing values filled or None if inplace=True.
Series.fillna: Fill NaN values of a Series.
DataFrame.fillna: Fill NaN values of a DataFrame.
"""
return self._fill("ffill", limit=limit)

ffill = pad
def pad(self, limit=None):
warnings.warn(
"pad is deprecated and will be removed in a future version. "
"Use ffill instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
return self.ffill(limit=limit)

pad.__doc__ = ffill.__doc__

@final
@Substitution(name="groupby")
def backfill(self, limit=None):
def bfill(self, limit=None):
"""
Backward fill the values.

Expand All @@ -2590,14 +2599,23 @@ def backfill(self, limit=None):

See Also
--------
Series.backfill : Backward fill the missing values in the dataset.
DataFrame.backfill: Backward fill the missing values in the dataset.
Series.bfill : Backward fill the missing values in the dataset.
DataFrame.bfill: Backward fill the missing values in the dataset.
Series.fillna: Fill NaN values of a Series.
DataFrame.fillna: Fill NaN values of a DataFrame.
"""
return self._fill("bfill", limit=limit)

bfill = backfill
def backfill(self, limit=None):
warnings.warn(
"backfill is deprecated and will be removed in a future version. "
"Use bfill instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
return self.bfill(limit=limit)

backfill.__doc__ = bfill.__doc__

@final
@Substitution(name="groupby")
Expand Down Expand Up @@ -3435,7 +3453,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
@final
@Substitution(name="groupby")
@Appender(_common_see_also)
def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, axis=0):
def pct_change(self, periods=1, fill_method="ffill", limit=None, freq=None, axis=0):
"""
Calculate pct_change of each value to previous entry in group.

Expand All @@ -3457,7 +3475,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, axis=0
)
)
if fill_method is None: # GH30463
fill_method = "pad"
fill_method = "ffill"
limit = 0
filled = getattr(self, fill_method)(limit=limit)
fill_grp = filled.groupby(self.grouper.codes, axis=self.axis)
Expand Down
46 changes: 33 additions & 13 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
final,
no_type_check,
)
import warnings

import numpy as np

Expand Down Expand Up @@ -40,6 +41,7 @@
deprecate_nonkeyword_arguments,
doc,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.generic import (
ABCDataFrame,
Expand Down Expand Up @@ -509,7 +511,7 @@ def _wrap_result(self, result):

return result

def pad(self, limit=None):
def ffill(self, limit=None):
"""
Forward fill the values.

Expand All @@ -527,9 +529,18 @@ def pad(self, limit=None):
Series.fillna: Fill NA/NaN values using the specified method.
DataFrame.fillna: Fill NA/NaN values using the specified method.
"""
return self._upsample("pad", limit=limit)
return self._upsample("ffill", limit=limit)

def pad(self, limit=None):
warnings.warn(
"pad is deprecated and will be removed in a future version. "
"Use ffill instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
return self.ffill(limit=limit)

ffill = pad
pad.__doc__ = ffill.__doc__

def nearest(self, limit=None):
"""
Expand Down Expand Up @@ -591,7 +602,7 @@ def nearest(self, limit=None):
"""
return self._upsample("nearest", limit=limit)

def backfill(self, limit=None):
def bfill(self, limit=None):
"""
Backward fill the new missing values in the resampled data.

Expand All @@ -618,7 +629,7 @@ def backfill(self, limit=None):
fillna : Fill NaN values using the specified method, which can be
'backfill'.
nearest : Fill NaN values with nearest neighbor starting from center.
pad : Forward fill NaN values.
ffill : Forward fill NaN values.
Series.fillna : Fill NaN values in the Series using the
specified method, which can be 'backfill'.
DataFrame.fillna : Fill NaN values in the DataFrame using the
Expand All @@ -640,15 +651,15 @@ def backfill(self, limit=None):
2018-01-01 02:00:00 3
Freq: H, dtype: int64

>>> s.resample('30min').backfill()
>>> s.resample('30min').bfill()
2018-01-01 00:00:00 1
2018-01-01 00:30:00 2
2018-01-01 01:00:00 2
2018-01-01 01:30:00 3
2018-01-01 02:00:00 3
Freq: 30T, dtype: int64

>>> s.resample('15min').backfill(limit=2)
>>> s.resample('15min').bfill(limit=2)
2018-01-01 00:00:00 1.0
2018-01-01 00:15:00 NaN
2018-01-01 00:30:00 2.0
Expand All @@ -671,15 +682,15 @@ def backfill(self, limit=None):
2018-01-01 01:00:00 NaN 3
2018-01-01 02:00:00 6.0 5

>>> df.resample('30min').backfill()
>>> df.resample('30min').bfill()
a b
2018-01-01 00:00:00 2.0 1
2018-01-01 00:30:00 NaN 3
2018-01-01 01:00:00 NaN 3
2018-01-01 01:30:00 6.0 5
2018-01-01 02:00:00 6.0 5

>>> df.resample('15min').backfill(limit=2)
>>> df.resample('15min').bfill(limit=2)
a b
2018-01-01 00:00:00 2.0 1.0
2018-01-01 00:15:00 NaN NaN
Expand All @@ -691,9 +702,18 @@ def backfill(self, limit=None):
2018-01-01 01:45:00 6.0 5.0
2018-01-01 02:00:00 6.0 5.0
"""
return self._upsample("backfill", limit=limit)
return self._upsample("bfill", limit=limit)

def backfill(self, limit=None):
warnings.warn(
"backfill is deprecated and will be removed in a future version. "
"Use bfill instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
return self.bfill(limit=limit)

bfill = backfill
backfill.__doc__ = bfill.__doc__

def fillna(self, method, limit=None):
"""
Expand Down Expand Up @@ -727,8 +747,8 @@ def fillna(self, method, limit=None):

See Also
--------
backfill : Backward fill NaN values in the resampled data.
pad : Forward fill NaN values in the resampled data.
bfill : Backward fill NaN values in the resampled data.
ffill : Forward fill NaN values in the resampled data.
nearest : Fill NaN values in the resampled data
with nearest neighbor starting from center.
interpolate : Fill NaN values using interpolation.
Expand Down
11 changes: 10 additions & 1 deletion pandas/tests/apply/test_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,11 @@ def test_agg_cython_table_transform_frame(df, func, expected, axis):
@pytest.mark.parametrize("op", series_transform_kernels)
def test_transform_groupby_kernel_series(string_series, op):
# GH 35964

# TODO(2.0) Remove after pad/backfill deprecation enforced
if op == "backfill":
op = "bfill"
elif op == "pad":
op = "ffill"
args = [0.0] if op == "fillna" else []
ones = np.ones(string_series.shape[0])
expected = string_series.groupby(ones).transform(op, *args)
Expand All @@ -257,6 +261,11 @@ def test_transform_groupby_kernel_series(string_series, op):
def test_transform_groupby_kernel_frame(
axis, float_frame, op, using_array_manager, request
):
# TODO(2.0) Remove after pad/backfill deprecation enforced
if op == "backfill":
op = "bfill"
elif op == "pad":
op = "ffill"
# GH 35964
if using_array_manager and op == "pct_change" and axis in (1, "columns"):
# TODO(ArrayManager) shift with axis=1
Expand Down
15 changes: 14 additions & 1 deletion pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2269,7 +2269,11 @@ def test_groupby_duplicate_index():
def test_dup_labels_output_shape(groupby_func, idx):
if groupby_func in {"size", "ngroup", "cumcount"}:
pytest.skip("Not applicable")

# TODO(2.0) Remove after pad/backfill deprecation enforced
if groupby_func == "backfill":
groupby_func = "bfill"
elif groupby_func == "pad":
groupby_func = "ffill"
df = DataFrame([[1, 1]], columns=idx)
grp_by = df.groupby([0])

Expand Down Expand Up @@ -2614,3 +2618,12 @@ def test_rolling_wrong_param_min_period():
result_error_msg = r"__init__\(\) got an unexpected keyword argument 'min_period'"
with pytest.raises(TypeError, match=result_error_msg):
test_df.groupby("name")["val"].rolling(window=2, min_period=1).sum()


def test_pad_backfill_deprecation():
# GH 33396
s = Series([1, 2, 3])
with tm.assert_produces_warning(FutureWarning, match="backfill"):
s.groupby(level=0).backfill()
with tm.assert_produces_warning(FutureWarning, match="pad"):
s.groupby(level=0).pad()
6 changes: 5 additions & 1 deletion pandas/tests/groupby/test_groupby_subclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ def test_groupby_preserves_subclass(obj, groupby_func):

if isinstance(obj, Series) and groupby_func in {"corrwith"}:
pytest.skip("Not applicable")

# TODO(2.0) Remove after pad/backfill deprecation enforced
if groupby_func == "backfill":
groupby_func = "bfill"
elif groupby_func == "pad":
groupby_func = "ffill"
grouped = obj.groupby(np.arange(0, 10))

# Groups should preserve subclass type
Expand Down
11 changes: 10 additions & 1 deletion pandas/tests/groupby/transform/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,11 @@ def test_transform_axis_1(request, transformation_func, using_array_manager):
request.node.add_marker(
pytest.mark.xfail(reason="ArrayManager: shift axis=1 not yet implemented")
)
# TODO(2.0) Remove after pad/backfill deprecation enforced
if transformation_func == "backfill":
transformation_func = "bfill"
elif transformation_func == "pad":
transformation_func = "ffill"
warn = None
if transformation_func == "tshift":
warn = FutureWarning
Expand Down Expand Up @@ -357,7 +362,11 @@ def test_transform_transformation_func(request, transformation_func):
},
index=date_range("2020-01-01", "2020-01-07"),
)

# TODO(2.0) Remove after pad/backfill deprecation enforced
if transformation_func == "backfill":
transformation_func = "bfill"
elif transformation_func == "pad":
transformation_func = "ffill"
if transformation_func == "cumcount":
test_op = lambda x: x.transform("cumcount")
mock_op = lambda x: Series(range(len(x)), x.index)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/resample/test_datetime_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ def test_resample_upsample():
s = Series(np.random.rand(len(dti)), dti)

# to minutely, by padding
result = s.resample("Min").pad()
result = s.resample("Min").ffill()
assert len(result) == 12961
assert result[0] == s[0]
assert result[-1] == s[-1]
Expand Down Expand Up @@ -1810,7 +1810,7 @@ def test_resample_calendar_day_with_dst(
):
# GH 35219
ts = Series(1.0, date_range(first, last, freq=freq_in, tz="Europe/Amsterdam"))
result = ts.resample(freq_out).pad()
result = ts.resample(freq_out).ffill()
expected = Series(
1.0, date_range(first, exp_last, freq=freq_out, tz="Europe/Amsterdam")
)
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/resample/test_deprecated.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,3 +305,12 @@ def test_interpolate_posargs_deprecation():

expected.index._data.freq = "3s"
tm.assert_series_equal(result, expected)


def test_pad_backfill_deprecation():
# GH 33396
s = Series([1, 2, 3], index=date_range("20180101", periods=3, freq="h"))
with tm.assert_produces_warning(FutureWarning, match="backfill"):
s.resample("30min").backfill()
with tm.assert_produces_warning(FutureWarning, match="pad"):
s.resample("30min").pad()
2 changes: 1 addition & 1 deletion pandas/tests/resample/test_resampler_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def test_methods():
expected = g.B.apply(lambda x: getattr(x.resample("2s"), f)())
tm.assert_series_equal(result, expected)

for f in ["nearest", "backfill", "ffill", "asfreq"]:
for f in ["nearest", "bfill", "ffill", "asfreq"]:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
tm.assert_frame_equal(result, expected)
Expand Down