diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 762f41b4049c2..baacc8c421414 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -227,6 +227,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) +- Deprecated 'fill_method' and 'limit' keywords in :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`DataFrameGroupBy.pct_change`, and :meth:`SeriesGroupBy.pct_change`, explicitly call ``ffill`` or ``bfill`` before calling ``pct_change`` instead (:issue:`53491`) - Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`) - Deprecated 'quantile' keyword in :meth:`Rolling.quantile` and :meth:`Expanding.quantile`, renamed as 'q' instead (:issue:`52550`) - Deprecated :meth:`.DataFrameGroupBy.apply` and methods on the objects returned by :meth:`.DataFrameGroupBy.resample` operating on the grouping column(s); select the columns to operate on after groupby to either explicitly include or exclude the groupings and avoid the ``FutureWarning`` (:issue:`7155`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 91083f4018c06..f73ef36f76086 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11219,8 +11219,8 @@ def describe( def pct_change( self, periods: int = 1, - fill_method: Literal["backfill", "bfill", "pad", "ffill"] | None = "pad", - limit: int | None = None, + fill_method: FillnaOptions | None | lib.NoDefault = lib.no_default, + limit: int | None | lib.NoDefault = lib.no_default, freq=None, **kwargs, ) -> Self: @@ -11244,8 +11244,14 @@ def pct_change( Periods to shift for forming percent change. fill_method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad' How to handle NAs **before** computing percent changes. + + .. deprecated:: 2.1 + limit : int, default None The number of consecutive NAs to fill before stopping. + + .. deprecated:: 2.1 + freq : DateOffset, timedelta, or str, optional Increment to use from time series API (e.g. 'M' or BDay()). **kwargs @@ -11298,7 +11304,7 @@ def pct_change( 3 85.0 dtype: float64 - >>> s.pct_change(fill_method='ffill') + >>> s.ffill().pct_change() 0 NaN 1 0.011111 2 0.000000 @@ -11345,6 +11351,31 @@ def pct_change( GOOG 0.179241 0.094112 NaN APPL -0.252395 -0.011860 NaN """ + # GH#53491 + if fill_method is not lib.no_default or limit is not lib.no_default: + warnings.warn( + "The 'fill_method' and 'limit' keywords in " + f"{type(self).__name__}.pct_change are deprecated and will be " + "removed in a future version. Call " + f"{'bfill' if fill_method in ('backfill', 'bfill') else 'ffill'} " + "before calling pct_change instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if fill_method is lib.no_default: + if self.isna().values.any(): + warnings.warn( + "The default fill_method='pad' in " + f"{type(self).__name__}.pct_change is deprecated and will be " + "removed in a future version. Call ffill before calling " + "pct_change to retain current behavior and silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + fill_method = "pad" + if limit is lib.no_default: + limit = None + axis = self._get_axis_number(kwargs.pop("axis", "index")) if fill_method is None: data = self diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 4ef9b02e3afad..d1ca10ef91c2c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4787,8 +4787,8 @@ def diff( def pct_change( self, periods: int = 1, - fill_method: FillnaOptions = "ffill", - limit: int | None = None, + fill_method: FillnaOptions | lib.NoDefault = lib.no_default, + limit: int | None | lib.NoDefault = lib.no_default, freq=None, axis: Axis | lib.NoDefault = lib.no_default, ): @@ -4838,6 +4838,30 @@ def pct_change( catfish NaN NaN goldfish 0.2 0.125 """ + # GH#53491 + if fill_method is not lib.no_default or limit is not lib.no_default: + warnings.warn( + "The 'fill_method' and 'limit' keywords in " + f"{type(self).__name__}.pct_change are deprecated and will be " + "removed in a future version. Call " + f"{'bfill' if fill_method in ('backfill', 'bfill') else 'ffill'} " + "before calling pct_change instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if fill_method is lib.no_default: + if any(grp.isna().values.any() for _, grp in self): + warnings.warn( + "The default fill_method='ffill' in " + f"{type(self).__name__}.pct_change is deprecated and will be " + "removed in a future version. Call ffill before calling " + "pct_change to retain current behavior and silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + fill_method = "ffill" + if limit is lib.no_default: + limit = None if axis is not lib.no_default: axis = self.obj._get_axis_number(axis) diff --git a/pandas/tests/frame/methods/test_pct_change.py b/pandas/tests/frame/methods/test_pct_change.py index 37d6361dec935..d0153da038a75 100644 --- a/pandas/tests/frame/methods/test_pct_change.py +++ b/pandas/tests/frame/methods/test_pct_change.py @@ -10,7 +10,7 @@ class TestDataFramePctChange: @pytest.mark.parametrize( - "periods,fill_method,limit,exp", + "periods, fill_method, limit, exp", [ (1, "ffill", None, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, 0]), (1, "ffill", 1, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, np.nan]), @@ -28,7 +28,12 @@ def test_pct_change_with_nas( vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan] obj = frame_or_series(vals) - res = obj.pct_change(periods=periods, fill_method=fill_method, limit=limit) + msg = ( + "The 'fill_method' and 'limit' keywords in " + f"{type(obj).__name__}.pct_change are deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + res = obj.pct_change(periods=periods, fill_method=fill_method, limit=limit) tm.assert_equal(res, frame_or_series(exp)) def test_pct_change_numeric(self): @@ -40,21 +45,34 @@ def test_pct_change_numeric(self): pnl.iat[1, 1] = np.nan pnl.iat[2, 3] = 60 + msg = ( + "The 'fill_method' and 'limit' keywords in " + "DataFrame.pct_change are deprecated" + ) + for axis in range(2): expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift(axis=axis) - 1 - result = pnl.pct_change(axis=axis, fill_method="pad") + with tm.assert_produces_warning(FutureWarning, match=msg): + result = pnl.pct_change(axis=axis, fill_method="pad") tm.assert_frame_equal(result, expected) def test_pct_change(self, datetime_frame): - rs = datetime_frame.pct_change(fill_method=None) + msg = ( + "The 'fill_method' and 'limit' keywords in " + "DataFrame.pct_change are deprecated" + ) + + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = datetime_frame.pct_change(fill_method=None) tm.assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1) rs = datetime_frame.pct_change(2) filled = datetime_frame.ffill() tm.assert_frame_equal(rs, filled / filled.shift(2) - 1) - rs = datetime_frame.pct_change(fill_method="bfill", limit=1) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = datetime_frame.pct_change(fill_method="bfill", limit=1) filled = datetime_frame.bfill(limit=1) tm.assert_frame_equal(rs, filled / filled.shift(1) - 1) @@ -69,7 +87,10 @@ def test_pct_change_shift_over_nas(self): df = DataFrame({"a": s, "b": s}) - chg = df.pct_change() + msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + chg = df.pct_change() + expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) edf = DataFrame({"a": expected, "b": expected}) tm.assert_frame_equal(chg, edf) @@ -88,18 +109,31 @@ def test_pct_change_shift_over_nas(self): def test_pct_change_periods_freq( self, datetime_frame, freq, periods, fill_method, limit ): - # GH#7292 - rs_freq = datetime_frame.pct_change( - freq=freq, fill_method=fill_method, limit=limit - ) - rs_periods = datetime_frame.pct_change( - periods, fill_method=fill_method, limit=limit + msg = ( + "The 'fill_method' and 'limit' keywords in " + "DataFrame.pct_change are deprecated" ) + + # GH#7292 + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_freq = datetime_frame.pct_change( + freq=freq, fill_method=fill_method, limit=limit + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_periods = datetime_frame.pct_change( + periods, fill_method=fill_method, limit=limit + ) tm.assert_frame_equal(rs_freq, rs_periods) empty_ts = DataFrame(index=datetime_frame.index, columns=datetime_frame.columns) - rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) - rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_freq = empty_ts.pct_change( + freq=freq, fill_method=fill_method, limit=limit + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_periods = empty_ts.pct_change( + periods, fill_method=fill_method, limit=limit + ) tm.assert_frame_equal(rs_freq, rs_periods) @@ -109,7 +143,14 @@ def test_pct_change_with_duplicated_indices(fill_method): data = DataFrame( {0: [np.nan, 1, 2, 3, 9, 18], 1: [0, 1, np.nan, 3, 9, 18]}, index=["a", "b"] * 3 ) - result = data.pct_change(fill_method=fill_method) + + msg = ( + "The 'fill_method' and 'limit' keywords in " + "DataFrame.pct_change are deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = data.pct_change(fill_method=fill_method) + if fill_method is None: second_column = [np.nan, np.inf, np.nan, np.nan, 2.0, 1.0] else: diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 269fda8fbf361..ab268a1d94b96 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -622,8 +622,15 @@ def test_categorical_transformers( "x", dropna=False, observed=observed, sort=sort, as_index=as_index ) gb_dropna = df.groupby("x", dropna=True, observed=observed, sort=sort) - result = getattr(gb_keepna, transformation_func)(*args) + + msg = "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" + if transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=msg): + result = getattr(gb_keepna, "pct_change")(*args) + else: + result = getattr(gb_keepna, transformation_func)(*args) expected = getattr(gb_dropna, transformation_func)(*args) + for iloc, value in zip( df[df["x"].isnull()].index.tolist(), null_group_result.values.ravel() ): diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index eb7e6c154afc9..397500f64787f 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -408,11 +408,24 @@ def mock_op(x): test_op = lambda x: x.transform(transformation_func) mock_op = lambda x: getattr(x, transformation_func)() - result = test_op(df.groupby("A")) + msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" + groupby_msg = ( + "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" + ) + if transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=groupby_msg): + result = test_op(df.groupby("A")) + else: + result = test_op(df.groupby("A")) + # pass the group in same order as iterating `for ... in df.groupby(...)` # but reorder to match df's index since this is a transform groups = [df[["B"]].iloc[4:6], df[["B"]].iloc[6:], df[["B"]].iloc[:4]] - expected = concat([mock_op(g) for g in groups]).sort_index() + if transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = concat([mock_op(g) for g in groups]).sort_index() + else: + expected = concat([mock_op(g) for g in groups]).sort_index() # sort_index does not preserve the freq expected = expected.set_axis(df.index) @@ -973,9 +986,14 @@ def test_pct_change(frame_or_series, freq, periods, fill_method, limit): else: expected = expected.to_frame("vals") - result = gb.pct_change( - periods=periods, fill_method=fill_method, limit=limit, freq=freq + msg = ( + "The 'fill_method' and 'limit' keywords in " + f"{type(gb).__name__}.pct_change are deprecated" ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.pct_change( + periods=periods, fill_method=fill_method, limit=limit, freq=freq + ) tm.assert_equal(result, expected) @@ -1412,7 +1430,12 @@ def test_null_group_str_transformer(request, dropna, transformation_func): # ngroup/cumcount always returns a Series as it counts the groups, not values expected = expected["B"].rename(None) - result = gb.transform(transformation_func, *args) + msg = "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" + if transformation_func == "pct_change" and not dropna: + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.transform("pct_change", *args) + else: + result = gb.transform(transformation_func, *args) tm.assert_equal(result, expected) diff --git a/pandas/tests/series/methods/test_pct_change.py b/pandas/tests/series/methods/test_pct_change.py index 475d729b6ce78..38a42062b275e 100644 --- a/pandas/tests/series/methods/test_pct_change.py +++ b/pandas/tests/series/methods/test_pct_change.py @@ -10,14 +10,21 @@ class TestSeriesPctChange: def test_pct_change(self, datetime_series): - rs = datetime_series.pct_change(fill_method=None) + msg = ( + "The 'fill_method' and 'limit' keywords in " + "Series.pct_change are deprecated" + ) + + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = datetime_series.pct_change(fill_method=None) tm.assert_series_equal(rs, datetime_series / datetime_series.shift(1) - 1) rs = datetime_series.pct_change(2) filled = datetime_series.ffill() tm.assert_series_equal(rs, filled / filled.shift(2) - 1) - rs = datetime_series.pct_change(fill_method="bfill", limit=1) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = datetime_series.pct_change(fill_method="bfill", limit=1) filled = datetime_series.bfill(limit=1) tm.assert_series_equal(rs, filled / filled.shift(1) - 1) @@ -40,7 +47,10 @@ def test_pct_change_with_duplicate_axis(self): def test_pct_change_shift_over_nas(self): s = Series([1.0, 1.5, np.nan, 2.5, 3.0]) - chg = s.pct_change() + msg = "The default fill_method='pad' in Series.pct_change is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + chg = s.pct_change() + expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) tm.assert_series_equal(chg, expected) @@ -58,18 +68,31 @@ def test_pct_change_shift_over_nas(self): def test_pct_change_periods_freq( self, freq, periods, fill_method, limit, datetime_series ): - # GH#7292 - rs_freq = datetime_series.pct_change( - freq=freq, fill_method=fill_method, limit=limit - ) - rs_periods = datetime_series.pct_change( - periods, fill_method=fill_method, limit=limit + msg = ( + "The 'fill_method' and 'limit' keywords in " + "Series.pct_change are deprecated" ) + + # GH#7292 + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_freq = datetime_series.pct_change( + freq=freq, fill_method=fill_method, limit=limit + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_periods = datetime_series.pct_change( + periods, fill_method=fill_method, limit=limit + ) tm.assert_series_equal(rs_freq, rs_periods) empty_ts = Series(index=datetime_series.index, dtype=object) - rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) - rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_freq = empty_ts.pct_change( + freq=freq, fill_method=fill_method, limit=limit + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_periods = empty_ts.pct_change( + periods, fill_method=fill_method, limit=limit + ) tm.assert_series_equal(rs_freq, rs_periods) @@ -77,6 +100,10 @@ def test_pct_change_periods_freq( def test_pct_change_with_duplicated_indices(fill_method): # GH30463 s = Series([np.nan, 1, 2, 3, 9, 18], index=["a", "b"] * 3) - result = s.pct_change(fill_method=fill_method) + + msg = "The 'fill_method' and 'limit' keywords in Series.pct_change are deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = s.pct_change(fill_method=fill_method) + expected = Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=["a", "b"] * 3) tm.assert_series_equal(result, expected)