From dea36ba5115ebea1a9c0843626fbc0a31f50183d Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Sun, 4 Jun 2023 23:45:40 +0800 Subject: [PATCH 01/13] DEPR fill_method and limit keywords in DataFrame/Series pct_change --- pandas/core/generic.py | 27 +++++++- pandas/tests/frame/methods/test_pct_change.py | 66 +++++++++++++++---- .../tests/series/methods/test_pct_change.py | 46 +++++++++---- 3 files changed, 111 insertions(+), 28 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 90a0444872ec7..0704cf3d1dae1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11143,8 +11143,8 @@ def describe( def pct_change( self, periods: int = 1, - fill_method: Literal["backfill", "bfill", "pad", "ffill"] | None = "pad", - limit: int | None = None, + fill_method: FillnaOptions | None | lib.NoDefault = lib.no_default, + limit: int | None | lib.NoDefault = lib.no_default, freq=None, **kwargs, ) -> Self: @@ -11168,8 +11168,14 @@ def pct_change( Periods to shift for forming percent change. fill_method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad' How to handle NAs **before** computing percent changes. + + .. deprecated:: 2.1 + limit : int, default None The number of consecutive NAs to fill before stopping. + + .. deprecated:: 2.1 + freq : DateOffset, timedelta, or str, optional Increment to use from time series API (e.g. 'M' or BDay()). **kwargs @@ -11222,7 +11228,7 @@ def pct_change( 3 85.0 dtype: float64 - >>> s.pct_change(fill_method='ffill') + >>> s.fillna(method='ffill').pct_change() 0 NaN 1 0.011111 2 0.000000 @@ -11269,6 +11275,21 @@ def pct_change( GOOG 0.179241 0.094112 NaN APPL -0.252395 -0.011860 NaN """ + if fill_method is not lib.no_default or limit is not lib.no_default: + # GH#53491 + warnings.warn( + "The 'fill_method' and 'limit' keywords in " + f"{type(self).__name__}.pct_change are deprecated and will be " + "removed in a future version. Call fillna directly before " + "calling pct_change instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if fill_method is lib.no_default: + fill_method = "pad" + if limit is lib.no_default: + limit = None + axis = self._get_axis_number(kwargs.pop("axis", "index")) if fill_method is None: data = self diff --git a/pandas/tests/frame/methods/test_pct_change.py b/pandas/tests/frame/methods/test_pct_change.py index a9699f8d15146..d53631e75587d 100644 --- a/pandas/tests/frame/methods/test_pct_change.py +++ b/pandas/tests/frame/methods/test_pct_change.py @@ -10,7 +10,7 @@ class TestDataFramePctChange: @pytest.mark.parametrize( - "periods,fill_method,limit,exp", + "periods, fill_method, limit, exp", [ (1, "ffill", None, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, 0]), (1, "ffill", 1, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, np.nan]), @@ -28,7 +28,12 @@ def test_pct_change_with_nas( vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan] obj = frame_or_series(vals) - res = obj.pct_change(periods=periods, fill_method=fill_method, limit=limit) + msg = ( + "The 'fill_method' and 'limit' keywords in " + f"{type(obj).__name__}.pct_change are deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + res = obj.pct_change(periods=periods, fill_method=fill_method, limit=limit) tm.assert_equal(res, frame_or_series(exp)) def test_pct_change_numeric(self): @@ -40,21 +45,34 @@ def test_pct_change_numeric(self): pnl.iat[1, 1] = np.nan pnl.iat[2, 3] = 60 + msg = ( + "The 'fill_method' and 'limit' keywords in " + "DataFrame.pct_change are deprecated" + ) + for axis in range(2): expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift(axis=axis) - 1 - result = pnl.pct_change(axis=axis, fill_method="pad") + with tm.assert_produces_warning(FutureWarning, match=msg): + result = pnl.pct_change(axis=axis, fill_method="pad") tm.assert_frame_equal(result, expected) def test_pct_change(self, datetime_frame): - rs = datetime_frame.pct_change(fill_method=None) + msg = ( + "The 'fill_method' and 'limit' keywords in " + "DataFrame.pct_change are deprecated" + ) + + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = datetime_frame.pct_change(fill_method=None) tm.assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1) rs = datetime_frame.pct_change(2) filled = datetime_frame.fillna(method="pad") tm.assert_frame_equal(rs, filled / filled.shift(2) - 1) - rs = datetime_frame.pct_change(fill_method="bfill", limit=1) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = datetime_frame.pct_change(fill_method="bfill", limit=1) filled = datetime_frame.fillna(method="bfill", limit=1) tm.assert_frame_equal(rs, filled / filled.shift(1) - 1) @@ -88,18 +106,31 @@ def test_pct_change_shift_over_nas(self): def test_pct_change_periods_freq( self, datetime_frame, freq, periods, fill_method, limit ): - # GH#7292 - rs_freq = datetime_frame.pct_change( - freq=freq, fill_method=fill_method, limit=limit - ) - rs_periods = datetime_frame.pct_change( - periods, fill_method=fill_method, limit=limit + msg = ( + "The 'fill_method' and 'limit' keywords in " + "DataFrame.pct_change are deprecated" ) + + # GH#7292 + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_freq = datetime_frame.pct_change( + freq=freq, fill_method=fill_method, limit=limit + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_periods = datetime_frame.pct_change( + periods, fill_method=fill_method, limit=limit + ) tm.assert_frame_equal(rs_freq, rs_periods) empty_ts = DataFrame(index=datetime_frame.index, columns=datetime_frame.columns) - rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) - rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_freq = empty_ts.pct_change( + freq=freq, fill_method=fill_method, limit=limit + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_periods = empty_ts.pct_change( + periods, fill_method=fill_method, limit=limit + ) tm.assert_frame_equal(rs_freq, rs_periods) @@ -109,7 +140,14 @@ def test_pct_change_with_duplicated_indices(fill_method): data = DataFrame( {0: [np.nan, 1, 2, 3, 9, 18], 1: [0, 1, np.nan, 3, 9, 18]}, index=["a", "b"] * 3 ) - result = data.pct_change(fill_method=fill_method) + + msg = ( + "The 'fill_method' and 'limit' keywords in " + "DataFrame.pct_change are deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = data.pct_change(fill_method=fill_method) + if fill_method is None: second_column = [np.nan, np.inf, np.nan, np.nan, 2.0, 1.0] else: diff --git a/pandas/tests/series/methods/test_pct_change.py b/pandas/tests/series/methods/test_pct_change.py index 017fef5fdb31f..51447fc0283aa 100644 --- a/pandas/tests/series/methods/test_pct_change.py +++ b/pandas/tests/series/methods/test_pct_change.py @@ -10,14 +10,21 @@ class TestSeriesPctChange: def test_pct_change(self, datetime_series): - rs = datetime_series.pct_change(fill_method=None) + msg = ( + "The 'fill_method' and 'limit' keywords in " + "Series.pct_change are deprecated" + ) + + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = datetime_series.pct_change(fill_method=None) tm.assert_series_equal(rs, datetime_series / datetime_series.shift(1) - 1) rs = datetime_series.pct_change(2) filled = datetime_series.fillna(method="pad") tm.assert_series_equal(rs, filled / filled.shift(2) - 1) - rs = datetime_series.pct_change(fill_method="bfill", limit=1) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = datetime_series.pct_change(fill_method="bfill", limit=1) filled = datetime_series.fillna(method="bfill", limit=1) tm.assert_series_equal(rs, filled / filled.shift(1) - 1) @@ -58,18 +65,31 @@ def test_pct_change_shift_over_nas(self): def test_pct_change_periods_freq( self, freq, periods, fill_method, limit, datetime_series ): - # GH#7292 - rs_freq = datetime_series.pct_change( - freq=freq, fill_method=fill_method, limit=limit - ) - rs_periods = datetime_series.pct_change( - periods, fill_method=fill_method, limit=limit + msg = ( + "The 'fill_method' and 'limit' keywords in " + "Series.pct_change are deprecated" ) + + # GH#7292 + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_freq = datetime_series.pct_change( + freq=freq, fill_method=fill_method, limit=limit + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_periods = datetime_series.pct_change( + periods, fill_method=fill_method, limit=limit + ) tm.assert_series_equal(rs_freq, rs_periods) empty_ts = Series(index=datetime_series.index, dtype=object) - rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) - rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_freq = empty_ts.pct_change( + freq=freq, fill_method=fill_method, limit=limit + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_periods = empty_ts.pct_change( + periods, fill_method=fill_method, limit=limit + ) tm.assert_series_equal(rs_freq, rs_periods) @@ -77,6 +97,10 @@ def test_pct_change_periods_freq( def test_pct_change_with_duplicated_indices(fill_method): # GH30463 s = Series([np.nan, 1, 2, 3, 9, 18], index=["a", "b"] * 3) - result = s.pct_change(fill_method=fill_method) + + msg = "The 'fill_method' and 'limit' keywords in Series.pct_change are deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = s.pct_change(fill_method=fill_method) + expected = Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=["a", "b"] * 3) tm.assert_series_equal(result, expected) From b9620252a2c923a7cdf3e4d038740930eb76b3ca Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Mon, 5 Jun 2023 00:05:30 +0800 Subject: [PATCH 02/13] DEPR fill_method and limit keywords in GroupBy pct_change --- pandas/core/groupby/groupby.py | 19 +++++++++++++++++-- .../tests/groupby/transform/test_transform.py | 9 +++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index caed6c9747d3b..d7d24940a0885 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3945,8 +3945,8 @@ def diff( def pct_change( self, periods: int = 1, - fill_method: FillnaOptions = "ffill", - limit: int | None = None, + fill_method: FillnaOptions | lib.NoDefault = lib.no_default, + limit: int | None | lib.NoDefault = lib.no_default, freq=None, axis: Axis | lib.NoDefault = lib.no_default, ): @@ -3958,6 +3958,21 @@ def pct_change( Series or DataFrame Percentage changes within each group. """ + if fill_method is not lib.no_default or limit is not lib.no_default: + # GH#53491 + warnings.warn( + "The 'fill_method' and 'limit' keywords in " + f"{type(self).__name__}.pct_change are deprecated and will be " + "removed in a future version. Call fillna directly before " + "calling pct_change instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if fill_method is lib.no_default: + fill_method = "ffill" + if limit is lib.no_default: + limit = None + if axis is not lib.no_default: axis = self.obj._get_axis_number(axis) self._deprecate_axis(axis, "pct_change") diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 09b24284d3b37..6f67a5aae03ba 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -969,9 +969,14 @@ def test_pct_change(frame_or_series, freq, periods, fill_method, limit): else: expected = expected.to_frame("vals") - result = gb.pct_change( - periods=periods, fill_method=fill_method, limit=limit, freq=freq + msg = ( + "The 'fill_method' and 'limit' keywords in " + f"{type(gb).__name__}.pct_change are deprecated" ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.pct_change( + periods=periods, fill_method=fill_method, limit=limit, freq=freq + ) tm.assert_equal(result, expected) From 59ddbc6236aa0ab0072972d6d4ca16254f9dba1e Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Mon, 5 Jun 2023 00:09:09 +0800 Subject: [PATCH 03/13] changelog added --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 92124a536fe26..b97d5ea7ce1f4 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -223,6 +223,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) +- Deprecated 'fill_method' and 'limit' keywords in :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`DataFrameGroupBy.pct_change`, and :meth:`SeriesGroupBy.pct_change`, explicitly call ``fillna`` before calling ``pct_change`` instead (:issue:`53491`) - Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`) - Deprecated 'quantile' keyword in :meth:`Rolling.quantile` and :meth:`Expanding.quantile`, renamed as 'q' instead (:issue:`52550`) - Deprecated :meth:`.DataFrameGroupBy.apply` and methods on the objects returned by :meth:`.DataFrameGroupBy.resample` operating on the grouping column(s); select the columns to operate on after groupby to either explicitly include or exclude the groupings and avoid the ``FutureWarning`` (:issue:`7155`) From fda3770f2aae8a45c4b4269a769a4f4b1b9f23dc Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Mon, 5 Jun 2023 17:07:34 +0800 Subject: [PATCH 04/13] DataFrame/Series pct_change only deprecate default --- pandas/core/generic.py | 21 ++--- pandas/tests/frame/methods/test_pct_change.py | 78 ++++++------------- 2 files changed, 34 insertions(+), 65 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0704cf3d1dae1..1f5ba8fe0a890 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11144,7 +11144,7 @@ def pct_change( self, periods: int = 1, fill_method: FillnaOptions | None | lib.NoDefault = lib.no_default, - limit: int | None | lib.NoDefault = lib.no_default, + limit: int | None = None, freq=None, **kwargs, ) -> Self: @@ -11275,20 +11275,21 @@ def pct_change( GOOG 0.179241 0.094112 NaN APPL -0.252395 -0.011860 NaN """ - if fill_method is not lib.no_default or limit is not lib.no_default: - # GH#53491 + # GH#53491: deprecate default fill_method=False + # TODO: In 3.x, change default fill_method=None, then also in 3.x + # deprecate the fill_method and limit keywords, and finally remove + # them in 4.x + if fill_method is lib.no_default: warnings.warn( - "The 'fill_method' and 'limit' keywords in " - f"{type(self).__name__}.pct_change are deprecated and will be " - "removed in a future version. Call fillna directly before " - "calling pct_change instead.", + f"The default fill_method='pad' in {type(self).__name__}.pct_change " + "is deprecated and will be changed to None in a future version of " + "pandas. Pass fill_method='pad' to retain current behavior or " + "fill_method=None to adopt the future default and silence this " + "warning.", FutureWarning, stacklevel=find_stack_level(), ) - if fill_method is lib.no_default: fill_method = "pad" - if limit is lib.no_default: - limit = None axis = self._get_axis_number(kwargs.pop("axis", "index")) if fill_method is None: diff --git a/pandas/tests/frame/methods/test_pct_change.py b/pandas/tests/frame/methods/test_pct_change.py index d53631e75587d..9ad61693ee608 100644 --- a/pandas/tests/frame/methods/test_pct_change.py +++ b/pandas/tests/frame/methods/test_pct_change.py @@ -10,7 +10,7 @@ class TestDataFramePctChange: @pytest.mark.parametrize( - "periods, fill_method, limit, exp", + "periods,fill_method,limit,exp", [ (1, "ffill", None, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, 0]), (1, "ffill", 1, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, np.nan]), @@ -28,12 +28,7 @@ def test_pct_change_with_nas( vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan] obj = frame_or_series(vals) - msg = ( - "The 'fill_method' and 'limit' keywords in " - f"{type(obj).__name__}.pct_change are deprecated" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - res = obj.pct_change(periods=periods, fill_method=fill_method, limit=limit) + res = obj.pct_change(periods=periods, fill_method=fill_method, limit=limit) tm.assert_equal(res, frame_or_series(exp)) def test_pct_change_numeric(self): @@ -45,38 +40,28 @@ def test_pct_change_numeric(self): pnl.iat[1, 1] = np.nan pnl.iat[2, 3] = 60 - msg = ( - "The 'fill_method' and 'limit' keywords in " - "DataFrame.pct_change are deprecated" - ) - for axis in range(2): expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift(axis=axis) - 1 - - with tm.assert_produces_warning(FutureWarning, match=msg): - result = pnl.pct_change(axis=axis, fill_method="pad") + result = pnl.pct_change(axis=axis, fill_method="pad") tm.assert_frame_equal(result, expected) def test_pct_change(self, datetime_frame): - msg = ( - "The 'fill_method' and 'limit' keywords in " - "DataFrame.pct_change are deprecated" - ) + msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - rs = datetime_frame.pct_change(fill_method=None) + rs = datetime_frame.pct_change(fill_method=None) tm.assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1) - rs = datetime_frame.pct_change(2) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = datetime_frame.pct_change(2) filled = datetime_frame.fillna(method="pad") tm.assert_frame_equal(rs, filled / filled.shift(2) - 1) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs = datetime_frame.pct_change(fill_method="bfill", limit=1) + rs = datetime_frame.pct_change(fill_method="bfill", limit=1) filled = datetime_frame.fillna(method="bfill", limit=1) tm.assert_frame_equal(rs, filled / filled.shift(1) - 1) - rs = datetime_frame.pct_change(freq="5D") + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = datetime_frame.pct_change(freq="5D") filled = datetime_frame.fillna(method="pad") tm.assert_frame_equal( rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled) @@ -84,10 +69,12 @@ def test_pct_change(self, datetime_frame): def test_pct_change_shift_over_nas(self): s = Series([1.0, 1.5, np.nan, 2.5, 3.0]) - df = DataFrame({"a": s, "b": s}) - chg = df.pct_change() + msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + chg = df.pct_change() + expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) edf = DataFrame({"a": expected, "b": expected}) tm.assert_frame_equal(chg, edf) @@ -106,31 +93,18 @@ def test_pct_change_shift_over_nas(self): def test_pct_change_periods_freq( self, datetime_frame, freq, periods, fill_method, limit ): - msg = ( - "The 'fill_method' and 'limit' keywords in " - "DataFrame.pct_change are deprecated" - ) - # GH#7292 - with tm.assert_produces_warning(FutureWarning, match=msg): - rs_freq = datetime_frame.pct_change( - freq=freq, fill_method=fill_method, limit=limit - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs_periods = datetime_frame.pct_change( - periods, fill_method=fill_method, limit=limit - ) + rs_freq = datetime_frame.pct_change( + freq=freq, fill_method=fill_method, limit=limit + ) + rs_periods = datetime_frame.pct_change( + periods, fill_method=fill_method, limit=limit + ) tm.assert_frame_equal(rs_freq, rs_periods) empty_ts = DataFrame(index=datetime_frame.index, columns=datetime_frame.columns) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs_freq = empty_ts.pct_change( - freq=freq, fill_method=fill_method, limit=limit - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs_periods = empty_ts.pct_change( - periods, fill_method=fill_method, limit=limit - ) + rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) + rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) tm.assert_frame_equal(rs_freq, rs_periods) @@ -140,13 +114,7 @@ def test_pct_change_with_duplicated_indices(fill_method): data = DataFrame( {0: [np.nan, 1, 2, 3, 9, 18], 1: [0, 1, np.nan, 3, 9, 18]}, index=["a", "b"] * 3 ) - - msg = ( - "The 'fill_method' and 'limit' keywords in " - "DataFrame.pct_change are deprecated" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = data.pct_change(fill_method=fill_method) + result = data.pct_change(fill_method=fill_method) if fill_method is None: second_column = [np.nan, np.inf, np.nan, np.nan, 2.0, 1.0] From 3cb930b301893ea4f4af0af16118b1b8a1c40bd8 Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Mon, 5 Jun 2023 17:10:53 +0800 Subject: [PATCH 05/13] typo --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1f5ba8fe0a890..9b6d62fe9e3ca 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11275,7 +11275,7 @@ def pct_change( GOOG 0.179241 0.094112 NaN APPL -0.252395 -0.011860 NaN """ - # GH#53491: deprecate default fill_method=False + # GH#53491: deprecate default fill_method="pad" # TODO: In 3.x, change default fill_method=None, then also in 3.x # deprecate the fill_method and limit keywords, and finally remove # them in 4.x From 829f9f96ff4e49775bc4cd0899ce59fef9390c0e Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Mon, 5 Jun 2023 18:48:36 +0800 Subject: [PATCH 06/13] GroupBy pct_change only deprecate default --- pandas/core/groupby/groupby.py | 21 ++--- .../tests/groupby/transform/test_transform.py | 80 +++++++++++++++---- 2 files changed, 76 insertions(+), 25 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index d7d24940a0885..3b4c3b615e235 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3946,7 +3946,7 @@ def pct_change( self, periods: int = 1, fill_method: FillnaOptions | lib.NoDefault = lib.no_default, - limit: int | None | lib.NoDefault = lib.no_default, + limit: int | None = None, freq=None, axis: Axis | lib.NoDefault = lib.no_default, ): @@ -3958,20 +3958,21 @@ def pct_change( Series or DataFrame Percentage changes within each group. """ - if fill_method is not lib.no_default or limit is not lib.no_default: - # GH#53491 + # GH#53491: deprecate default fill_method="ffill" + # TODO: In 3.x, change default fill_method=None, then also in 3.x + # deprecate the fill_method and limit keywords, and finally remove + # them in 4.x + if fill_method is lib.no_default: warnings.warn( - "The 'fill_method' and 'limit' keywords in " - f"{type(self).__name__}.pct_change are deprecated and will be " - "removed in a future version. Call fillna directly before " - "calling pct_change instead.", + f"The default fill_method='ffill' in {type(self).__name__}.pct_change " + "is deprecated and will be changed to None in a future version of " + "pandas. Pass fill_method='ffill' to retain current behavior or " + "fill_method=None to adopt the future default and silence this " + "warning.", FutureWarning, stacklevel=find_stack_level(), ) - if fill_method is lib.no_default: fill_method = "ffill" - if limit is lib.no_default: - limit = None if axis is not lib.no_default: axis = self.obj._get_axis_number(axis) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 6f67a5aae03ba..aff03b3654af5 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -169,8 +169,18 @@ def test_transform_axis_1(request, transformation_func): msg = "DataFrame.groupby with axis=1 is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): gb = df.groupby([0, 0, 1], axis=1) - result = gb.transform(transformation_func, *args) - expected = df.T.groupby([0, 0, 1]).transform(transformation_func, *args).T + + pct_change_msg = ( + "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" + ) + if transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=pct_change_msg): + result = gb.transform("pct_change", *args) + with tm.assert_produces_warning(FutureWarning, match=pct_change_msg): + expected = df.T.groupby([0, 0, 1]).transform("pct_change", *args).T + else: + result = gb.transform(transformation_func, *args) + expected = df.T.groupby([0, 0, 1]).transform(transformation_func, *args).T if transformation_func in ["diff", "shift"]: # Result contains nans, so transpose coerces to float @@ -404,11 +414,25 @@ def mock_op(x): test_op = lambda x: x.transform(transformation_func) mock_op = lambda x: getattr(x, transformation_func)() - result = test_op(df.groupby("A")) + msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" + groupby_msg = ( + "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" + ) + + if transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=groupby_msg): + result = test_op(df.groupby("A")) + else: + result = test_op(df.groupby("A")) + # pass the group in same order as iterating `for ... in df.groupby(...)` # but reorder to match df's index since this is a transform groups = [df[["B"]].iloc[4:6], df[["B"]].iloc[6:], df[["B"]].iloc[:4]] - expected = concat([mock_op(g) for g in groups]).sort_index() + if transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = concat([mock_op(g) for g in groups]).sort_index() + else: + expected = concat([mock_op(g) for g in groups]).sort_index() # sort_index does not preserve the freq expected = expected.set_axis(df.index) @@ -969,14 +993,9 @@ def test_pct_change(frame_or_series, freq, periods, fill_method, limit): else: expected = expected.to_frame("vals") - msg = ( - "The 'fill_method' and 'limit' keywords in " - f"{type(gb).__name__}.pct_change are deprecated" + result = gb.pct_change( + periods=periods, fill_method=fill_method, limit=limit, freq=freq ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = gb.pct_change( - periods=periods, fill_method=fill_method, limit=limit, freq=freq - ) tm.assert_equal(result, expected) @@ -1394,6 +1413,11 @@ def test_null_group_str_transformer(request, dropna, transformation_func): args = get_groupby_method_args(transformation_func, df) gb = df.groupby("A", dropna=dropna) + msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" + groupby_msg = ( + "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" + ) + buffer = [] for k, (idx, group) in enumerate(gb): if transformation_func == "cumcount": @@ -1401,6 +1425,9 @@ def test_null_group_str_transformer(request, dropna, transformation_func): res = DataFrame({"B": range(len(group))}, index=group.index) elif transformation_func == "ngroup": res = DataFrame(len(group) * [k], index=group.index, columns=["B"]) + elif transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=msg): + res = getattr(group[["B"]], "pct_change")(*args) else: res = getattr(group[["B"]], transformation_func)(*args) buffer.append(res) @@ -1413,7 +1440,11 @@ def test_null_group_str_transformer(request, dropna, transformation_func): # ngroup/cumcount always returns a Series as it counts the groups, not values expected = expected["B"].rename(None) - result = gb.transform(transformation_func, *args) + if transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=groupby_msg): + result = gb.transform("pct_change", *args) + else: + result = gb.transform(transformation_func, *args) tm.assert_equal(result, expected) @@ -1465,6 +1496,11 @@ def test_null_group_str_transformer_series(dropna, transformation_func): args = get_groupby_method_args(transformation_func, ser) gb = ser.groupby([1, 1, np.nan], dropna=dropna) + msg = "The default fill_method='pad' in Series.pct_change is deprecated" + groupby_msg = ( + "The default fill_method='ffill' in SeriesGroupBy.pct_change is deprecated" + ) + buffer = [] for k, (idx, group) in enumerate(gb): if transformation_func == "cumcount": @@ -1472,6 +1508,9 @@ def test_null_group_str_transformer_series(dropna, transformation_func): res = Series(range(len(group)), index=group.index) elif transformation_func == "ngroup": res = Series(k, index=group.index) + elif transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=msg): + res = getattr(group, "pct_change")(*args) else: res = getattr(group, transformation_func)(*args) buffer.append(res) @@ -1480,7 +1519,10 @@ def test_null_group_str_transformer_series(dropna, transformation_func): buffer.append(Series([np.nan], index=[3], dtype=dtype)) expected = concat(buffer) - with tm.assert_produces_warning(None): + if transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=groupby_msg): + result = gb.transform("pct_change", *args) + else: result = gb.transform(transformation_func, *args) tm.assert_equal(result, expected) @@ -1523,6 +1565,14 @@ def test_as_index_no_change(keys, df, groupby_func): args = get_groupby_method_args(groupby_func, df) gb_as_index_true = df.groupby(keys, as_index=True) gb_as_index_false = df.groupby(keys, as_index=False) - result = gb_as_index_true.transform(groupby_func, *args) - expected = gb_as_index_false.transform(groupby_func, *args) + + msg = "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" + if groupby_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb_as_index_true.transform("pct_change", *args) + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = gb_as_index_false.transform("pct_change", *args) + else: + result = gb_as_index_true.transform(groupby_func, *args) + expected = gb_as_index_false.transform(groupby_func, *args) tm.assert_equal(result, expected) From 959ee5d5b69a8b9e098351c89f67d6af3eca05cf Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Mon, 5 Jun 2023 18:52:44 +0800 Subject: [PATCH 07/13] changelod updated correspondingly --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index b97d5ea7ce1f4..e15a8bfd82c92 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -223,7 +223,6 @@ Other API changes Deprecations ~~~~~~~~~~~~ - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) -- Deprecated 'fill_method' and 'limit' keywords in :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`DataFrameGroupBy.pct_change`, and :meth:`SeriesGroupBy.pct_change`, explicitly call ``fillna`` before calling ``pct_change`` instead (:issue:`53491`) - Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`) - Deprecated 'quantile' keyword in :meth:`Rolling.quantile` and :meth:`Expanding.quantile`, renamed as 'q' instead (:issue:`52550`) - Deprecated :meth:`.DataFrameGroupBy.apply` and methods on the objects returned by :meth:`.DataFrameGroupBy.resample` operating on the grouping column(s); select the columns to operate on after groupby to either explicitly include or exclude the groupings and avoid the ``FutureWarning`` (:issue:`7155`) @@ -240,6 +239,7 @@ Deprecations - Deprecated the ``axis`` keyword in :meth:`DataFrame.ewm`, :meth:`Series.ewm`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.expanding` (:issue:`51778`) - Deprecated the ``axis`` keyword in :meth:`DataFrame.resample`, :meth:`Series.resample` (:issue:`51778`) - Deprecated the behavior of :func:`concat` with both ``len(keys) != len(objs)``, in a future version this will raise instead of truncating to the shorter of the two sequences (:issue:`43485`) +- Deprecated the default of ``fill_method="pad"`` in :meth:`DataFrame.pct_change` and :meth:`Series.pct_change`, and ``fill_method="ffill"`` in :meth:`DataFrameGroupBy.pct_change` and :meth:`SeriesGroupBy.pct_change`; these will all default to ``None`` in a future version (:issue:`53491`) - Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`) - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`) - Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`) From 2cb449bab9ede4440d01d8158c9d6b44ecc4632e Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Thu, 8 Jun 2023 00:22:32 +0800 Subject: [PATCH 08/13] reverting --- pandas/core/generic.py | 21 ++--- pandas/core/groupby/groupby.py | 21 ++--- pandas/tests/frame/methods/test_pct_change.py | 78 ++++++------------ .../tests/groupby/transform/test_transform.py | 80 +++++++++++++++---- 4 files changed, 110 insertions(+), 90 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0704cf3d1dae1..9b6d62fe9e3ca 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11144,7 +11144,7 @@ def pct_change( self, periods: int = 1, fill_method: FillnaOptions | None | lib.NoDefault = lib.no_default, - limit: int | None | lib.NoDefault = lib.no_default, + limit: int | None = None, freq=None, **kwargs, ) -> Self: @@ -11275,20 +11275,21 @@ def pct_change( GOOG 0.179241 0.094112 NaN APPL -0.252395 -0.011860 NaN """ - if fill_method is not lib.no_default or limit is not lib.no_default: - # GH#53491 + # GH#53491: deprecate default fill_method="pad" + # TODO: In 3.x, change default fill_method=None, then also in 3.x + # deprecate the fill_method and limit keywords, and finally remove + # them in 4.x + if fill_method is lib.no_default: warnings.warn( - "The 'fill_method' and 'limit' keywords in " - f"{type(self).__name__}.pct_change are deprecated and will be " - "removed in a future version. Call fillna directly before " - "calling pct_change instead.", + f"The default fill_method='pad' in {type(self).__name__}.pct_change " + "is deprecated and will be changed to None in a future version of " + "pandas. Pass fill_method='pad' to retain current behavior or " + "fill_method=None to adopt the future default and silence this " + "warning.", FutureWarning, stacklevel=find_stack_level(), ) - if fill_method is lib.no_default: fill_method = "pad" - if limit is lib.no_default: - limit = None axis = self._get_axis_number(kwargs.pop("axis", "index")) if fill_method is None: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index d7d24940a0885..3b4c3b615e235 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3946,7 +3946,7 @@ def pct_change( self, periods: int = 1, fill_method: FillnaOptions | lib.NoDefault = lib.no_default, - limit: int | None | lib.NoDefault = lib.no_default, + limit: int | None = None, freq=None, axis: Axis | lib.NoDefault = lib.no_default, ): @@ -3958,20 +3958,21 @@ def pct_change( Series or DataFrame Percentage changes within each group. """ - if fill_method is not lib.no_default or limit is not lib.no_default: - # GH#53491 + # GH#53491: deprecate default fill_method="ffill" + # TODO: In 3.x, change default fill_method=None, then also in 3.x + # deprecate the fill_method and limit keywords, and finally remove + # them in 4.x + if fill_method is lib.no_default: warnings.warn( - "The 'fill_method' and 'limit' keywords in " - f"{type(self).__name__}.pct_change are deprecated and will be " - "removed in a future version. Call fillna directly before " - "calling pct_change instead.", + f"The default fill_method='ffill' in {type(self).__name__}.pct_change " + "is deprecated and will be changed to None in a future version of " + "pandas. Pass fill_method='ffill' to retain current behavior or " + "fill_method=None to adopt the future default and silence this " + "warning.", FutureWarning, stacklevel=find_stack_level(), ) - if fill_method is lib.no_default: fill_method = "ffill" - if limit is lib.no_default: - limit = None if axis is not lib.no_default: axis = self.obj._get_axis_number(axis) diff --git a/pandas/tests/frame/methods/test_pct_change.py b/pandas/tests/frame/methods/test_pct_change.py index d53631e75587d..9ad61693ee608 100644 --- a/pandas/tests/frame/methods/test_pct_change.py +++ b/pandas/tests/frame/methods/test_pct_change.py @@ -10,7 +10,7 @@ class TestDataFramePctChange: @pytest.mark.parametrize( - "periods, fill_method, limit, exp", + "periods,fill_method,limit,exp", [ (1, "ffill", None, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, 0]), (1, "ffill", 1, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, np.nan]), @@ -28,12 +28,7 @@ def test_pct_change_with_nas( vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan] obj = frame_or_series(vals) - msg = ( - "The 'fill_method' and 'limit' keywords in " - f"{type(obj).__name__}.pct_change are deprecated" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - res = obj.pct_change(periods=periods, fill_method=fill_method, limit=limit) + res = obj.pct_change(periods=periods, fill_method=fill_method, limit=limit) tm.assert_equal(res, frame_or_series(exp)) def test_pct_change_numeric(self): @@ -45,38 +40,28 @@ def test_pct_change_numeric(self): pnl.iat[1, 1] = np.nan pnl.iat[2, 3] = 60 - msg = ( - "The 'fill_method' and 'limit' keywords in " - "DataFrame.pct_change are deprecated" - ) - for axis in range(2): expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift(axis=axis) - 1 - - with tm.assert_produces_warning(FutureWarning, match=msg): - result = pnl.pct_change(axis=axis, fill_method="pad") + result = pnl.pct_change(axis=axis, fill_method="pad") tm.assert_frame_equal(result, expected) def test_pct_change(self, datetime_frame): - msg = ( - "The 'fill_method' and 'limit' keywords in " - "DataFrame.pct_change are deprecated" - ) + msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - rs = datetime_frame.pct_change(fill_method=None) + rs = datetime_frame.pct_change(fill_method=None) tm.assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1) - rs = datetime_frame.pct_change(2) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = datetime_frame.pct_change(2) filled = datetime_frame.fillna(method="pad") tm.assert_frame_equal(rs, filled / filled.shift(2) - 1) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs = datetime_frame.pct_change(fill_method="bfill", limit=1) + rs = datetime_frame.pct_change(fill_method="bfill", limit=1) filled = datetime_frame.fillna(method="bfill", limit=1) tm.assert_frame_equal(rs, filled / filled.shift(1) - 1) - rs = datetime_frame.pct_change(freq="5D") + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = datetime_frame.pct_change(freq="5D") filled = datetime_frame.fillna(method="pad") tm.assert_frame_equal( rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled) @@ -84,10 +69,12 @@ def test_pct_change(self, datetime_frame): def test_pct_change_shift_over_nas(self): s = Series([1.0, 1.5, np.nan, 2.5, 3.0]) - df = DataFrame({"a": s, "b": s}) - chg = df.pct_change() + msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + chg = df.pct_change() + expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) edf = DataFrame({"a": expected, "b": expected}) tm.assert_frame_equal(chg, edf) @@ -106,31 +93,18 @@ def test_pct_change_shift_over_nas(self): def test_pct_change_periods_freq( self, datetime_frame, freq, periods, fill_method, limit ): - msg = ( - "The 'fill_method' and 'limit' keywords in " - "DataFrame.pct_change are deprecated" - ) - # GH#7292 - with tm.assert_produces_warning(FutureWarning, match=msg): - rs_freq = datetime_frame.pct_change( - freq=freq, fill_method=fill_method, limit=limit - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs_periods = datetime_frame.pct_change( - periods, fill_method=fill_method, limit=limit - ) + rs_freq = datetime_frame.pct_change( + freq=freq, fill_method=fill_method, limit=limit + ) + rs_periods = datetime_frame.pct_change( + periods, fill_method=fill_method, limit=limit + ) tm.assert_frame_equal(rs_freq, rs_periods) empty_ts = DataFrame(index=datetime_frame.index, columns=datetime_frame.columns) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs_freq = empty_ts.pct_change( - freq=freq, fill_method=fill_method, limit=limit - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs_periods = empty_ts.pct_change( - periods, fill_method=fill_method, limit=limit - ) + rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) + rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) tm.assert_frame_equal(rs_freq, rs_periods) @@ -140,13 +114,7 @@ def test_pct_change_with_duplicated_indices(fill_method): data = DataFrame( {0: [np.nan, 1, 2, 3, 9, 18], 1: [0, 1, np.nan, 3, 9, 18]}, index=["a", "b"] * 3 ) - - msg = ( - "The 'fill_method' and 'limit' keywords in " - "DataFrame.pct_change are deprecated" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = data.pct_change(fill_method=fill_method) + result = data.pct_change(fill_method=fill_method) if fill_method is None: second_column = [np.nan, np.inf, np.nan, np.nan, 2.0, 1.0] diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 6f67a5aae03ba..aff03b3654af5 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -169,8 +169,18 @@ def test_transform_axis_1(request, transformation_func): msg = "DataFrame.groupby with axis=1 is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): gb = df.groupby([0, 0, 1], axis=1) - result = gb.transform(transformation_func, *args) - expected = df.T.groupby([0, 0, 1]).transform(transformation_func, *args).T + + pct_change_msg = ( + "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" + ) + if transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=pct_change_msg): + result = gb.transform("pct_change", *args) + with tm.assert_produces_warning(FutureWarning, match=pct_change_msg): + expected = df.T.groupby([0, 0, 1]).transform("pct_change", *args).T + else: + result = gb.transform(transformation_func, *args) + expected = df.T.groupby([0, 0, 1]).transform(transformation_func, *args).T if transformation_func in ["diff", "shift"]: # Result contains nans, so transpose coerces to float @@ -404,11 +414,25 @@ def mock_op(x): test_op = lambda x: x.transform(transformation_func) mock_op = lambda x: getattr(x, transformation_func)() - result = test_op(df.groupby("A")) + msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" + groupby_msg = ( + "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" + ) + + if transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=groupby_msg): + result = test_op(df.groupby("A")) + else: + result = test_op(df.groupby("A")) + # pass the group in same order as iterating `for ... in df.groupby(...)` # but reorder to match df's index since this is a transform groups = [df[["B"]].iloc[4:6], df[["B"]].iloc[6:], df[["B"]].iloc[:4]] - expected = concat([mock_op(g) for g in groups]).sort_index() + if transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = concat([mock_op(g) for g in groups]).sort_index() + else: + expected = concat([mock_op(g) for g in groups]).sort_index() # sort_index does not preserve the freq expected = expected.set_axis(df.index) @@ -969,14 +993,9 @@ def test_pct_change(frame_or_series, freq, periods, fill_method, limit): else: expected = expected.to_frame("vals") - msg = ( - "The 'fill_method' and 'limit' keywords in " - f"{type(gb).__name__}.pct_change are deprecated" + result = gb.pct_change( + periods=periods, fill_method=fill_method, limit=limit, freq=freq ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = gb.pct_change( - periods=periods, fill_method=fill_method, limit=limit, freq=freq - ) tm.assert_equal(result, expected) @@ -1394,6 +1413,11 @@ def test_null_group_str_transformer(request, dropna, transformation_func): args = get_groupby_method_args(transformation_func, df) gb = df.groupby("A", dropna=dropna) + msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" + groupby_msg = ( + "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" + ) + buffer = [] for k, (idx, group) in enumerate(gb): if transformation_func == "cumcount": @@ -1401,6 +1425,9 @@ def test_null_group_str_transformer(request, dropna, transformation_func): res = DataFrame({"B": range(len(group))}, index=group.index) elif transformation_func == "ngroup": res = DataFrame(len(group) * [k], index=group.index, columns=["B"]) + elif transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=msg): + res = getattr(group[["B"]], "pct_change")(*args) else: res = getattr(group[["B"]], transformation_func)(*args) buffer.append(res) @@ -1413,7 +1440,11 @@ def test_null_group_str_transformer(request, dropna, transformation_func): # ngroup/cumcount always returns a Series as it counts the groups, not values expected = expected["B"].rename(None) - result = gb.transform(transformation_func, *args) + if transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=groupby_msg): + result = gb.transform("pct_change", *args) + else: + result = gb.transform(transformation_func, *args) tm.assert_equal(result, expected) @@ -1465,6 +1496,11 @@ def test_null_group_str_transformer_series(dropna, transformation_func): args = get_groupby_method_args(transformation_func, ser) gb = ser.groupby([1, 1, np.nan], dropna=dropna) + msg = "The default fill_method='pad' in Series.pct_change is deprecated" + groupby_msg = ( + "The default fill_method='ffill' in SeriesGroupBy.pct_change is deprecated" + ) + buffer = [] for k, (idx, group) in enumerate(gb): if transformation_func == "cumcount": @@ -1472,6 +1508,9 @@ def test_null_group_str_transformer_series(dropna, transformation_func): res = Series(range(len(group)), index=group.index) elif transformation_func == "ngroup": res = Series(k, index=group.index) + elif transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=msg): + res = getattr(group, "pct_change")(*args) else: res = getattr(group, transformation_func)(*args) buffer.append(res) @@ -1480,7 +1519,10 @@ def test_null_group_str_transformer_series(dropna, transformation_func): buffer.append(Series([np.nan], index=[3], dtype=dtype)) expected = concat(buffer) - with tm.assert_produces_warning(None): + if transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=groupby_msg): + result = gb.transform("pct_change", *args) + else: result = gb.transform(transformation_func, *args) tm.assert_equal(result, expected) @@ -1523,6 +1565,14 @@ def test_as_index_no_change(keys, df, groupby_func): args = get_groupby_method_args(groupby_func, df) gb_as_index_true = df.groupby(keys, as_index=True) gb_as_index_false = df.groupby(keys, as_index=False) - result = gb_as_index_true.transform(groupby_func, *args) - expected = gb_as_index_false.transform(groupby_func, *args) + + msg = "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" + if groupby_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb_as_index_true.transform("pct_change", *args) + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = gb_as_index_false.transform("pct_change", *args) + else: + result = gb_as_index_true.transform(groupby_func, *args) + expected = gb_as_index_false.transform(groupby_func, *args) tm.assert_equal(result, expected) From 446569458dcff158f8fb804c03a6d0ebfbb152fc Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Thu, 8 Jun 2023 00:30:28 +0800 Subject: [PATCH 09/13] Revert "reverting" This reverts commit 2cb449bab9ede4440d01d8158c9d6b44ecc4632e. --- pandas/core/generic.py | 21 +++-- pandas/core/groupby/groupby.py | 21 +++-- pandas/tests/frame/methods/test_pct_change.py | 78 ++++++++++++------ .../tests/groupby/transform/test_transform.py | 80 ++++--------------- 4 files changed, 90 insertions(+), 110 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9b6d62fe9e3ca..0704cf3d1dae1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11144,7 +11144,7 @@ def pct_change( self, periods: int = 1, fill_method: FillnaOptions | None | lib.NoDefault = lib.no_default, - limit: int | None = None, + limit: int | None | lib.NoDefault = lib.no_default, freq=None, **kwargs, ) -> Self: @@ -11275,21 +11275,20 @@ def pct_change( GOOG 0.179241 0.094112 NaN APPL -0.252395 -0.011860 NaN """ - # GH#53491: deprecate default fill_method="pad" - # TODO: In 3.x, change default fill_method=None, then also in 3.x - # deprecate the fill_method and limit keywords, and finally remove - # them in 4.x - if fill_method is lib.no_default: + if fill_method is not lib.no_default or limit is not lib.no_default: + # GH#53491 warnings.warn( - f"The default fill_method='pad' in {type(self).__name__}.pct_change " - "is deprecated and will be changed to None in a future version of " - "pandas. Pass fill_method='pad' to retain current behavior or " - "fill_method=None to adopt the future default and silence this " - "warning.", + "The 'fill_method' and 'limit' keywords in " + f"{type(self).__name__}.pct_change are deprecated and will be " + "removed in a future version. Call fillna directly before " + "calling pct_change instead.", FutureWarning, stacklevel=find_stack_level(), ) + if fill_method is lib.no_default: fill_method = "pad" + if limit is lib.no_default: + limit = None axis = self._get_axis_number(kwargs.pop("axis", "index")) if fill_method is None: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 3b4c3b615e235..d7d24940a0885 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3946,7 +3946,7 @@ def pct_change( self, periods: int = 1, fill_method: FillnaOptions | lib.NoDefault = lib.no_default, - limit: int | None = None, + limit: int | None | lib.NoDefault = lib.no_default, freq=None, axis: Axis | lib.NoDefault = lib.no_default, ): @@ -3958,21 +3958,20 @@ def pct_change( Series or DataFrame Percentage changes within each group. """ - # GH#53491: deprecate default fill_method="ffill" - # TODO: In 3.x, change default fill_method=None, then also in 3.x - # deprecate the fill_method and limit keywords, and finally remove - # them in 4.x - if fill_method is lib.no_default: + if fill_method is not lib.no_default or limit is not lib.no_default: + # GH#53491 warnings.warn( - f"The default fill_method='ffill' in {type(self).__name__}.pct_change " - "is deprecated and will be changed to None in a future version of " - "pandas. Pass fill_method='ffill' to retain current behavior or " - "fill_method=None to adopt the future default and silence this " - "warning.", + "The 'fill_method' and 'limit' keywords in " + f"{type(self).__name__}.pct_change are deprecated and will be " + "removed in a future version. Call fillna directly before " + "calling pct_change instead.", FutureWarning, stacklevel=find_stack_level(), ) + if fill_method is lib.no_default: fill_method = "ffill" + if limit is lib.no_default: + limit = None if axis is not lib.no_default: axis = self.obj._get_axis_number(axis) diff --git a/pandas/tests/frame/methods/test_pct_change.py b/pandas/tests/frame/methods/test_pct_change.py index 9ad61693ee608..d53631e75587d 100644 --- a/pandas/tests/frame/methods/test_pct_change.py +++ b/pandas/tests/frame/methods/test_pct_change.py @@ -10,7 +10,7 @@ class TestDataFramePctChange: @pytest.mark.parametrize( - "periods,fill_method,limit,exp", + "periods, fill_method, limit, exp", [ (1, "ffill", None, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, 0]), (1, "ffill", 1, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, np.nan]), @@ -28,7 +28,12 @@ def test_pct_change_with_nas( vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan] obj = frame_or_series(vals) - res = obj.pct_change(periods=periods, fill_method=fill_method, limit=limit) + msg = ( + "The 'fill_method' and 'limit' keywords in " + f"{type(obj).__name__}.pct_change are deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + res = obj.pct_change(periods=periods, fill_method=fill_method, limit=limit) tm.assert_equal(res, frame_or_series(exp)) def test_pct_change_numeric(self): @@ -40,28 +45,38 @@ def test_pct_change_numeric(self): pnl.iat[1, 1] = np.nan pnl.iat[2, 3] = 60 + msg = ( + "The 'fill_method' and 'limit' keywords in " + "DataFrame.pct_change are deprecated" + ) + for axis in range(2): expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift(axis=axis) - 1 - result = pnl.pct_change(axis=axis, fill_method="pad") + + with tm.assert_produces_warning(FutureWarning, match=msg): + result = pnl.pct_change(axis=axis, fill_method="pad") tm.assert_frame_equal(result, expected) def test_pct_change(self, datetime_frame): - msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" + msg = ( + "The 'fill_method' and 'limit' keywords in " + "DataFrame.pct_change are deprecated" + ) - rs = datetime_frame.pct_change(fill_method=None) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = datetime_frame.pct_change(fill_method=None) tm.assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs = datetime_frame.pct_change(2) + rs = datetime_frame.pct_change(2) filled = datetime_frame.fillna(method="pad") tm.assert_frame_equal(rs, filled / filled.shift(2) - 1) - rs = datetime_frame.pct_change(fill_method="bfill", limit=1) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = datetime_frame.pct_change(fill_method="bfill", limit=1) filled = datetime_frame.fillna(method="bfill", limit=1) tm.assert_frame_equal(rs, filled / filled.shift(1) - 1) - with tm.assert_produces_warning(FutureWarning, match=msg): - rs = datetime_frame.pct_change(freq="5D") + rs = datetime_frame.pct_change(freq="5D") filled = datetime_frame.fillna(method="pad") tm.assert_frame_equal( rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled) @@ -69,12 +84,10 @@ def test_pct_change(self, datetime_frame): def test_pct_change_shift_over_nas(self): s = Series([1.0, 1.5, np.nan, 2.5, 3.0]) - df = DataFrame({"a": s, "b": s}) - msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - chg = df.pct_change() + df = DataFrame({"a": s, "b": s}) + chg = df.pct_change() expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) edf = DataFrame({"a": expected, "b": expected}) tm.assert_frame_equal(chg, edf) @@ -93,18 +106,31 @@ def test_pct_change_shift_over_nas(self): def test_pct_change_periods_freq( self, datetime_frame, freq, periods, fill_method, limit ): - # GH#7292 - rs_freq = datetime_frame.pct_change( - freq=freq, fill_method=fill_method, limit=limit - ) - rs_periods = datetime_frame.pct_change( - periods, fill_method=fill_method, limit=limit + msg = ( + "The 'fill_method' and 'limit' keywords in " + "DataFrame.pct_change are deprecated" ) + + # GH#7292 + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_freq = datetime_frame.pct_change( + freq=freq, fill_method=fill_method, limit=limit + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_periods = datetime_frame.pct_change( + periods, fill_method=fill_method, limit=limit + ) tm.assert_frame_equal(rs_freq, rs_periods) empty_ts = DataFrame(index=datetime_frame.index, columns=datetime_frame.columns) - rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) - rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_freq = empty_ts.pct_change( + freq=freq, fill_method=fill_method, limit=limit + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs_periods = empty_ts.pct_change( + periods, fill_method=fill_method, limit=limit + ) tm.assert_frame_equal(rs_freq, rs_periods) @@ -114,7 +140,13 @@ def test_pct_change_with_duplicated_indices(fill_method): data = DataFrame( {0: [np.nan, 1, 2, 3, 9, 18], 1: [0, 1, np.nan, 3, 9, 18]}, index=["a", "b"] * 3 ) - result = data.pct_change(fill_method=fill_method) + + msg = ( + "The 'fill_method' and 'limit' keywords in " + "DataFrame.pct_change are deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = data.pct_change(fill_method=fill_method) if fill_method is None: second_column = [np.nan, np.inf, np.nan, np.nan, 2.0, 1.0] diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index aff03b3654af5..6f67a5aae03ba 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -169,18 +169,8 @@ def test_transform_axis_1(request, transformation_func): msg = "DataFrame.groupby with axis=1 is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): gb = df.groupby([0, 0, 1], axis=1) - - pct_change_msg = ( - "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" - ) - if transformation_func == "pct_change": - with tm.assert_produces_warning(FutureWarning, match=pct_change_msg): - result = gb.transform("pct_change", *args) - with tm.assert_produces_warning(FutureWarning, match=pct_change_msg): - expected = df.T.groupby([0, 0, 1]).transform("pct_change", *args).T - else: - result = gb.transform(transformation_func, *args) - expected = df.T.groupby([0, 0, 1]).transform(transformation_func, *args).T + result = gb.transform(transformation_func, *args) + expected = df.T.groupby([0, 0, 1]).transform(transformation_func, *args).T if transformation_func in ["diff", "shift"]: # Result contains nans, so transpose coerces to float @@ -414,25 +404,11 @@ def mock_op(x): test_op = lambda x: x.transform(transformation_func) mock_op = lambda x: getattr(x, transformation_func)() - msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" - groupby_msg = ( - "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" - ) - - if transformation_func == "pct_change": - with tm.assert_produces_warning(FutureWarning, match=groupby_msg): - result = test_op(df.groupby("A")) - else: - result = test_op(df.groupby("A")) - + result = test_op(df.groupby("A")) # pass the group in same order as iterating `for ... in df.groupby(...)` # but reorder to match df's index since this is a transform groups = [df[["B"]].iloc[4:6], df[["B"]].iloc[6:], df[["B"]].iloc[:4]] - if transformation_func == "pct_change": - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = concat([mock_op(g) for g in groups]).sort_index() - else: - expected = concat([mock_op(g) for g in groups]).sort_index() + expected = concat([mock_op(g) for g in groups]).sort_index() # sort_index does not preserve the freq expected = expected.set_axis(df.index) @@ -993,9 +969,14 @@ def test_pct_change(frame_or_series, freq, periods, fill_method, limit): else: expected = expected.to_frame("vals") - result = gb.pct_change( - periods=periods, fill_method=fill_method, limit=limit, freq=freq + msg = ( + "The 'fill_method' and 'limit' keywords in " + f"{type(gb).__name__}.pct_change are deprecated" ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.pct_change( + periods=periods, fill_method=fill_method, limit=limit, freq=freq + ) tm.assert_equal(result, expected) @@ -1413,11 +1394,6 @@ def test_null_group_str_transformer(request, dropna, transformation_func): args = get_groupby_method_args(transformation_func, df) gb = df.groupby("A", dropna=dropna) - msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" - groupby_msg = ( - "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" - ) - buffer = [] for k, (idx, group) in enumerate(gb): if transformation_func == "cumcount": @@ -1425,9 +1401,6 @@ def test_null_group_str_transformer(request, dropna, transformation_func): res = DataFrame({"B": range(len(group))}, index=group.index) elif transformation_func == "ngroup": res = DataFrame(len(group) * [k], index=group.index, columns=["B"]) - elif transformation_func == "pct_change": - with tm.assert_produces_warning(FutureWarning, match=msg): - res = getattr(group[["B"]], "pct_change")(*args) else: res = getattr(group[["B"]], transformation_func)(*args) buffer.append(res) @@ -1440,11 +1413,7 @@ def test_null_group_str_transformer(request, dropna, transformation_func): # ngroup/cumcount always returns a Series as it counts the groups, not values expected = expected["B"].rename(None) - if transformation_func == "pct_change": - with tm.assert_produces_warning(FutureWarning, match=groupby_msg): - result = gb.transform("pct_change", *args) - else: - result = gb.transform(transformation_func, *args) + result = gb.transform(transformation_func, *args) tm.assert_equal(result, expected) @@ -1496,11 +1465,6 @@ def test_null_group_str_transformer_series(dropna, transformation_func): args = get_groupby_method_args(transformation_func, ser) gb = ser.groupby([1, 1, np.nan], dropna=dropna) - msg = "The default fill_method='pad' in Series.pct_change is deprecated" - groupby_msg = ( - "The default fill_method='ffill' in SeriesGroupBy.pct_change is deprecated" - ) - buffer = [] for k, (idx, group) in enumerate(gb): if transformation_func == "cumcount": @@ -1508,9 +1472,6 @@ def test_null_group_str_transformer_series(dropna, transformation_func): res = Series(range(len(group)), index=group.index) elif transformation_func == "ngroup": res = Series(k, index=group.index) - elif transformation_func == "pct_change": - with tm.assert_produces_warning(FutureWarning, match=msg): - res = getattr(group, "pct_change")(*args) else: res = getattr(group, transformation_func)(*args) buffer.append(res) @@ -1519,10 +1480,7 @@ def test_null_group_str_transformer_series(dropna, transformation_func): buffer.append(Series([np.nan], index=[3], dtype=dtype)) expected = concat(buffer) - if transformation_func == "pct_change": - with tm.assert_produces_warning(FutureWarning, match=groupby_msg): - result = gb.transform("pct_change", *args) - else: + with tm.assert_produces_warning(None): result = gb.transform(transformation_func, *args) tm.assert_equal(result, expected) @@ -1565,14 +1523,6 @@ def test_as_index_no_change(keys, df, groupby_func): args = get_groupby_method_args(groupby_func, df) gb_as_index_true = df.groupby(keys, as_index=True) gb_as_index_false = df.groupby(keys, as_index=False) - - msg = "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" - if groupby_func == "pct_change": - with tm.assert_produces_warning(FutureWarning, match=msg): - result = gb_as_index_true.transform("pct_change", *args) - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = gb_as_index_false.transform("pct_change", *args) - else: - result = gb_as_index_true.transform(groupby_func, *args) - expected = gb_as_index_false.transform(groupby_func, *args) + result = gb_as_index_true.transform(groupby_func, *args) + expected = gb_as_index_false.transform(groupby_func, *args) tm.assert_equal(result, expected) From f87392b40fe0b09b97a83d18c03faf7cc40bf6e0 Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Thu, 8 Jun 2023 01:45:04 +0800 Subject: [PATCH 10/13] resolved conversation: also warn if nan and not specified --- pandas/core/generic.py | 16 ++++++++++--- pandas/core/groupby/groupby.py | 16 ++++++++++--- pandas/tests/frame/methods/test_pct_change.py | 5 +++- .../tests/groupby/transform/test_transform.py | 24 ++++++++++++++++--- .../tests/series/methods/test_pct_change.py | 5 +++- 5 files changed, 55 insertions(+), 11 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0704cf3d1dae1..2c05d4d7a67ef 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11275,17 +11275,27 @@ def pct_change( GOOG 0.179241 0.094112 NaN APPL -0.252395 -0.011860 NaN """ + # GH#53491 if fill_method is not lib.no_default or limit is not lib.no_default: - # GH#53491 warnings.warn( "The 'fill_method' and 'limit' keywords in " f"{type(self).__name__}.pct_change are deprecated and will be " - "removed in a future version. Call fillna directly before " - "calling pct_change instead.", + "removed in a future version. Call fillna before calling pct_change " + "instead.", FutureWarning, stacklevel=find_stack_level(), ) if fill_method is lib.no_default: + if self.isna().values.any(): + warnings.warn( + "The default fill_method='pad' in " + f"{type(self).__name__}.pct_change is deprecated and will be " + "removed in a future version. Call fillna with method='ffill' " + "before calling pct_change to retain current behavior and " + "and silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) fill_method = "pad" if limit is lib.no_default: limit = None diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index d7d24940a0885..1566fb04b7464 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3958,17 +3958,27 @@ def pct_change( Series or DataFrame Percentage changes within each group. """ + # GH#53491 if fill_method is not lib.no_default or limit is not lib.no_default: - # GH#53491 warnings.warn( "The 'fill_method' and 'limit' keywords in " f"{type(self).__name__}.pct_change are deprecated and will be " - "removed in a future version. Call fillna directly before " - "calling pct_change instead.", + "removed in a future version. Call fillna before calling pct_change " + "instead.", FutureWarning, stacklevel=find_stack_level(), ) if fill_method is lib.no_default: + if any(grp.isna().values.any() for _, grp in self): + warnings.warn( + "The default fill_method='ffill' in " + f"{type(self).__name__}.pct_change is deprecated and will be " + "removed in a future version. Call fillna with method='ffill' " + "before calling pct_change to retain current behavior and " + "silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) fill_method = "ffill" if limit is lib.no_default: limit = None diff --git a/pandas/tests/frame/methods/test_pct_change.py b/pandas/tests/frame/methods/test_pct_change.py index d53631e75587d..e07d01b0d2f95 100644 --- a/pandas/tests/frame/methods/test_pct_change.py +++ b/pandas/tests/frame/methods/test_pct_change.py @@ -87,7 +87,10 @@ def test_pct_change_shift_over_nas(self): df = DataFrame({"a": s, "b": s}) - chg = df.pct_change() + msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + chg = df.pct_change() + expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) edf = DataFrame({"a": expected, "b": expected}) tm.assert_frame_equal(chg, edf) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 6f67a5aae03ba..1544dfd22b12f 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -404,11 +404,24 @@ def mock_op(x): test_op = lambda x: x.transform(transformation_func) mock_op = lambda x: getattr(x, transformation_func)() - result = test_op(df.groupby("A")) + msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated" + groupby_msg = ( + "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" + ) + if transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=groupby_msg): + result = test_op(df.groupby("A")) + else: + result = test_op(df.groupby("A")) + # pass the group in same order as iterating `for ... in df.groupby(...)` # but reorder to match df's index since this is a transform groups = [df[["B"]].iloc[4:6], df[["B"]].iloc[6:], df[["B"]].iloc[:4]] - expected = concat([mock_op(g) for g in groups]).sort_index() + if transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = concat([mock_op(g) for g in groups]).sort_index() + else: + expected = concat([mock_op(g) for g in groups]).sort_index() # sort_index does not preserve the freq expected = expected.set_axis(df.index) @@ -1413,7 +1426,12 @@ def test_null_group_str_transformer(request, dropna, transformation_func): # ngroup/cumcount always returns a Series as it counts the groups, not values expected = expected["B"].rename(None) - result = gb.transform(transformation_func, *args) + msg = "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" + if transformation_func == "pct_change" and not dropna: + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.transform("pct_change", *args) + else: + result = gb.transform(transformation_func, *args) tm.assert_equal(result, expected) diff --git a/pandas/tests/series/methods/test_pct_change.py b/pandas/tests/series/methods/test_pct_change.py index 51447fc0283aa..d149bf87f4726 100644 --- a/pandas/tests/series/methods/test_pct_change.py +++ b/pandas/tests/series/methods/test_pct_change.py @@ -47,7 +47,10 @@ def test_pct_change_with_duplicate_axis(self): def test_pct_change_shift_over_nas(self): s = Series([1.0, 1.5, np.nan, 2.5, 3.0]) - chg = s.pct_change() + msg = "The default fill_method='pad' in Series.pct_change is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + chg = s.pct_change() + expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) tm.assert_series_equal(chg, expected) From 375449ec6771ecda054210bd4260707b7289bcfa Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Thu, 8 Jun 2023 11:32:56 +0800 Subject: [PATCH 11/13] modify msgs since fillna method deprecated; tests updated --- pandas/core/generic.py | 10 +++++----- pandas/core/groupby/groupby.py | 10 +++++----- pandas/tests/groupby/test_groupby_dropna.py | 9 ++++++++- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 382b1cc51e3d8..1d2b6e32a08f9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11356,8 +11356,9 @@ def pct_change( warnings.warn( "The 'fill_method' and 'limit' keywords in " f"{type(self).__name__}.pct_change are deprecated and will be " - "removed in a future version. Call fillna before calling pct_change " - "instead.", + "removed in a future version. Call " + f"{'bfill' if fill_method in ('backfill', 'bfill') else 'ffill'} " + "before calling pct_change instead.", FutureWarning, stacklevel=find_stack_level(), ) @@ -11366,9 +11367,8 @@ def pct_change( warnings.warn( "The default fill_method='pad' in " f"{type(self).__name__}.pct_change is deprecated and will be " - "removed in a future version. Call fillna with method='ffill' " - "before calling pct_change to retain current behavior and " - "and silence this warning.", + "removed in a future version. Call ffill before calling " + "pct_change to retain current behavior and silence this warning.", FutureWarning, stacklevel=find_stack_level(), ) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 845e724ef3087..bbe59f9884aa1 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4381,8 +4381,9 @@ def pct_change( warnings.warn( "The 'fill_method' and 'limit' keywords in " f"{type(self).__name__}.pct_change are deprecated and will be " - "removed in a future version. Call fillna before calling pct_change " - "instead.", + "removed in a future version. Call " + f"{'bfill' if fill_method in ('backfill', 'bfill') else 'ffill'} " + "before calling pct_change instead.", FutureWarning, stacklevel=find_stack_level(), ) @@ -4391,9 +4392,8 @@ def pct_change( warnings.warn( "The default fill_method='ffill' in " f"{type(self).__name__}.pct_change is deprecated and will be " - "removed in a future version. Call fillna with method='ffill' " - "before calling pct_change to retain current behavior and " - "silence this warning.", + "removed in a future version. Call ffill before calling " + "pct_change to retain current behavior and silence this warning.", FutureWarning, stacklevel=find_stack_level(), ) diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 269fda8fbf361..ab268a1d94b96 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -622,8 +622,15 @@ def test_categorical_transformers( "x", dropna=False, observed=observed, sort=sort, as_index=as_index ) gb_dropna = df.groupby("x", dropna=True, observed=observed, sort=sort) - result = getattr(gb_keepna, transformation_func)(*args) + + msg = "The default fill_method='ffill' in DataFrameGroupBy.pct_change is deprecated" + if transformation_func == "pct_change": + with tm.assert_produces_warning(FutureWarning, match=msg): + result = getattr(gb_keepna, "pct_change")(*args) + else: + result = getattr(gb_keepna, transformation_func)(*args) expected = getattr(gb_dropna, transformation_func)(*args) + for iloc, value in zip( df[df["x"].isnull()].index.tolist(), null_group_result.values.ravel() ): From 0ca61a5707d2f019e4b21896a36c19d5f123333f Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Thu, 8 Jun 2023 11:35:16 +0800 Subject: [PATCH 12/13] changelog updated --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 3baf06bba7359..91d82d7742c1a 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -225,6 +225,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) +- Deprecated 'fill_method' and 'limit' keywords in :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`DataFrameGroupBy.pct_change`, and :meth:`SeriesGroupBy.pct_change`, explicitly call ``ffill`` or ``bfill`` before calling ``pct_change`` instead (:issue:`53491`) - Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`) - Deprecated 'quantile' keyword in :meth:`Rolling.quantile` and :meth:`Expanding.quantile`, renamed as 'q' instead (:issue:`52550`) - Deprecated :meth:`.DataFrameGroupBy.apply` and methods on the objects returned by :meth:`.DataFrameGroupBy.resample` operating on the grouping column(s); select the columns to operate on after groupby to either explicitly include or exclude the groupings and avoid the ``FutureWarning`` (:issue:`7155`) @@ -243,7 +244,6 @@ Deprecations - Deprecated the ``axis`` keyword in :meth:`DataFrame.ewm`, :meth:`Series.ewm`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.expanding` (:issue:`51778`) - Deprecated the ``axis`` keyword in :meth:`DataFrame.resample`, :meth:`Series.resample` (:issue:`51778`) - Deprecated the behavior of :func:`concat` with both ``len(keys) != len(objs)``, in a future version this will raise instead of truncating to the shorter of the two sequences (:issue:`43485`) -- Deprecated the default of ``fill_method="pad"`` in :meth:`DataFrame.pct_change` and :meth:`Series.pct_change`, and ``fill_method="ffill"`` in :meth:`DataFrameGroupBy.pct_change` and :meth:`SeriesGroupBy.pct_change`; these will all default to ``None`` in a future version (:issue:`53491`) - Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`) - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`) - Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`) From c4b6732e410aea5602ec8ac463a64783c640bc37 Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Thu, 8 Jun 2023 13:55:32 +0800 Subject: [PATCH 13/13] docstring use ffill instead of fillna --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1d2b6e32a08f9..f73ef36f76086 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11304,7 +11304,7 @@ def pct_change( 3 85.0 dtype: float64 - >>> s.fillna(method='ffill').pct_change() + >>> s.ffill().pct_change() 0 NaN 1 0.011111 2 0.000000