diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 4d0373e4571da..80317d6806346 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -382,6 +382,33 @@ Backwards incompatible API changes - :meth:`read_csv` will now raise a ``ValueError`` if a column with missing values is declared as having dtype ``bool`` (:issue:`20591`) - The column order of the resultant :class:`DataFrame` from :meth:`MultiIndex.to_frame` is now guaranteed to match the :attr:`MultiIndex.names` order. (:issue:`22420`) +Percentage change on groupby changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Fixed a bug where :func:`SeriesGroupBy.pct_change` and :func:`DataFrameGroupBy.pct_change` calculated the percent change across group boundaries; the percent change is now correctly calculated within each group (:issue:`21200`, :issue:`21235`). + +.. ipython:: python + + df = pd.DataFrame({'grp': ['a', 'a', 'b'], 'foo': [1.0, 1.1, 2.2]}) + df + +Previous behavior: + +.. code-block:: ipython + + In [1]: df.groupby('grp').pct_change() + Out[1]: + foo + 0 NaN + 1 0.1 + 2 1.0 + +New behavior: + +.. ipython:: python + + df.groupby('grp').pct_change() + .. 
_whatsnew_0240.api_breaking.deps: Dependencies have increased minimum versions diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 2f54f61818aa6..47ac1260d5179 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1221,9 +1221,15 @@ def _apply_to_column_groupbys(self, func): return func(self) def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None): - """Calculate percent change of each value to previous entry in group""" + """Calculate pct_change of each value to previous entry in group""" + # TODO: Remove this conditional when #23918 is fixed + if freq: + return self.apply(lambda x: x.pct_change(periods=periods, + fill_method=fill_method, + limit=limit, freq=freq)) filled = getattr(self, fill_method)(limit=limit) - shifted = filled.shift(periods=periods, freq=freq) + fill_grp = filled.groupby(self.grouper.labels) + shifted = fill_grp.shift(periods=periods, freq=freq) return (filled / shifted) - 1 diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 45eaa3efa948a..4b915922cef93 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2025,11 +2025,10 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, fill_method=fill_method, limit=limit, freq=freq, axis=axis)) - - filled = getattr(self, fill_method)(limit=limit).drop( - self.grouper.names, axis=1) - shifted = filled.shift(periods=periods, freq=freq) - + filled = getattr(self, fill_method)(limit=limit) + filled = filled.drop(self.grouper.names, axis=1) + fill_grp = filled.groupby(self.grouper.labels) + shifted = fill_grp.shift(periods=periods, freq=freq) return (filled / shifted) - 1 @Substitution(name='groupby') diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index dbbf6e583796f..b6361b4ad76a0 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -765,36 +765,36 @@ 
def test_pad_stable_sorting(fill_method): @pytest.mark.parametrize("test_series", [True, False]) +@pytest.mark.parametrize("freq", [ + None, + pytest.param('D', marks=pytest.mark.xfail( + reason='GH#23918 before method uses freq in vectorized approach'))]) @pytest.mark.parametrize("periods,fill_method,limit", [ (1, 'ffill', None), (1, 'ffill', 1), (1, 'bfill', None), (1, 'bfill', 1), (-1, 'ffill', None), (-1, 'ffill', 1), - (-1, 'bfill', None), (-1, 'bfill', 1)]) -def test_pct_change(test_series, periods, fill_method, limit): - vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan] - exp_vals = Series(vals).pct_change(periods=periods, - fill_method=fill_method, - limit=limit).tolist() - - df = DataFrame({'key': ['a'] * len(vals) + ['b'] * len(vals), - 'vals': vals * 2}) - grp = df.groupby('key') - - def get_result(grp_obj): - return grp_obj.pct_change(periods=periods, - fill_method=fill_method, - limit=limit) + (-1, 'bfill', None), (-1, 'bfill', 1), +]) +def test_pct_change(test_series, freq, periods, fill_method, limit): + # GH 21200, 21621 + vals = [3, np.nan, np.nan, np.nan, 1, 2, 4, 10, np.nan, 4] + keys = ['a', 'b'] + key_v = np.repeat(keys, len(vals)) + df = DataFrame({'key': key_v, 'vals': vals * 2}) + + df_g = getattr(df.groupby('key'), fill_method)(limit=limit) + grp = df_g.groupby('key') + + expected = grp['vals'].obj / grp['vals'].shift(periods) - 1 if test_series: - exp = pd.Series(exp_vals * 2) - exp.name = 'vals' - grp = grp['vals'] - result = get_result(grp) - tm.assert_series_equal(result, exp) + result = df.groupby('key')['vals'].pct_change( + periods=periods, fill_method=fill_method, limit=limit, freq=freq) + tm.assert_series_equal(result, expected) else: - exp = DataFrame({'vals': exp_vals * 2}) - result = get_result(grp) - tm.assert_frame_equal(result, exp) + result = df.groupby('key').pct_change( + periods=periods, fill_method=fill_method, limit=limit, freq=freq) + tm.assert_frame_equal(result, expected.to_frame('vals')) 
@pytest.mark.parametrize("func", [np.any, np.all])