Skip to content

Commit 9ea2877

Browse files
simonariddell authored and jreback committed
pct change bug issue 21200 (#21235)
1 parent 89f0441 commit 9ea2877

File tree

4 files changed

+62
-30
lines changed

4 files changed

+62
-30
lines changed

doc/source/whatsnew/v0.24.0.rst

+27
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,33 @@ Backwards incompatible API changes
382382
- :meth:`read_csv` will now raise a ``ValueError`` if a column with missing values is declared as having dtype ``bool`` (:issue:`20591`)
383383
- The column order of the resultant :class:`DataFrame` from :meth:`MultiIndex.to_frame` is now guaranteed to match the :attr:`MultiIndex.names` order. (:issue:`22420`)
384384

385+
Percentage change on groupby changes
386+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
387+
388+
Fixed a bug where :func:`SeriesGroupBy.pct_change` and :func:`DataFrameGroupBy.pct_change` computed the percent change across group boundaries instead of within each group; the percent change is now correctly calculated per group (:issue:`21200`, :issue:`21235`).
389+
390+
.. ipython:: python
391+
392+
df = pd.DataFrame({'grp': ['a', 'a', 'b'], 'foo': [1.0, 1.1, 2.2]})
393+
df
394+
395+
Previous behavior:
396+
397+
.. code-block:: ipython
398+
399+
In [1]: df.groupby('grp').pct_change()
400+
Out[1]:
401+
foo
402+
0 NaN
403+
1 0.1
404+
2 1.0
405+
406+
New behavior:
407+
408+
.. ipython:: python
409+
410+
df.groupby('grp').pct_change()
411+
385412
.. _whatsnew_0240.api_breaking.deps:
386413

387414
Dependencies have increased minimum versions

pandas/core/groupby/generic.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -1221,9 +1221,15 @@ def _apply_to_column_groupbys(self, func):
12211221
return func(self)
12221222

12231223
def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None):
1224-
"""Calculate percent change of each value to previous entry in group"""
1224+
"""Calculate pct_change of each value to previous entry in group"""
1225+
# TODO: Remove this conditional when #23918 is fixed
1226+
if freq:
1227+
return self.apply(lambda x: x.pct_change(periods=periods,
1228+
fill_method=fill_method,
1229+
limit=limit, freq=freq))
12251230
filled = getattr(self, fill_method)(limit=limit)
1226-
shifted = filled.shift(periods=periods, freq=freq)
1231+
fill_grp = filled.groupby(self.grouper.labels)
1232+
shifted = fill_grp.shift(periods=periods, freq=freq)
12271233

12281234
return (filled / shifted) - 1
12291235

pandas/core/groupby/groupby.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -2025,11 +2025,10 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
20252025
fill_method=fill_method,
20262026
limit=limit, freq=freq,
20272027
axis=axis))
2028-
2029-
filled = getattr(self, fill_method)(limit=limit).drop(
2030-
self.grouper.names, axis=1)
2031-
shifted = filled.shift(periods=periods, freq=freq)
2032-
2028+
filled = getattr(self, fill_method)(limit=limit)
2029+
filled = filled.drop(self.grouper.names, axis=1)
2030+
fill_grp = filled.groupby(self.grouper.labels)
2031+
shifted = fill_grp.shift(periods=periods, freq=freq)
20332032
return (filled / shifted) - 1
20342033

20352034
@Substitution(name='groupby')

pandas/tests/groupby/test_transform.py

+23-23
Original file line numberDiff line numberDiff line change
@@ -765,36 +765,36 @@ def test_pad_stable_sorting(fill_method):
765765

766766

767767
@pytest.mark.parametrize("test_series", [True, False])
768+
@pytest.mark.parametrize("freq", [
769+
None,
770+
pytest.param('D', marks=pytest.mark.xfail(
771+
reason='GH#23918 before method uses freq in vectorized approach'))])
768772
@pytest.mark.parametrize("periods,fill_method,limit", [
769773
(1, 'ffill', None), (1, 'ffill', 1),
770774
(1, 'bfill', None), (1, 'bfill', 1),
771775
(-1, 'ffill', None), (-1, 'ffill', 1),
772-
(-1, 'bfill', None), (-1, 'bfill', 1)])
773-
def test_pct_change(test_series, periods, fill_method, limit):
774-
vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan]
775-
exp_vals = Series(vals).pct_change(periods=periods,
776-
fill_method=fill_method,
777-
limit=limit).tolist()
778-
779-
df = DataFrame({'key': ['a'] * len(vals) + ['b'] * len(vals),
780-
'vals': vals * 2})
781-
grp = df.groupby('key')
782-
783-
def get_result(grp_obj):
784-
return grp_obj.pct_change(periods=periods,
785-
fill_method=fill_method,
786-
limit=limit)
776+
(-1, 'bfill', None), (-1, 'bfill', 1),
777+
])
778+
def test_pct_change(test_series, freq, periods, fill_method, limit):
779+
# GH 21200, 21621
780+
vals = [3, np.nan, np.nan, np.nan, 1, 2, 4, 10, np.nan, 4]
781+
keys = ['a', 'b']
782+
key_v = np.repeat(keys, len(vals))
783+
df = DataFrame({'key': key_v, 'vals': vals * 2})
784+
785+
df_g = getattr(df.groupby('key'), fill_method)(limit=limit)
786+
grp = df_g.groupby('key')
787+
788+
expected = grp['vals'].obj / grp['vals'].shift(periods) - 1
787789

788790
if test_series:
789-
exp = pd.Series(exp_vals * 2)
790-
exp.name = 'vals'
791-
grp = grp['vals']
792-
result = get_result(grp)
793-
tm.assert_series_equal(result, exp)
791+
result = df.groupby('key')['vals'].pct_change(
792+
periods=periods, fill_method=fill_method, limit=limit, freq=freq)
793+
tm.assert_series_equal(result, expected)
794794
else:
795-
exp = DataFrame({'vals': exp_vals * 2})
796-
result = get_result(grp)
797-
tm.assert_frame_equal(result, exp)
795+
result = df.groupby('key').pct_change(
796+
periods=periods, fill_method=fill_method, limit=limit, freq=freq)
797+
tm.assert_frame_equal(result, expected.to_frame('vals'))
798798

799799

800800
@pytest.mark.parametrize("func", [np.any, np.all])

0 commit comments

Comments
 (0)