Skip to content

Commit 47b5908

Browse files
committed
add whatsnew
1 parent 172ab7a commit 47b5908

File tree

3 files changed

+34
-11
lines changed

3 files changed

+34
-11
lines changed

doc/source/whatsnew/v0.23.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ Groupby/Resample/Rolling
5050
^^^^^^^^^^^^^^^^^^^^^^^^
5151

5252
- Bug in :func:`DataFrame.agg` where applying multiple aggregation functions to a :class:`DataFrame` with duplicated column names would cause a stack overflow (:issue:`21063`)
53+
- Bug in `DataFrame.pct_change()` and `Series.pct_change()` where percent change on non-monotonic groups was calculated in a vectorized way (:issue:`21200`)
5354

5455
Strings
5556
^^^^^^^

pandas/core/groupby/groupby.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -2070,7 +2070,7 @@ def shift(self, periods=1, freq=None, axis=0):
20702070
def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
20712071
axis=0):
20722072
"""Calculate pct_change of each value to previous entry in group"""
2073-
if freq is not None or axis != 0:
2073+
if (freq is not None or axis != 0) or not self.grouper.is_monotonic:
20742074
return self.apply(lambda x: x.pct_change(periods=periods,
20752075
fill_method=fill_method,
20762076
limit=limit, freq=freq,
@@ -3942,7 +3942,12 @@ def _apply_to_column_groupbys(self, func):
39423942
return func(self)
39433943

39443944
def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None):
3945-
"""Calculate percent change of each value to previous entry in group"""
3945+
"""Calculate pct_change of each value to previous entry in group"""
3946+
if not self.grouper.is_monotonic:
3947+
return self.apply(lambda x: x.pct_change(periods=periods,
3948+
fill_method=fill_method,
3949+
limit=limit, freq=freq))
3950+
39463951
filled = getattr(self, fill_method)(limit=limit)
39473952
shifted = filled.shift(periods=periods, freq=freq)
39483953

pandas/tests/groupby/test_transform.py

+26-9
Original file line numberDiff line numberDiff line change
@@ -722,35 +722,52 @@ def interweave(list_obj):
722722

723723

724724
@pytest.mark.parametrize("test_series", [True, False])
725+
@pytest.mark.parametrize("shuffle", [True, False])
725726
@pytest.mark.parametrize("periods,fill_method,limit", [
726727
(1, 'ffill', None), (1, 'ffill', 1),
727728
(1, 'bfill', None), (1, 'bfill', 1),
728729
(-1, 'ffill', None), (-1, 'ffill', 1),
729730
(-1, 'bfill', None), (-1, 'bfill', 1)])
730-
def test_pct_change(test_series, periods, fill_method, limit):
731+
def test_pct_change(test_series, shuffle, periods, fill_method, limit):
732+
# Groupby pct change uses a vectorized operation if monotonic and an apply if non-monotonic
733+
# Shuffle parameter tests each
731734
vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan]
732-
exp_vals = Series(vals).pct_change(periods=periods,
733-
fill_method=fill_method,
734-
limit=limit).tolist()
735-
736-
df = DataFrame({'key': ['a'] * len(vals) + ['b'] * len(vals),
735+
keys = ['a', 'b']
736+
df = DataFrame({'key': [k for j in list(map(lambda x: [x] * len(vals), keys)) for k in j],
737737
'vals': vals * 2})
738+
if shuffle:
739+
df = df.reindex(np.random.permutation(len(df))).reset_index(drop=True)
740+
741+
manual_apply = []
742+
for k in keys:
743+
manual_apply.append(Series(df.loc[df.key == k, 'vals'].values).pct_change(periods=periods,
744+
fill_method=fill_method,
745+
limit=limit))
746+
exp_vals = pd.concat(manual_apply).reset_index(drop=True)
747+
exp = pd.DataFrame(exp_vals, columns=['_pct_change'])
738748
grp = df.groupby('key')
739749

740750
def get_result(grp_obj):
741751
return grp_obj.pct_change(periods=periods,
742752
fill_method=fill_method,
743753
limit=limit)
744754

755+
# Specifically test when monotonic and not monotonic
756+
745757
if test_series:
746-
exp = pd.Series(exp_vals * 2)
747-
exp.name = 'vals'
758+
exp = exp.loc[:, '_pct_change']
748759
grp = grp['vals']
749760
result = get_result(grp)
761+
# Resort order by keys to compare to expected values
762+
df.insert(0, '_pct_change', result)
763+
result = df.sort_values(by='key')
764+
result = result.loc[:, '_pct_change']
765+
result = result.reset_index(drop=True)
750766
tm.assert_series_equal(result, exp)
751767
else:
752-
exp = DataFrame({'vals': exp_vals * 2})
753768
result = get_result(grp)
769+
result.reset_index(drop=True, inplace=True)
770+
result.columns = ['_pct_change']
754771
tm.assert_frame_equal(result, exp)
755772

756773

0 commit comments

Comments
 (0)