Skip to content

Cythonized GroupBy pct_change #19919

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 10, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,7 @@ Performance Improvements
- Improved performance of variable ``.rolling()`` on ``.min()`` and ``.max()`` (:issue:`19521`)
- Improved performance of :func:`pandas.core.groupby.GroupBy.ffill` and :func:`pandas.core.groupby.GroupBy.bfill` (:issue:`11296`)
- Improved performance of :func:`pandas.core.groupby.GroupBy.any` and :func:`pandas.core.groupby.GroupBy.all` (:issue:`15435`)
- Improved performance of :func:`pandas.core.groupby.GroupBy.pct_change` (:issue:`19165`)

.. _whatsnew_0230.docs:

Expand Down
24 changes: 24 additions & 0 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2044,6 +2044,23 @@ def shift(self, periods=1, freq=None, axis=0):
result_is_index=True,
periods=periods)

@Substitution(name='groupby')
@Appender(_doc_template)
def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
axis=0):
"""Calcuate pct_change of each value to previous entry in group"""
if freq is not None or axis != 0:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I copied this from the shift implementation but I'm considering moving this into a decorator or pushing into the _get_cythonized_result methods as it is useful for quite a few functions

return self.apply(lambda x: x.pct_change(periods=periods,
fill_method=fill_method,
limit=limit, freq=freq,
axis=axis))

filled = getattr(self, fill_method)(limit=limit).drop(
self.grouper.names, axis=1)
shifted = filled.shift(periods=periods, freq=freq)

return (filled / shifted) - 1

@Substitution(name='groupby')
@Appender(_doc_template)
def head(self, n=5):
Expand Down Expand Up @@ -3884,6 +3901,13 @@ def _apply_to_column_groupbys(self, func):
""" return a pass thru """
return func(self)

def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None):
"""Calculate percent change of each value to previous entry in group"""
filled = getattr(self, fill_method)(limit=limit)
shifted = filled.shift(periods=periods, freq=freq)

return (filled / shifted) - 1


class NDFrameGroupBy(GroupBy):

Expand Down
55 changes: 0 additions & 55 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2062,61 +2062,6 @@ def test_rank_object_raises(self, ties_method, ascending, na_option,
ascending=ascending,
na_option=na_option, pct=pct)

@pytest.mark.parametrize("mix_groupings", [True, False])
@pytest.mark.parametrize("as_series", [True, False])
@pytest.mark.parametrize("val1,val2", [
('foo', 'bar'), (1, 2), (1., 2.)])
@pytest.mark.parametrize("fill_method,limit,exp_vals", [
("ffill", None,
[np.nan, np.nan, 'val1', 'val1', 'val1', 'val2', 'val2', 'val2']),
("ffill", 1,
[np.nan, np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan]),
("bfill", None,
['val1', 'val1', 'val1', 'val2', 'val2', 'val2', np.nan, np.nan]),
("bfill", 1,
[np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan, np.nan])
])
def test_group_fill_methods(self, mix_groupings, as_series, val1, val2,
fill_method, limit, exp_vals):
vals = [np.nan, np.nan, val1, np.nan, np.nan, val2, np.nan, np.nan]
_exp_vals = list(exp_vals)
# Overwrite placeholder values
for index, exp_val in enumerate(_exp_vals):
if exp_val == 'val1':
_exp_vals[index] = val1
elif exp_val == 'val2':
_exp_vals[index] = val2

# Need to modify values and expectations depending on the
# Series / DataFrame that we ultimately want to generate
if mix_groupings: # ['a', 'b', 'a, 'b', ...]
keys = ['a', 'b'] * len(vals)

def interweave(list_obj):
temp = list()
for x in list_obj:
temp.extend([x, x])

return temp

_exp_vals = interweave(_exp_vals)
vals = interweave(vals)
else: # ['a', 'a', 'a', ... 'b', 'b', 'b']
keys = ['a'] * len(vals) + ['b'] * len(vals)
_exp_vals = _exp_vals * 2
vals = vals * 2

df = DataFrame({'key': keys, 'val': vals})
if as_series:
result = getattr(
df.groupby('key')['val'], fill_method)(limit=limit)
exp = Series(_exp_vals, name='val')
assert_series_equal(result, exp)
else:
result = getattr(df.groupby('key'), fill_method)(limit=limit)
exp = DataFrame({'key': keys, 'val': _exp_vals})
assert_frame_equal(result, exp)

@pytest.mark.parametrize("agg_func", ['any', 'all'])
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("vals", [
Expand Down
87 changes: 87 additions & 0 deletions pandas/tests/groupby/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,3 +636,90 @@ def test_transform_numeric_ret(self, cols, exp, comp_func, agg_func):
exp = exp.astype('float')

comp_func(result, exp)

@pytest.mark.parametrize("mix_groupings", [True, False])
@pytest.mark.parametrize("as_series", [True, False])
@pytest.mark.parametrize("val1,val2", [
('foo', 'bar'), (1, 2), (1., 2.)])
@pytest.mark.parametrize("fill_method,limit,exp_vals", [
("ffill", None,
[np.nan, np.nan, 'val1', 'val1', 'val1', 'val2', 'val2', 'val2']),
("ffill", 1,
[np.nan, np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan]),
("bfill", None,
['val1', 'val1', 'val1', 'val2', 'val2', 'val2', np.nan, np.nan]),
("bfill", 1,
[np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan, np.nan])
])
def test_group_fill_methods(self, mix_groupings, as_series, val1, val2,
fill_method, limit, exp_vals):
vals = [np.nan, np.nan, val1, np.nan, np.nan, val2, np.nan, np.nan]
_exp_vals = list(exp_vals)
# Overwrite placeholder values
for index, exp_val in enumerate(_exp_vals):
if exp_val == 'val1':
_exp_vals[index] = val1
elif exp_val == 'val2':
_exp_vals[index] = val2

# Need to modify values and expectations depending on the
# Series / DataFrame that we ultimately want to generate
if mix_groupings: # ['a', 'b', 'a, 'b', ...]
keys = ['a', 'b'] * len(vals)

def interweave(list_obj):
temp = list()
for x in list_obj:
temp.extend([x, x])

return temp

_exp_vals = interweave(_exp_vals)
vals = interweave(vals)
else: # ['a', 'a', 'a', ... 'b', 'b', 'b']
keys = ['a'] * len(vals) + ['b'] * len(vals)
_exp_vals = _exp_vals * 2
vals = vals * 2

df = DataFrame({'key': keys, 'val': vals})
if as_series:
result = getattr(
df.groupby('key')['val'], fill_method)(limit=limit)
exp = Series(_exp_vals, name='val')
assert_series_equal(result, exp)
else:
result = getattr(df.groupby('key'), fill_method)(limit=limit)
exp = DataFrame({'key': keys, 'val': _exp_vals})
assert_frame_equal(result, exp)

@pytest.mark.parametrize("test_series", [True, False])
@pytest.mark.parametrize("periods,fill_method,limit", [
(1, 'ffill', None), (1, 'ffill', 1),
(1, 'bfill', None), (1, 'bfill', 1),
(-1, 'ffill', None), (-1, 'ffill', 1),
(-1, 'bfill', None), (-1, 'bfill', 1)])
def test_pct_change(self, test_series, periods, fill_method, limit):
vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan]
exp_vals = Series(vals).pct_change(periods=periods,
fill_method=fill_method,
limit=limit).tolist()

df = DataFrame({'key': ['a'] * len(vals) + ['b'] * len(vals),
'vals': vals * 2})
grp = df.groupby('key')

def get_result(grp_obj):
return grp_obj.pct_change(periods=periods,
fill_method=fill_method,
limit=limit)

if test_series:
exp = pd.Series(exp_vals * 2)
exp.name = 'vals'
grp = grp['vals']
result = get_result(grp)
tm.assert_series_equal(result, exp)
else:
exp = DataFrame({'vals': exp_vals * 2})
result = get_result(grp)
tm.assert_frame_equal(result, exp)