Skip to content

BUG: no need to validate monotonicity when groupby-rolling #15175

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ Bug Fixes




- Bug in ``.groupby(...).rolling(...)`` when ``on`` is specified and a ``DatetimeIndex`` is used (:issue:`15130`)



Expand Down
49 changes: 36 additions & 13 deletions pandas/core/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -1025,19 +1025,8 @@ def validate(self):
if (self.is_datetimelike and
isinstance(self.window, (compat.string_types, DateOffset))):

# must be monotonic for on
if not self._on.is_monotonic:
formatted = self.on or 'index'
raise ValueError("{0} must be "
"monotonic".format(formatted))

from pandas.tseries.frequencies import to_offset
try:
freq = to_offset(self.window)
except (TypeError, ValueError):
raise ValueError("passed window {0} in not "
"compat with a datetimelike "
"index".format(self.window))
self._validate_monotonic()
freq = self._validate_freq()

# we don't allow center
if self.center:
Expand All @@ -1058,6 +1047,23 @@ def validate(self):
elif self.window < 0:
raise ValueError("window must be non-negative")

def _validate_monotonic(self):
""" validate on is monotonic """
if not self._on.is_monotonic:
formatted = self.on or 'index'
raise ValueError("{0} must be "
"monotonic".format(formatted))

def _validate_freq(self):
""" validate & return our freq """
from pandas.tseries.frequencies import to_offset
try:
return to_offset(self.window)
except (TypeError, ValueError):
raise ValueError("passed window {0} in not "
"compat with a datetimelike "
"index".format(self.window))

@Substitution(name='rolling')
@Appender(SelectionMixin._see_also_template)
@Appender(SelectionMixin._agg_doc)
Expand Down Expand Up @@ -1175,6 +1181,23 @@ class RollingGroupby(_GroupByMixin, Rolling):
def _constructor(self):
return Rolling

def _gotitem(self, key, ndim, subset=None):
    """
    Delegate to the parent ``_gotitem`` after moving ``on`` into the index.

    When ``self.on`` is set, the object held by the groupby is replaced
    with a copy indexed by ``self._on`` and ``self.on`` is cleared, so
    that the index is carried through to the selected object when the
    groupby splits it.
    """
    if self.on is not None:
        grouper = self._groupby
        # set the index first: ``on`` must only be cleared after
        # ``self._on`` has been read
        grouper.obj = grouper.obj.set_index(self._on)
        self.on = None

    parent = super(RollingGroupby, self)
    return parent._gotitem(key, ndim, subset=subset)

def _validate_monotonic(self):
"""
validate on is monotonic;
we don't care for groupby.rolling
"""
pass


class Expanding(_Rolling_and_Expanding):
"""
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -3616,3 +3616,23 @@ def agg_by_day(x):
agg_by_day).reset_index(level=0, drop=True)

tm.assert_frame_equal(result, expected)

def test_groupby_monotonic(self):
    """
    GH 15130: groupby-rolling with ``on`` must not validate monotonicity.

    The 'date' column is not monotonic across the whole frame, yet
    ``groupby('name').rolling('180D', on='date')`` is expected to match
    a per-group rolling sum computed on a date-indexed frame.
    """
    records = [
        ['David', '1/1/2015', 100],
        ['David', '1/5/2015', 500],
        ['David', '5/30/2015', 50],
        ['David', '7/25/2015', 50],
        ['Ryan', '1/4/2014', 100],
        ['Ryan', '1/19/2015', 500],
        ['Ryan', '3/31/2016', 50],
        ['Joe', '7/1/2015', 100],
        ['Joe', '9/9/2015', 500],
        ['Joe', '10/15/2015', 50],
    ]

    frame = pd.DataFrame(data=records, columns=['name', 'date', 'amount'])
    frame['date'] = pd.to_datetime(frame['date'])

    # reference: roll each group separately on a date-indexed frame
    by_name = frame.set_index('date').groupby('name')
    expected = by_name.apply(
        lambda grp: grp.rolling('180D')['amount'].sum())

    rolled = frame.groupby('name').rolling('180D', on='date')
    result = rolled['amount'].sum()

    tm.assert_series_equal(result, expected)