Skip to content

Commit 7322239

Browse files
carlosdanielcsantos authored and jreback committed
ENH: Rolling window endpoints inclusion
closes pandas-dev#13965 Author: carlosdanielcsantos <[email protected]> Author: carlosdanielcsantos <[email protected]> Author: carlosdanielcsantos <[email protected]> Closes pandas-dev#15795 from carlosdanielcsantos/rwindow-endpoints-inclusion and squashes the following commits: aad97dc [carlosdanielcsantos] Updating docs 568c12f [carlosdanielcsantos] Innocuous change to rerun tests 037b84e [carlosdanielcsantos] Fixing style c18a31b [carlosdanielcsantos] Fixing test of assertion of closed parameter in fixed windows Style corrections 90dfb0c [carlosdanielcsantos] Correcting bug in window validation 8bd336a [carlosdanielcsantos] Almost there 306b9f7 [carlosdanielcsantos] Commiting progress on default=None. Still not tested Adding computation.rst section (still not written) ec4bbc7 [carlosdanielcsantos] Changing l_closed and r_closed variable names 0e8e65c [carlosdanielcsantos] Adding doc-strings and PEP8 corrections 5eaf3b4 [carlosdanielcsantos] str closed -> object closed Adding test of assert for closed parameter Adding assert for closed parameter in get_window_indexer 2cf6804 [carlosdanielcsantos] Time-based windows working da034bf [carlosdanielcsantos] Commiting progress 34f1309 [carlosdanielcsantos] Adding window slicing endpoint inclusion selection to VariableWindowIndexer
1 parent 1c4dacb commit 7322239

File tree

6 files changed

+222
-51
lines changed

6 files changed

+222
-51
lines changed

doc/source/computation.rst

+42
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,48 @@ default of the index) in a DataFrame.
459459
dft
460460
dft.rolling('2s', on='foo').sum()
461461
462+
.. _stats.rolling_window.endpoints:
463+
464+
Rolling Window Endpoints
465+
~~~~~~~~~~~~~~~~~~~~~~~~
466+
467+
.. versionadded:: 0.20.0
468+
469+
The inclusion of the interval endpoints in rolling window calculations can be specified with the ``closed``
470+
parameter:
471+
472+
.. csv-table::
473+
:header: "``closed``", "Description", "Default for"
474+
:widths: 20, 30, 30
475+
476+
``right``, close right endpoint, time-based windows
477+
``left``, close left endpoint,
478+
``both``, close both endpoints, fixed windows
479+
``neither``, open endpoints,
480+
481+
For example, leaving the right endpoint open is useful in many problems where present information must not
482+
leak back into statistics that describe the past. This allows the rolling window to compute statistics
483+
"up to that point in time", but not including that point in time.
484+
485+
.. ipython:: python
486+
487+
df = pd.DataFrame({'x': 1},
488+
index = [pd.Timestamp('20130101 09:00:01'),
489+
pd.Timestamp('20130101 09:00:02'),
490+
pd.Timestamp('20130101 09:00:03'),
491+
pd.Timestamp('20130101 09:00:04'),
492+
pd.Timestamp('20130101 09:00:06')])
493+
494+
df["right"] = df.rolling('2s', closed='right').x.sum() # default
495+
df["both"] = df.rolling('2s', closed='both').x.sum()
496+
df["left"] = df.rolling('2s', closed='left').x.sum()
497+
df["neither"] = df.rolling('2s', closed='neither').x.sum()
498+
499+
df
500+
501+
Currently, this feature is only implemented for time-based windows.
502+
For fixed windows, the ``closed`` parameter cannot be set and the rolling window will always have both endpoints closed.
503+
462504
.. _stats.moments.ts-versus-resampling:
463505

464506
Time-aware Rolling vs. Resampling

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you
319319
Other Enhancements
320320
^^^^^^^^^^^^^^^^^^
321321

322+
- ``DataFrame.rolling()`` now accepts the parameter ``closed='right'|'left'|'both'|'neither'`` to choose the rolling window endpoint closedness. See the :ref:`documentation <stats.rolling_window.endpoints>` (:issue:`13965`)
322323
- Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here <io.feather>`.
323324
- ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`)
324325
- ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`)

pandas/core/generic.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -5962,12 +5962,12 @@ def _add_series_or_dataframe_operations(cls):
59625962

59635963
@Appender(rwindow.rolling.__doc__)
59645964
def rolling(self, window, min_periods=None, freq=None, center=False,
5965-
win_type=None, on=None, axis=0):
5965+
win_type=None, on=None, axis=0, closed=None):
59665966
axis = self._get_axis_number(axis)
59675967
return rwindow.rolling(self, window=window,
59685968
min_periods=min_periods, freq=freq,
59695969
center=center, win_type=win_type,
5970-
on=on, axis=axis)
5970+
on=on, axis=axis, closed=closed)
59715971

59725972
cls.rolling = rolling
59735973

pandas/core/window.py

+32-14
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,12 @@
5656

5757
class _Window(PandasObject, SelectionMixin):
5858
_attributes = ['window', 'min_periods', 'freq', 'center', 'win_type',
59-
'axis', 'on']
59+
'axis', 'on', 'closed']
6060
exclusions = set()
6161

6262
def __init__(self, obj, window=None, min_periods=None, freq=None,
63-
center=False, win_type=None, axis=0, on=None, **kwargs):
63+
center=False, win_type=None, axis=0, on=None, closed=None,
64+
**kwargs):
6465

6566
if freq is not None:
6667
warnings.warn("The freq kw is deprecated and will be removed in a "
@@ -71,6 +72,7 @@ def __init__(self, obj, window=None, min_periods=None, freq=None,
7172
self.blocks = []
7273
self.obj = obj
7374
self.on = on
75+
self.closed = closed
7476
self.window = window
7577
self.min_periods = min_periods
7678
self.freq = freq
@@ -101,6 +103,10 @@ def validate(self):
101103
if self.min_periods is not None and not \
102104
is_integer(self.min_periods):
103105
raise ValueError("min_periods must be an integer")
106+
if self.closed is not None and self.closed not in \
107+
['right', 'both', 'left', 'neither']:
108+
raise ValueError("closed must be 'right', 'left', 'both' or "
109+
"'neither'")
104110

105111
def _convert_freq(self, how=None):
106112
""" resample according to the how, return a new object """
@@ -374,8 +380,14 @@ class Window(_Window):
374380
on : string, optional
375381
For a DataFrame, column on which to calculate
376382
the rolling window, rather than the index
383+
closed : string, default None
384+
Make the interval closed on the 'right', 'left', 'both' or
385+
'neither' endpoints.
386+
For offset-based windows, it defaults to 'right'.
387+
For fixed windows, defaults to 'both'. Remaining cases not implemented
388+
for fixed windows.
377389
378-
.. versionadded:: 0.19.0
390+
.. versionadded:: 0.20.0
379391
380392
axis : int or string, default 0
381393
@@ -717,12 +729,12 @@ def _apply(self, func, name=None, window=None, center=None,
717729
raise ValueError("we do not support this function "
718730
"in _window.{0}".format(func))
719731

720-
def func(arg, window, min_periods=None):
732+
def func(arg, window, min_periods=None, closed=None):
721733
minp = check_minp(min_periods, window)
722734
# ensure we are only rolling on floats
723735
arg = _ensure_float64(arg)
724736
return cfunc(arg,
725-
window, minp, indexi, **kwargs)
737+
window, minp, indexi, closed, **kwargs)
726738

727739
# calculation function
728740
if center:
@@ -731,11 +743,13 @@ def func(arg, window, min_periods=None):
731743

732744
def calc(x):
733745
return func(np.concatenate((x, additional_nans)),
734-
window, min_periods=self.min_periods)
746+
window, min_periods=self.min_periods,
747+
closed=self.closed)
735748
else:
736749

737750
def calc(x):
738-
return func(x, window, min_periods=self.min_periods)
751+
return func(x, window, min_periods=self.min_periods,
752+
closed=self.closed)
739753

740754
with np.errstate(all='ignore'):
741755
if values.ndim > 1:
@@ -768,7 +782,8 @@ def count(self):
768782
for b in blocks:
769783
result = b.notnull().astype(int)
770784
result = self._constructor(result, window=window, min_periods=0,
771-
center=self.center).sum()
785+
center=self.center,
786+
closed=self.closed).sum()
772787
results.append(result)
773788

774789
return self._wrap_results(results, blocks, obj)
@@ -789,11 +804,10 @@ def apply(self, func, args=(), kwargs={}):
789804
offset = _offset(window, self.center)
790805
index, indexi = self._get_index()
791806

792-
def f(arg, window, min_periods):
807+
def f(arg, window, min_periods, closed):
793808
minp = _use_window(min_periods, window)
794-
return _window.roll_generic(arg, window, minp, indexi,
795-
offset, func, args,
796-
kwargs)
809+
return _window.roll_generic(arg, window, minp, indexi, closed,
810+
offset, func, args, kwargs)
797811

798812
return self._apply(f, func, args=args, kwargs=kwargs,
799813
center=False)
@@ -864,7 +878,7 @@ def std(self, ddof=1, *args, **kwargs):
864878
def f(arg, *args, **kwargs):
865879
minp = _require_min_periods(1)(self.min_periods, window)
866880
return _zsqrt(_window.roll_var(arg, window, minp, indexi,
867-
ddof))
881+
self.closed, ddof))
868882

869883
return self._apply(f, 'std', check_minp=_require_min_periods(1),
870884
ddof=ddof, **kwargs)
@@ -911,7 +925,7 @@ def quantile(self, quantile, **kwargs):
911925
def f(arg, *args, **kwargs):
912926
minp = _use_window(self.min_periods, window)
913927
return _window.roll_quantile(arg, window, minp, indexi,
914-
quantile)
928+
self.closed, quantile)
915929

916930
return self._apply(f, 'quantile', quantile=quantile,
917931
**kwargs)
@@ -1044,6 +1058,10 @@ def validate(self):
10441058
elif self.window < 0:
10451059
raise ValueError("window must be non-negative")
10461060

1061+
if not self.is_datetimelike and self.closed is not None:
1062+
raise ValueError("closed only implemented for datetimelike "
1063+
"and offset based windows")
1064+
10471065
def _validate_monotonic(self):
10481066
""" validate on is monotonic """
10491067
if not self._on.is_monotonic:

0 commit comments

Comments
 (0)