Skip to content

Commit bda1452

Browse files
committed
ENH: centered moving window functions #1270
1 parent 428803a commit bda1452

File tree

2 files changed

+125
-44
lines changed

2 files changed

+125
-44
lines changed

pandas/stats/moments.py

+79-32
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from numpy import NaN
1010
import numpy as np
1111

12-
from pandas.core.api import DataFrame, Series, notnull
12+
from pandas.core.api import DataFrame, Series, notnull, Panel
1313
import pandas.lib as lib
1414

1515
from pandas.util.decorators import Substitution, Appender
@@ -124,7 +124,7 @@
124124
"""
125125

126126

127-
def rolling_count(arg, window, freq=None, time_rule=None):
127+
def rolling_count(arg, window, freq=None, center=False, time_rule=None):
128128
"""
129129
Rolling count of number of non-NaN observations inside provided window.
130130
@@ -134,6 +134,8 @@ def rolling_count(arg, window, freq=None, time_rule=None):
134134
window : Number of observations used for calculating statistic
135135
freq : None or string alias / date offset object, default=None
136136
Frequency to conform to before computing statistic
137+
center : boolean, default False
138+
Whether the label should correspond with center of window
137139
138140
Returns
139141
-------
@@ -146,7 +148,7 @@ def rolling_count(arg, window, freq=None, time_rule=None):
146148

147149
converted = np.isfinite(values).astype(float)
148150
result = rolling_sum(converted, window, min_periods=1,
149-
time_rule=time_rule)
151+
center=center) # already converted
150152

151153
# putmask here?
152154
result[np.isnan(result)] = 0
@@ -156,22 +158,37 @@ def rolling_count(arg, window, freq=None, time_rule=None):
156158

157159
@Substitution("Unbiased moving covariance", _binary_arg_flex, _flex_retval)
@Appender(_doc_template)
def rolling_cov(arg1, arg2, window, min_periods=None, freq=None,
                center=False, time_rule=None):
    # NOTE: no literal docstring here on purpose -- the decorators above
    # build the public docstring from the shared template.

    # Conform both inputs to the requested frequency once, up front, so the
    # inner moment calls below run on already-converted data and do not need
    # freq / time_rule forwarded again.
    arg1 = _conv_timerule(arg1, freq, time_rule)
    arg2 = _conv_timerule(arg2, freq, time_rule)

    # Clamp the window to the shorter of the two inputs.
    window = min(window, len(arg1), len(arg2))

    def _get_cov(X, Y):
        mean = lambda x: rolling_mean(x, window, min_periods)
        count = rolling_count(X + Y, window)
        # n / (n - 1) correction turns the biased estimate into the
        # unbiased sample covariance.
        bias_adj = count / (count - 1)
        return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj

    rs = _flex_binary_moment(arg1, arg2, _get_cov)

    if center:
        # A trailing window ending at position i spans [i - window + 1, i],
        # so its centre sits (window - 1) / 2 positions back.  The previous
        # (window + 1) / 2 offset moved odd-sized windows one step too far
        # (e.g. window=3 shifted by 2 instead of 1).
        offset = (window - 1) // 2
        # offset == 0 (window of 1 or 2, or empty input) needs no shift; it
        # must also be skipped because rs[:-0] is an empty slice.
        if offset > 0:
            if isinstance(rs, (Series, DataFrame, Panel)):
                rs = rs.shift(-offset)
            else:
                # Plain ndarray result: slide values towards the front and
                # blank the vacated tail.
                rs[:-offset] = rs[offset:]
                rs[-offset:] = np.nan
    return rs
167180

168181
@Substitution("Moving sample correlation", _binary_arg_flex, _flex_retval)
@Appender(_doc_template)
def rolling_corr(arg1, arg2, window, min_periods=None, freq=None,
                 center=False, time_rule=None):
    # Options forwarded unchanged to every underlying moment call.
    shared = dict(freq=freq, center=center, time_rule=time_rule)

    def _get_corr(a, b):
        # correlation = cov(a, b) / (std(a) * std(b)), all over the same
        # moving window.
        covariance = rolling_cov(a, b, window, min_periods, **shared)
        std_a = rolling_std(a, window, min_periods, **shared)
        std_b = rolling_std(b, window, min_periods, **shared)
        return covariance / (std_a * std_b)

    return _flex_binary_moment(arg1, arg2, _get_corr)
177194

@@ -234,7 +251,7 @@ def rolling_corr_pairwise(df, window, min_periods=None):
234251

235252

236253
def _rolling_moment(arg, window, func, minp, axis=0, freq=None,
237-
time_rule=None, **kwargs):
254+
center=False, time_rule=None, **kwargs):
238255
"""
239256
Rolling statistical measure using supplied function. Designed to be
240257
used with passed-in Cython array-based functions.
@@ -249,6 +266,8 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None,
249266
axis : int, default 0
250267
freq : None or string alias / date offset object, default=None
251268
Frequency to conform to before computing statistic
269+
center : boolean, default False
270+
Whether the label should correspond with center of window
252271
253272
Returns
254273
-------
@@ -260,8 +279,16 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None,
260279
# actually calculate the moment. Faster way to do this?
261280
result = np.apply_along_axis(calc, axis, values)
262281

263-
return return_hook(result)
282+
rs = return_hook(result)
283+
if center:
284+
if isinstance(rs, (Series, DataFrame, Panel)):
285+
rs = rs.shift(-int((window + 1) / 2.), axis=axis)
286+
else:
287+
offset = int((window + 1)/ 2.)
288+
rs[:-offset] = rs[offset:]
289+
rs[-offset:] = np.nan
264290

291+
return rs
265292

266293
def _process_data_structure(arg, kill_inf=True):
267294
if isinstance(arg, DataFrame):
@@ -450,12 +477,14 @@ def _rolling_func(func, desc, check_minp=_use_window):
450477
@Substitution(desc, _unary_arg, _type_of_input)
451478
@Appender(_doc_template)
452479
@wraps(func)
453-
def f(arg, window, min_periods=None, freq=None, time_rule=None, **kwargs):
480+
def f(arg, window, min_periods=None, freq=None, center=False,
481+
time_rule=None, **kwargs):
454482
def call_cython(arg, window, minp, **kwds):
455483
minp = check_minp(minp, window)
456484
return func(arg, window, minp, **kwds)
457485
return _rolling_moment(arg, window, call_cython, min_periods,
458-
freq=freq, time_rule=time_rule, **kwargs)
486+
freq=freq, center=center,
487+
time_rule=time_rule, **kwargs)
459488

460489
return f
461490

@@ -477,7 +506,7 @@ def call_cython(arg, window, minp, **kwds):
477506

478507

479508
def rolling_quantile(arg, window, quantile, min_periods=None, freq=None,
480-
time_rule=None):
509+
center=False, time_rule=None):
481510
"""Moving quantile
482511
483512
Parameters
@@ -489,6 +518,8 @@ def rolling_quantile(arg, window, quantile, min_periods=None, freq=None,
489518
Minimum number of observations in window required to have a value
490519
freq : None or string alias / date offset object, default=None
491520
Frequency to conform to before computing statistic
521+
center : boolean, default False
522+
Whether the label should correspond with center of window
492523
493524
Returns
494525
-------
@@ -499,11 +530,11 @@ def call_cython(arg, window, minp):
499530
minp = _use_window(minp, window)
500531
return lib.roll_quantile(arg, window, minp, quantile)
501532
return _rolling_moment(arg, window, call_cython, min_periods,
502-
freq=freq, time_rule=time_rule)
533+
freq=freq, center=center, time_rule=time_rule)
503534

504535

505536
def rolling_apply(arg, window, func, min_periods=None, freq=None,
506-
time_rule=None):
537+
center=False, time_rule=None):
507538
"""Generic moving function application
508539
509540
Parameters
@@ -516,6 +547,8 @@ def rolling_apply(arg, window, func, min_periods=None, freq=None,
516547
Minimum number of observations in window required to have a value
517548
freq : None or string alias / date offset object, default=None
518549
Frequency to conform to before computing statistic
550+
center : boolean, default False
551+
Whether the label should correspond with center of window
519552
520553
Returns
521554
-------
@@ -525,21 +558,23 @@ def call_cython(arg, window, minp):
525558
minp = _use_window(minp, window)
526559
return lib.roll_generic(arg, window, minp, func)
527560
return _rolling_moment(arg, window, call_cython, min_periods,
528-
freq=freq, time_rule=time_rule)
561+
freq=freq, center=center, time_rule=time_rule)
529562

530563

531564
def _expanding_func(func, desc, check_minp=_use_window):
532565
@Substitution(desc, _unary_arg, _type_of_input)
533566
@Appender(_expanding_doc)
534567
@wraps(func)
535-
def f(arg, min_periods=1, freq=None, time_rule=None, **kwargs):
568+
def f(arg, min_periods=1, freq=None, center=False, time_rule=None,
569+
**kwargs):
536570
window = len(arg)
537571

538572
def call_cython(arg, window, minp, **kwds):
539573
minp = check_minp(minp, window)
540574
return func(arg, window, minp, **kwds)
541575
return _rolling_moment(arg, window, call_cython, min_periods,
542-
freq=freq, time_rule=time_rule, **kwargs)
576+
freq=freq, center=center,
577+
time_rule=time_rule, **kwargs)
543578

544579
return f
545580

@@ -560,7 +595,7 @@ def call_cython(arg, window, minp, **kwds):
560595
check_minp=_require_min_periods(4))
561596

562597

563-
def expanding_count(arg, freq=None, time_rule=None):
598+
def expanding_count(arg, freq=None, center=False, time_rule=None):
564599
"""
565600
Expanding count of number of non-NaN observations.
566601
@@ -569,16 +604,19 @@ def expanding_count(arg, freq=None, time_rule=None):
569604
arg : DataFrame or numpy ndarray-like
570605
freq : None or string alias / date offset object, default=None
571606
Frequency to conform to before computing statistic
607+
center : boolean, default False
608+
Whether the label should correspond with center of window
572609
573610
Returns
574611
-------
575612
expanding_count : type of caller
576613
"""
577-
return rolling_count(arg, len(arg), freq=freq, time_rule=time_rule)
614+
return rolling_count(arg, len(arg), freq=freq, center=center,
615+
time_rule=time_rule)
578616

579617

580618
def expanding_quantile(arg, quantile, min_periods=1, freq=None,
581-
time_rule=None):
619+
center=False, time_rule=None):
582620
"""Expanding quantile
583621
584622
Parameters
@@ -589,29 +627,35 @@ def expanding_quantile(arg, quantile, min_periods=1, freq=None,
589627
Minimum number of observations in window required to have a value
590628
freq : None or string alias / date offset object, default=None
591629
Frequency to conform to before computing statistic
630+
center : boolean, default False
631+
Whether the label should correspond with center of window
592632
593633
Returns
594634
-------
595635
y : type of input argument
596636
"""
597637
return rolling_quantile(arg, len(arg), quantile, min_periods=min_periods,
598-
freq=freq, time_rule=time_rule)
638+
freq=freq, center=center, time_rule=time_rule)
599639

600640

601641
@Substitution("Unbiased expanding covariance", _binary_arg_flex, _flex_retval)
@Appender(_expanding_doc)
def expanding_cov(arg1, arg2, min_periods=1, freq=None, center=False,
                  time_rule=None):
    # An expanding statistic is a rolling one whose window spans the longer
    # of the two inputs; everything else is delegated to rolling_cov.
    full_span = max(len(arg1), len(arg2))
    options = dict(min_periods=min_periods, freq=freq, center=center,
                   time_rule=time_rule)
    return rolling_cov(arg1, arg2, full_span, **options)
607649

608650

609651
@Substitution("Expanding sample correlation", _binary_arg_flex, _flex_retval)
@Appender(_expanding_doc)
def expanding_corr(arg1, arg2, min_periods=1, freq=None, center=False,
                   time_rule=None):
    # Use a window as long as the longer input so every observation seen so
    # far takes part, then defer to the rolling implementation.
    full_span = max(len(arg1), len(arg2))
    options = dict(min_periods=min_periods, freq=freq, center=center,
                   time_rule=time_rule)
    return rolling_corr(arg1, arg2, full_span, **options)
615659

616660

617661
def expanding_corr_pairwise(df, min_periods=1):
@@ -634,7 +678,8 @@ def expanding_corr_pairwise(df, min_periods=1):
634678
return rolling_corr_pairwise(df, window, min_periods=min_periods)
635679

636680

637-
def expanding_apply(arg, func, min_periods=1, freq=None, time_rule=None):
681+
def expanding_apply(arg, func, min_periods=1, freq=None, center=False,
682+
time_rule=None):
638683
"""Generic expanding function application
639684
640685
Parameters
@@ -646,11 +691,13 @@ def expanding_apply(arg, func, min_periods=1, freq=None, time_rule=None):
646691
Minimum number of observations in window required to have a value
647692
freq : None or string alias / date offset object, default=None
648693
Frequency to conform to before computing statistic
694+
center : boolean, default False
695+
Whether the label should correspond with center of window
649696
650697
Returns
651698
-------
652699
y : type of input argument
653700
"""
654701
window = len(arg)
655702
return rolling_apply(arg, window, func, min_periods=min_periods, freq=freq,
656-
time_rule=time_rule)
703+
center=center, time_rule=time_rule)

0 commit comments

Comments
 (0)