Skip to content

Commit bc8551e

Browse files
committed
TST/ENH: finish implementing centered moving window functions from scikits.timeseries
1 parent d9d59f6 commit bc8551e

File tree

3 files changed

+184
-38
lines changed

3 files changed

+184
-38
lines changed

pandas/src/moments.pyx

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -952,3 +952,70 @@ def roll_generic(ndarray[float64_t, cast=True] input, int win,
952952
bufarr.data = <char*> oldbuf
953953

954954
return output
955+
956+
def roll_window(ndarray[float64_t, ndim=1, cast=True] input,
                ndarray[float64_t, ndim=1, cast=True] weights,
                int minp, bint avg=True, bint avg_wgt=False):
    """
    Apply a weighted moving window to a 1-D float64 array.

    Assume len(weights) << len(input)

    For every output position ``j`` the products
    ``input[j - (win_n - 1 - win_i)] * weights[win_i]`` are accumulated over
    all window positions ``win_i`` that fall inside ``input``.  NaN inputs
    and NaN weights are skipped and do not count as observations.

    Parameters
    ----------
    input : ndarray[float64], 1-D
        Values to filter.
    weights : ndarray[float64], 1-D
        Window weights; ``len(weights)`` is the window length.
    minp : int
        Minimum number of contributing observations required for a
        non-NaN result.  The value is first passed through ``_check_minp``.
    avg : bool, default True
        Only consulted when ``avg_wgt`` is False: if True the accumulated
        sum is divided by the number of contributing observations,
        otherwise the raw weighted sum is returned.
    avg_wgt : bool, default False
        If True the accumulated sum is divided by the sum of the weights
        that actually contributed; positions whose contributing weights
        sum to zero are set to NaN.  ``avg`` is ignored in this mode.

    Returns
    -------
    output : ndarray[float64], same length as ``input``
    """
    cdef:
        ndarray[double_t] output, tot_wgt
        ndarray[int64_t] counts
        Py_ssize_t in_i, win_i, win_n, in_n, offset
        float64_t val_in, val_win, c, w

    in_n = len(input)
    win_n = len(weights)
    output = np.zeros(in_n, dtype=float)
    # Explicit int64 so the array always matches the declared buffer type,
    # including on platforms where the default integer is 32-bit.
    counts = np.zeros(in_n, dtype=np.int64)
    # tot_wgt is only touched in the avg_wgt branch, so allocate it exactly
    # when that branch runs (previously keyed on ``avg``, which left the
    # buffer unallocated for avg=False, avg_wgt=True).
    if avg_wgt:
        tot_wgt = np.zeros(in_n, dtype=float)

    minp = _check_minp(len(weights), minp, in_n)

    if avg_wgt:
        for win_i from 0 <= win_i < win_n:
            val_win = weights[win_i]
            if val_win != val_win:
                # NaN weight: contributes nothing
                continue

            # distance between an input element and the output slot that
            # this window position feeds
            offset = win_n - win_i - 1
            for in_i from 0 <= in_i < in_n - offset:
                val_in = input[in_i]
                if val_in == val_in:
                    output[in_i + offset] += val_in * val_win
                    counts[in_i + offset] += 1
                    tot_wgt[in_i + offset] += val_win

        for in_i from 0 <= in_i < in_n:
            c = counts[in_i]
            if c < minp:
                output[in_i] = NaN
            else:
                w = tot_wgt[in_i]
                if w == 0:
                    # avoid dividing by a zero total weight
                    output[in_i] = NaN
                else:
                    output[in_i] /= w

    else:
        for win_i from 0 <= win_i < win_n:
            val_win = weights[win_i]
            if val_win != val_win:
                # NaN weight: contributes nothing
                continue

            offset = win_n - win_i - 1
            for in_i from 0 <= in_i < in_n - offset:
                val_in = input[in_i]

                if val_in == val_in:
                    output[in_i + offset] += val_in * val_win
                    counts[in_i + offset] += 1

        for in_i from 0 <= in_i < in_n:
            c = counts[in_i]
            if c < minp:
                output[in_i] = NaN
            elif avg:
                output[in_i] /= c

    return output

pandas/stats/moments.py

Lines changed: 57 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
from pandas.core.api import DataFrame, Series, notnull, Panel
1313
import pandas.lib as lib
14+
import pandas.core.common as com
1415

1516
from pandas.util.decorators import Substitution, Appender
1617

@@ -572,26 +573,29 @@ def call_cython(arg, window, minp):
572573
return _rolling_moment(arg, window, call_cython, min_periods,
573574
freq=freq, center=center, time_rule=time_rule)
574575

575-
def rolling_window(arg, window, window_type='boxcar', min_periods=None,
576-
freq=None, center=False, time_rule=None, **kwargs):
576+
def rolling_window(arg, window=None, win_type=None, min_periods=None,
577+
freq=None, center=False, mean=True, time_rule=None,
578+
axis=0, **kwargs):
577579
"""
578580
Applies a moving window of type ``win_type`` and size ``window``
579581
on the data.
580582
581583
Parameters
582584
----------
583-
data : Series, DataFrame
584-
window : int
585-
Size of the filtering window.
586-
window_type : str, default 'boxcar'
585+
arg : Series, DataFrame
586+
window : int or ndarray
587+
Filtering window specification. If the window is an integer, then it is
588+
treated as the window length and win_type is required
589+
win_type : str, default None
587590
Window type (see Notes)
588-
589591
min_periods : int
590-
Minimum number of observations in window required to have a value
592+
Minimum number of observations in window required to have a value.
591593
freq : None or string alias / date offset object, default=None
592594
Frequency to conform to before computing statistic
593595
center : boolean, default False
594596
Whether the label should correspond with center of window
597+
mean : boolean, default True
598+
If True computes weighted mean, else weighted sum
595599
596600
Returns
597601
-------
@@ -616,26 +620,52 @@ def rolling_window(arg, window, window_type='boxcar', min_periods=None,
616620
* ``general_gaussian`` (needs power, width)
617621
* ``slepian`` (needs width).
618622
"""
619-
from scipy.signal import convolve, get_window
620-
621-
data = marray(data, copy=True, subok=True)
622-
if data._mask is nomask:
623-
data._mask = np.zeros(data.shape, bool_)
624-
window = get_window(window_type, span, fftbins=False)
625-
(n, k) = (len(data), span//2)
626-
#
627-
if data.ndim == 1:
628-
data._data.flat = convolve(data._data, window)[k:n+k] / float(span)
629-
data._mask[:] = ((convolve(getmaskarray(data), window) > 0)[k:n+k])
630-
elif data.ndim == 2:
631-
for i in range(data.shape[-1]):
632-
_data = data._data[:,i]
633-
_data.flat = convolve(_data, window)[k:n+k] / float(span)
634-
data._mask[:,i] = (convolve(data._mask[:,i], window) > 0)[k:n+k]
623+
if isinstance(window, (list, tuple, np.ndarray)):
624+
if win_type is not None:
625+
raise ValueError(('Do not specify window type if using custom '
626+
'weights'))
627+
window = com._asarray_tuplesafe(window).astype(float)
628+
elif com.is_integer(window): #window size
629+
if win_type is None:
630+
raise ValueError('Must specify window type')
631+
import scipy.signal as sig
632+
win_type = _validate_win_type(win_type, kwargs) # may pop from kwargs
633+
window = sig.get_window(win_type, window).astype(float)
635634
else:
636-
raise ValueError, "Data should be at most 2D"
637-
data._mask[:k] = data._mask[-k:] = True
638-
return data
635+
raise ValueError('Invalid window %s' % str(window))
636+
637+
minp = _use_window(min_periods, len(window))
638+
639+
arg = _conv_timerule(arg, freq, time_rule)
640+
return_hook, values = _process_data_structure(arg)
641+
642+
f = lambda x: lib.roll_window(x, window, minp, avg=mean)
643+
result = np.apply_along_axis(f, axis, values)
644+
645+
rs = return_hook(result)
646+
if center:
647+
rs = _center_window(rs, len(window), axis)
648+
return rs
649+
650+
def _validate_win_type(win_type, kwargs):
    """
    Normalize a window type name into the form handed to ``get_window``.

    Window types that need extra parameters are returned as a tuple of the
    name followed by the parameter values, which ``_pop_args`` takes from
    ``kwargs``.  Any other ``win_type`` is returned unchanged.
    """
    # parameter names each parametrized window needs, in the order they
    # are appended after the window name
    required = {'kaiser': ['beta'],
                'gaussian': ['std'],
                'general_gaussian': ['power', 'width'],
                'slepian': ['width']}

    if win_type not in required:
        return win_type

    # may pop from kwargs
    params = _pop_args(win_type, required[win_type], kwargs)
    return tuple([win_type] + params)
660+
661+
def _pop_args(win_type, arg_names, kwargs):
    """
    Remove the named arguments from ``kwargs`` and return their values.

    Values are returned as a list in the order given by ``arg_names``.
    Raises ValueError naming the first argument that is absent from
    ``kwargs``; arguments preceding it have already been popped by then.
    """
    template = '%s window requires %%s' % win_type
    collected = []
    for name in arg_names:
        if name in kwargs:
            collected.append(kwargs.pop(name))
            continue
        raise ValueError(template % name)
    return collected
639669

640670

641671
def _expanding_func(func, desc, check_minp=_use_window):

pandas/stats/tests/test_moments.py

Lines changed: 60 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -74,15 +74,66 @@ def test_cmov_window(self):
7474
vals = np.random.randn(10)
7575
xp = cmov_window(vals, 5, 'boxcar')
7676

77-
rs = mom.rolling_window(vals, 5, center=True)
77+
rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
7878
assert_almost_equal(xp.compressed(), rs[2:-2])
7979
assert_almost_equal(xp.mask, np.isnan(rs))
8080

8181
xp = Series(rs)
82-
rs = mom.rolling_window(Series(vals), 5, center=True)
82+
rs = mom.rolling_window(Series(vals), 5, 'boxcar', center=True)
8383
assert_series_equal(xp, rs)
8484

85-
def test_cmov_window_types(self):
85+
def test_cmov_window_corner(self):
    """Corner cases for rolling_window: all-NaN, empty and too-short input."""
    # Nothing here is compared against scikits.timeseries, so do not gate
    # the test on it.  An integer window with a named win_type is built via
    # scipy.signal.get_window, so scipy is the dependency to skip on.
    try:
        import scipy.signal  # noqa
    except ImportError:
        raise nose.SkipTest

    # all nan
    vals = np.empty(10, dtype=float)
    vals.fill(np.nan)
    rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
    self.assertTrue(np.isnan(rs).all())

    # empty
    vals = np.array([])
    rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
    self.assertEqual(len(rs), 0)

    # shorter than window
    vals = np.random.randn(5)
    rs = mom.rolling_window(vals, 10, 'boxcar')
    self.assertTrue(np.isnan(rs).all())
    self.assertEqual(len(rs), 5)
107+
108+
def test_cmov_window_frame(self):
    """rolling_window on a DataFrame matches scikits.timeseries cmov_window."""
    try:
        from scikits.timeseries.lib import cmov_window
    except ImportError:
        raise nose.SkipTest

    # two-column input, compared against the reference implementation
    raw = np.random.randn(10, 2)
    expected = DataFrame(cmov_window(raw, 5, 'boxcar'))
    result = mom.rolling_window(DataFrame(raw), 5, 'boxcar', center=True)
    assert_frame_equal(expected, result)
119+
120+
def test_cmov_window_na_min_periods(self):
    """A boxcar rolling_window with NaNs and min_periods equals rolling_mean."""
    # The expected value comes from mom.rolling_mean, not from
    # scikits.timeseries, so do not gate the test on that package.  The
    # 'boxcar' window is built via scipy.signal.get_window, so skip only
    # when scipy is unavailable.
    try:
        import scipy.signal  # noqa
    except ImportError:
        raise nose.SkipTest

    # min_periods
    vals = Series(np.random.randn(10))
    vals[4] = np.nan
    vals[8] = np.nan

    xp = mom.rolling_mean(vals, 5, min_periods=4, center=True)
    rs = mom.rolling_window(vals, 5, 'boxcar', min_periods=4, center=True)

    assert_series_equal(xp, rs)
135+
136+
def test_cmov_window_regular(self):
86137
try:
87138
from scikits.timeseries.lib import cmov_window
88139
except ImportError:
@@ -94,28 +145,26 @@ def test_cmov_window_types(self):
94145
vals = np.random.randn(10)
95146
xp = cmov_window(vals, 5, wt)
96147

97-
rs = mom.rolling_window(vals, 5, window_type=wt, center=True)
98-
assert_almost_equal(xp.compressed(), rs[2:-2])
99-
assert_almost_equal(xp.mask, np.isnan(rs))
148+
rs = mom.rolling_window(Series(vals), 5, wt, center=True)
149+
assert_series_equal(Series(xp), rs)
100150

101-
def test_cmov_special(self):
151+
def test_cmov_window_special(self):
102152
try:
103153
from scikits.timeseries.lib import cmov_window
104154
except ImportError:
105155
raise nose.SkipTest
106156

107157
win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian']
108158
kwds = [{'beta' : 1.}, {'std' : 1.}, {'power' : 2., 'width' : 2.},
109-
{'width' : 2.}]
159+
{'width' : 0.5}]
110160

111161
for wt, k in zip(win_types, kwds):
112162
vals = np.random.randn(10)
113163
xp = cmov_window(vals, 5, (wt,) + tuple(k.values()))
114164

115-
rs = mom.rolling_window(vals, 5, window_type=wt, center=True,
165+
rs = mom.rolling_window(Series(vals), 5, wt, center=True,
116166
**k)
117-
assert_almost_equal(xp.compressed(), rs[2:-2])
118-
assert_almost_equal(xp.mask, np.isnan(rs))
167+
assert_series_equal(Series(xp), rs)
119168

120169
def test_rolling_median(self):
121170
self._check_moment_func(mom.rolling_median, np.median)

0 commit comments

Comments
 (0)