Skip to content

Commit aa431bc

Browse files
committed
Merge pull request #8238 from stahlous/win_weights
BUG: normalize rolling_window() weights, remove scikits-timeseries dependency for testing
2 parents 6f4af67 + 3c1410f commit aa431bc

File tree

5 files changed

+192
-51
lines changed

5 files changed

+192
-51
lines changed

doc/source/computation.rst

+15-2
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ keyword. The list of recognized types are:
310310
311311
rolling_window(ser, 5, 'triang')
312312
313-
Note that the ``boxcar`` window is equivalent to ``rolling_mean``:
313+
Note that the ``boxcar`` window is equivalent to ``rolling_mean``.
314314

315315
.. ipython:: python
316316
@@ -336,6 +336,19 @@ This keyword is available in other rolling functions as well.
336336
337337
rolling_mean(ser, 5, center=True)
338338
339+
.. _stats.moments.normalization:
340+
341+
.. note::
342+
343+
In rolling sum mode (``mean=False``) there is no normalization done to the
344+
weights. Passing custom weights of ``[1, 1, 1]`` will yield a different
345+
result than passing weights of ``[2, 2, 2]``, for example. When passing a
346+
``win_type`` instead of explicitly specifying the weights, the weights are
347+
already normalized so that the largest weight is 1.
348+
349+
In contrast, the nature of the rolling mean calculation (``mean=True``) is
350+
such that the weights are normalized with respect to each other. Weights
351+
of ``[1, 1, 1]`` and ``[2, 2, 2]`` yield the same result.
339352

340353
.. _stats.moments.binary:
341354

@@ -610,4 +623,4 @@ are scaled by debiasing factors
610623
(For :math:`w_i = 1`, this reduces to the usual :math:`N / (N - 1)` factor,
611624
with :math:`N = t + 1`.)
612625
See http://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Weighted_sample_variance
613-
for further details.
626+
for further details.

doc/source/v0.15.0.txt

+28
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,34 @@ Rolling/Expanding Moments API changes
383383

384384
rolling_sum(Series(range(4)), window=3, min_periods=0, center=True)
385385

386+
- :func:`rolling_window` now normalizes the weights properly in rolling mean mode (``mean=True``) so that
387+
the calculated weighted means (e.g. 'triang', 'gaussian') are distributed about the same means as those
388+
calculated without weighting (i.e. 'boxcar'). See :ref:`the note on normalization
389+
<stats.moments.normalization>` for further details. (:issue:`7618`)
390+
391+
.. ipython:: python
392+
393+
s = Series([10.5, 8.8, 11.4, 9.7, 9.3])
394+
395+
Behavior prior to 0.15.0:
396+
397+
.. code-block:: python
398+
399+
In [39]: rolling_window(s, window=3, win_type='triang', center=True)
400+
Out[39]:
401+
0 NaN
402+
1 6.583333
403+
2 6.883333
404+
3 6.683333
405+
4 NaN
406+
dtype: float64
407+
408+
New behavior:
409+
410+
.. ipython:: python
411+
412+
rolling_window(s, window=3, win_type='triang', center=True)
413+
386414
- Removed ``center`` argument from :func:`expanding_max`, :func:`expanding_min`, :func:`expanding_sum`,
387415
:func:`expanding_mean`, :func:`expanding_median`, :func:`expanding_std`, :func:`expanding_var`,
388416
:func:`expanding_skew`, :func:`expanding_kurt`, :func:`expanding_quantile`, :func:`expanding_count`,

pandas/algos.pyx

+2-4
Original file line numberDiff line numberDiff line change
@@ -1897,7 +1897,7 @@ def roll_generic(ndarray[float64_t, cast=True] input,
18971897

18981898
def roll_window(ndarray[float64_t, ndim=1, cast=True] input,
18991899
ndarray[float64_t, ndim=1, cast=True] weights,
1900-
int minp, bint avg=True, bint avg_wgt=False):
1900+
int minp, bint avg=True):
19011901
"""
19021902
Assume len(weights) << len(input)
19031903
"""
@@ -1915,7 +1915,7 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] input,
19151915

19161916
minp = _check_minp(len(weights), minp, in_n)
19171917

1918-
if avg_wgt:
1918+
if avg:
19191919
for win_i from 0 <= win_i < win_n:
19201920
val_win = weights[win_i]
19211921
if val_win != val_win:
@@ -1956,8 +1956,6 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] input,
19561956
c = counts[in_i]
19571957
if c < minp:
19581958
output[in_i] = NaN
1959-
elif avg:
1960-
output[in_i] /= c
19611959

19621960
return output
19631961

pandas/stats/tests/test_moments.py

+147-44
Original file line numberDiff line numberDiff line change
@@ -65,47 +65,40 @@ def test_rolling_mean(self):
6565
self._check_moment_func(mom.rolling_mean, np.mean)
6666

6767
def test_cmov_mean(self):
68+
# GH 8238
6869
tm._skip_if_no_scipy()
69-
try:
70-
from scikits.timeseries.lib import cmov_mean
71-
except ImportError:
72-
raise nose.SkipTest("no scikits.timeseries")
7370

74-
vals = np.random.randn(10)
75-
xp = cmov_mean(vals, 5)
71+
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49,
72+
16.68, 9.48, 10.63, 14.48])
73+
xp = np.array([np.nan, np.nan, 9.962, 11.27 , 11.564, 12.516,
74+
12.818, 12.952, np.nan, np.nan])
7675

7776
rs = mom.rolling_mean(vals, 5, center=True)
78-
assert_almost_equal(xp.compressed(), rs[2:-2])
79-
assert_almost_equal(xp.mask, np.isnan(rs))
77+
assert_almost_equal(xp, rs)
8078

8179
xp = Series(rs)
8280
rs = mom.rolling_mean(Series(vals), 5, center=True)
8381
assert_series_equal(xp, rs)
8482

8583
def test_cmov_window(self):
84+
# GH 8238
8685
tm._skip_if_no_scipy()
87-
try:
88-
from scikits.timeseries.lib import cmov_window
89-
except ImportError:
90-
raise nose.SkipTest("no scikits.timeseries")
9186

92-
vals = np.random.randn(10)
93-
xp = cmov_window(vals, 5, 'boxcar')
87+
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81,
88+
13.49, 16.68, 9.48, 10.63, 14.48])
89+
xp = np.array([np.nan, np.nan, 9.962, 11.27 , 11.564, 12.516,
90+
12.818, 12.952, np.nan, np.nan])
9491

9592
rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
96-
assert_almost_equal(xp.compressed(), rs[2:-2])
97-
assert_almost_equal(xp.mask, np.isnan(rs))
93+
assert_almost_equal(xp, rs)
9894

9995
xp = Series(rs)
10096
rs = mom.rolling_window(Series(vals), 5, 'boxcar', center=True)
10197
assert_series_equal(xp, rs)
10298

10399
def test_cmov_window_corner(self):
100+
# GH 8238
104101
tm._skip_if_no_scipy()
105-
try:
106-
from scikits.timeseries.lib import cmov_window
107-
except ImportError:
108-
raise nose.SkipTest("no scikits.timeseries")
109102

110103
# all nan
111104
vals = np.empty(10, dtype=float)
@@ -125,24 +118,37 @@ def test_cmov_window_corner(self):
125118
self.assertEqual(len(rs), 5)
126119

127120
def test_cmov_window_frame(self):
121+
# GH 8238
128122
tm._skip_if_no_scipy()
129-
try:
130-
from scikits.timeseries.lib import cmov_window
131-
except ImportError:
132-
raise nose.SkipTest("no scikits.timeseries")
123+
124+
vals = np.array([[ 12.18, 3.64],
125+
[ 10.18, 9.16],
126+
[ 13.24, 14.61],
127+
[ 4.51, 8.11],
128+
[ 6.15, 11.44],
129+
[ 9.14, 6.21],
130+
[ 11.31, 10.67],
131+
[ 2.94, 6.51],
132+
[ 9.42, 8.39],
133+
[ 12.44, 7.34 ]])
134+
135+
xp = np.array([[ np.nan, np.nan],
136+
[ np.nan, np.nan],
137+
[ 9.252, 9.392],
138+
[ 8.644, 9.906],
139+
[ 8.87 , 10.208],
140+
[ 6.81 , 8.588],
141+
[ 7.792, 8.644],
142+
[ 9.05 , 7.824],
143+
[ np.nan, np.nan],
144+
[ np.nan, np.nan]])
133145

134146
# DataFrame
135-
vals = np.random.randn(10, 2)
136-
xp = cmov_window(vals, 5, 'boxcar')
137147
rs = mom.rolling_window(DataFrame(vals), 5, 'boxcar', center=True)
138148
assert_frame_equal(DataFrame(xp), rs)
139149

140150
def test_cmov_window_na_min_periods(self):
141151
tm._skip_if_no_scipy()
142-
try:
143-
from scikits.timeseries.lib import cmov_window
144-
except ImportError:
145-
raise nose.SkipTest("no scikits.timeseries")
146152

147153
# min_periods
148154
vals = Series(np.random.randn(10))
@@ -155,39 +161,136 @@ def test_cmov_window_na_min_periods(self):
155161
assert_series_equal(xp, rs)
156162

157163
def test_cmov_window_regular(self):
164+
# GH 8238
158165
tm._skip_if_no_scipy()
159-
try:
160-
from scikits.timeseries.lib import cmov_window
161-
except ImportError:
162-
raise nose.SkipTest("no scikits.timeseries")
163166

164167
win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
165168
'blackmanharris', 'nuttall', 'barthann']
169+
170+
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81,
171+
13.49, 16.68, 9.48, 10.63, 14.48])
172+
xps = {
173+
'hamming': [np.nan, np.nan, 8.71384, 9.56348, 12.38009,
174+
14.03687, 13.8567, 11.81473, np.nan, np.nan],
175+
'triang': [np.nan, np.nan, 9.28667, 10.34667, 12.00556,
176+
13.33889, 13.38, 12.33667, np.nan, np.nan],
177+
'barthann': [np.nan, np.nan, 8.4425, 9.1925, 12.5575,
178+
14.3675, 14.0825, 11.5675, np.nan, np.nan],
179+
'bohman': [np.nan, np.nan, 7.61599, 9.1764, 12.83559,
180+
14.17267, 14.65923, 11.10401, np.nan, np.nan],
181+
'blackmanharris': [np.nan, np.nan, 6.97691, 9.16438, 13.05052,
182+
14.02156, 15.10512, 10.74574, np.nan, np.nan],
183+
'nuttall': [np.nan, np.nan, 7.04618, 9.16786, 13.02671,
184+
14.03559, 15.05657, 10.78514, np.nan, np.nan],
185+
'blackman': [np.nan, np.nan, 7.73345, 9.17869, 12.79607,
186+
14.20036, 14.57726, 11.16988, np.nan, np.nan],
187+
'bartlett': [np.nan, np.nan, 8.4425, 9.1925, 12.5575,
188+
14.3675, 14.0825, 11.5675, np.nan, np.nan]}
189+
166190
for wt in win_types:
167-
vals = np.random.randn(10)
168-
xp = cmov_window(vals, 5, wt)
191+
xp = Series(xps[wt])
192+
rs = mom.rolling_window(Series(vals), 5, wt, center=True)
193+
assert_series_equal(xp, rs)
194+
195+
def test_cmov_window_regular_linear_range(self):
196+
# GH 8238
197+
tm._skip_if_no_scipy()
169198

199+
win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
200+
'blackmanharris', 'nuttall', 'barthann']
201+
202+
vals = np.array(range(10), dtype=np.float)
203+
xp = vals.copy()
204+
xp[:2] = np.nan
205+
xp[-2:] = np.nan
206+
xp = Series(xp)
207+
208+
for wt in win_types:
170209
rs = mom.rolling_window(Series(vals), 5, wt, center=True)
171-
assert_series_equal(Series(xp), rs)
210+
assert_series_equal(xp, rs)
211+
212+
def test_cmov_window_regular_missing_data(self):
213+
# GH 8238
214+
tm._skip_if_no_scipy()
215+
216+
win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
217+
'blackmanharris', 'nuttall', 'barthann']
218+
219+
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81,
220+
13.49, 16.68, np.nan, 10.63, 14.48])
221+
xps = {
222+
'bartlett': [np.nan, np.nan, 9.70333, 10.5225, 8.4425,
223+
9.1925, 12.5575, 14.3675, 15.61667, 13.655],
224+
'blackman': [np.nan, np.nan, 9.04582, 11.41536, 7.73345,
225+
9.17869, 12.79607, 14.20036, 15.8706, 13.655],
226+
'barthann': [np.nan, np.nan, 9.70333, 10.5225, 8.4425,
227+
9.1925, 12.5575, 14.3675, 15.61667, 13.655],
228+
'bohman': [np.nan, np.nan, 8.9444, 11.56327, 7.61599,
229+
9.1764, 12.83559, 14.17267, 15.90976, 13.655],
230+
'hamming': [np.nan, np.nan, 9.59321, 10.29694, 8.71384,
231+
9.56348, 12.38009, 14.20565, 15.24694, 13.69758],
232+
'nuttall': [np.nan, np.nan, 8.47693, 12.2821, 7.04618,
233+
9.16786, 13.02671, 14.03673, 16.08759, 13.65553],
234+
'triang': [np.nan, np.nan, 9.33167, 9.76125, 9.28667,
235+
10.34667, 12.00556, 13.82125, 14.49429, 13.765],
236+
'blackmanharris': [np.nan, np.nan, 8.42526, 12.36824, 6.97691,
237+
9.16438, 13.05052, 14.02175, 16.1098,
238+
13.65509]
239+
}
240+
241+
for wt in win_types:
242+
xp = Series(xps[wt])
243+
rs = mom.rolling_window(Series(vals), 5, wt, min_periods=3)
244+
assert_series_equal(xp, rs)
172245

173246
def test_cmov_window_special(self):
247+
# GH 8238
174248
tm._skip_if_no_scipy()
175-
try:
176-
from scikits.timeseries.lib import cmov_window
177-
except ImportError:
178-
raise nose.SkipTest("no scikits.timeseries")
179249

180250
win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian']
181251
kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., 'width': 2.},
182252
{'width': 0.5}]
183253

254+
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81,
255+
13.49, 16.68, 9.48, 10.63, 14.48])
256+
257+
xps = {
258+
'gaussian': [np.nan, np.nan, 8.97297, 9.76077, 12.24763,
259+
13.89053, 13.65671, 12.01002, np.nan, np.nan],
260+
'general_gaussian': [np.nan, np.nan, 9.85011, 10.71589,
261+
11.73161, 13.08516, 12.95111, 12.74577,
262+
np.nan, np.nan],
263+
'slepian': [np.nan, np.nan, 9.81073, 10.89359, 11.70284,
264+
12.88331, 12.96079, 12.77008, np.nan, np.nan],
265+
'kaiser': [np.nan, np.nan, 9.86851, 11.02969, 11.65161,
266+
12.75129, 12.90702, 12.83757, np.nan, np.nan]
267+
}
268+
184269
for wt, k in zip(win_types, kwds):
185-
vals = np.random.randn(10)
186-
xp = cmov_window(vals, 5, (wt,) + tuple(k.values()))
270+
xp = Series(xps[wt])
187271

188272
rs = mom.rolling_window(Series(vals), 5, wt, center=True,
189273
**k)
190-
assert_series_equal(Series(xp), rs)
274+
assert_series_equal(xp, rs)
275+
276+
def test_cmov_window_special_linear_range(self):
277+
# GH 8238
278+
tm._skip_if_no_scipy()
279+
280+
win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian']
281+
kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., 'width': 2.},
282+
{'width': 0.5}]
283+
284+
vals = np.array(range(10), dtype=np.float)
285+
xp = vals.copy()
286+
xp[:2] = np.nan
287+
xp[-2:] = np.nan
288+
xp = Series(xp)
289+
290+
for wt, k in zip(win_types, kwds):
291+
rs = mom.rolling_window(Series(vals), 5, wt, center=True,
292+
**k)
293+
assert_series_equal(xp, rs)
191294

192295
def test_rolling_median(self):
193296
self._check_moment_func(mom.rolling_median, np.median)

pandas/util/print_versions.py

-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ def show_versions(as_json=False):
6868
("IPython", lambda mod: mod.__version__),
6969
("sphinx", lambda mod: mod.__version__),
7070
("patsy", lambda mod: mod.__version__),
71-
("scikits.timeseries", lambda mod: mod.__version__),
7271
("dateutil", lambda mod: mod.__version__),
7372
("pytz", lambda mod: mod.VERSION),
7473
("bottleneck", lambda mod: mod.__version__),

0 commit comments

Comments
 (0)