Skip to content

Commit 24b309f

Browse files
seth-pjreback
authored andcommitted
BUG: ewma() weights incorrect when some values are missing (GH7543)
1 parent bfa9bc5 commit 24b309f

File tree

4 files changed

+102
-39
lines changed

4 files changed

+102
-39
lines changed

doc/source/v0.15.0.txt

+12
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,18 @@ API changes
5858

5959
rolling_min(s, window=10, min_periods=5)
6060

61+
- :func:`ewma`, :func:`ewmastd`, :func:`ewmavar`, :func:`ewmacorr`, and :func:`ewmacov`
62+
now have an optional ``ignore_na`` argument.
63+
When ``ignore_na=False`` (the default), missing values are taken into account in the weights calculation.
64+
When ``ignore_na=True`` (which reproduces the pre-0.15.0 behavior), missing values are ignored in the weights calculation.
65+
(:issue:`7543`)
66+
67+
.. ipython:: python
68+
69+
ewma(Series([None, 1., 100.]), com=2.5)
70+
ewma(Series([1., None, 100.]), com=2.5, ignore_na=True) # pre-0.15.0 behavior
71+
ewma(Series([1., None, 100.]), com=2.5, ignore_na=False) # default
72+
6173
- Bug in passing a ``DatetimeIndex`` with a timezone that was not being retained in DataFrame construction from a dict (:issue:`7822`)
6274

6375
In prior versions this would drop the timezone.

pandas/algos.pyx

+18-26
Original file line numberDiff line numberDiff line change
@@ -979,14 +979,16 @@ def roll_mean(ndarray[double_t] input,
979979
#-------------------------------------------------------------------------------
980980
# Exponentially weighted moving average
981981

982-
def ewma(ndarray[double_t] input, double_t com, int adjust):
982+
def ewma(ndarray[double_t] input, double_t com, int adjust, int ignore_na):
983983
'''
984984
Compute exponentially-weighted moving average using center-of-mass.
985985
986986
Parameters
987987
----------
988988
input : ndarray (float64 type)
989989
com : float64
990+
adjust: int
991+
ignore_na: int
990992
991993
Returns
992994
-------
@@ -1002,37 +1004,27 @@ def ewma(ndarray[double_t] input, double_t com, int adjust):
10021004
if N == 0:
10031005
return output
10041006

1005-
neww = 1. / (1. + com)
1006-
oldw = 1. - neww
1007-
adj = oldw
1007+
alpha = 1. / (1. + com)
1008+
old_wt_factor = 1. - alpha
1009+
new_wt = 1.0 if adjust else alpha
10081010

1009-
if adjust:
1010-
output[0] = neww * input[0]
1011-
else:
1012-
output[0] = input[0]
1011+
output[0] = input[0]
1012+
weighted_avg = output[0]
1013+
old_wt = 1.
10131014

10141015
for i from 1 <= i < N:
10151016
cur = input[i]
1016-
prev = output[i - 1]
1017-
1018-
if cur == cur:
1019-
if prev == prev:
1020-
output[i] = oldw * prev + neww * cur
1021-
else:
1022-
output[i] = neww * cur
1017+
if weighted_avg == weighted_avg:
1018+
if cur == cur:
1019+
old_wt *= old_wt_factor
1020+
weighted_avg = ((old_wt * weighted_avg) + (new_wt * cur)) / (old_wt + new_wt)
1021+
old_wt += new_wt
1022+
elif not ignore_na:
1023+
old_wt *= old_wt_factor
10231024
else:
1024-
output[i] = prev
1025-
1026-
if adjust:
1027-
for i from 0 <= i < N:
1028-
cur = input[i]
1025+
weighted_avg = cur
10291026

1030-
if cur == cur:
1031-
output[i] = output[i] / (1. - adj)
1032-
adj *= oldw
1033-
else:
1034-
if i >= 1:
1035-
output[i] = output[i - 1]
1027+
output[i] = weighted_avg
10361028

10371029
return output
10381030

pandas/stats/moments.py

+18-12
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@
8989
imbalance in relative weightings (viewing EWMA as a moving average)
9090
how : string, default 'mean'
9191
Method for down- or re-sampling
92+
ignore_na : boolean, default False
93+
Ignore missing values when calculating weights;
94+
specify True to reproduce pre-0.15.0 behavior
9295
"""
9396

9497
_ewm_notes = r"""
@@ -420,12 +423,12 @@ def _get_center_of_mass(com, span, halflife):
420423
_type_of_input_retval, _ewm_notes)
421424
@Appender(_doc_template)
422425
def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None,
423-
adjust=True, how=None):
426+
adjust=True, how=None, ignore_na=False):
424427
com = _get_center_of_mass(com, span, halflife)
425428
arg = _conv_timerule(arg, freq, how)
426429

427430
def _ewma(v):
428-
result = algos.ewma(v, com, int(adjust))
431+
result = algos.ewma(v, com, int(adjust), int(ignore_na))
429432
first_index = _first_valid_index(v)
430433
result[first_index: first_index + min_periods] = NaN
431434
return result
@@ -444,11 +447,11 @@ def _first_valid_index(arr):
444447
_ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes)
445448
@Appender(_doc_template)
446449
def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False,
447-
freq=None, how=None):
450+
freq=None, how=None, ignore_na=False):
448451
com = _get_center_of_mass(com, span, halflife)
449452
arg = _conv_timerule(arg, freq, how)
450-
moment2nd = ewma(arg * arg, com=com, min_periods=min_periods)
451-
moment1st = ewma(arg, com=com, min_periods=min_periods)
453+
moment2nd = ewma(arg * arg, com=com, min_periods=min_periods, ignore_na=ignore_na)
454+
moment1st = ewma(arg, com=com, min_periods=min_periods, ignore_na=ignore_na)
452455

453456
result = moment2nd - moment1st ** 2
454457
if not bias:
@@ -460,9 +463,10 @@ def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False,
460463
@Substitution("Exponentially-weighted moving std", _unary_arg,
461464
_ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes)
462465
@Appender(_doc_template)
463-
def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False):
466+
def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False,
467+
ignore_na=False):
464468
result = ewmvar(arg, com=com, span=span, halflife=halflife,
465-
min_periods=min_periods, bias=bias)
469+
min_periods=min_periods, bias=bias, ignore_na=ignore_na)
466470
return _zsqrt(result)
467471

468472
ewmvol = ewmstd
@@ -472,7 +476,7 @@ def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False):
472476
_ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes)
473477
@Appender(_doc_template)
474478
def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,
475-
bias=False, freq=None, pairwise=None, how=None):
479+
bias=False, freq=None, pairwise=None, how=None, ignore_na=False):
476480
if arg2 is None:
477481
arg2 = arg1
478482
pairwise = True if pairwise is None else pairwise
@@ -484,7 +488,8 @@ def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,
484488
arg2 = _conv_timerule(arg2, freq, how)
485489

486490
def _get_ewmcov(X, Y):
487-
mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods)
491+
mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods,
492+
ignore_na=ignore_na)
488493
return (mean(X * Y) - mean(X) * mean(Y))
489494
result = _flex_binary_moment(arg1, arg2, _get_ewmcov,
490495
pairwise=bool(pairwise))
@@ -499,7 +504,7 @@ def _get_ewmcov(X, Y):
499504
_ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes)
500505
@Appender(_doc_template)
501506
def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,
502-
freq=None, pairwise=None, how=None):
507+
freq=None, pairwise=None, how=None, ignore_na=False):
503508
if arg2 is None:
504509
arg2 = arg1
505510
pairwise = True if pairwise is None else pairwise
@@ -511,9 +516,10 @@ def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,
511516
arg2 = _conv_timerule(arg2, freq, how)
512517

513518
def _get_ewmcorr(X, Y):
514-
mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods)
519+
mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods,
520+
ignore_na=ignore_na)
515521
var = lambda x: ewmvar(x, com=com, span=span, halflife=halflife, min_periods=min_periods,
516-
bias=True)
522+
bias=True, ignore_na=ignore_na)
517523
return (mean(X * Y) - mean(X) * mean(Y)) / _zsqrt(var(X) * var(Y))
518524
result = _flex_binary_moment(arg1, arg2, _get_ewmcorr,
519525
pairwise=bool(pairwise))

pandas/stats/tests/test_moments.py

+54-1
Original file line numberDiff line numberDiff line change
@@ -520,11 +520,64 @@ def test_ewma(self):
520520
result = mom.ewma(arr, span=100, adjust=False).sum()
521521
self.assertTrue(np.abs(result - 1) < 1e-2)
522522

523+
s = Series([1.0, 2.0, 4.0, 8.0])
524+
525+
expected = Series([1.0, 1.6, 2.736842, 4.923077])
526+
for f in [lambda s: mom.ewma(s, com=2.0, adjust=True),
527+
lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=False),
528+
lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=True),
529+
]:
530+
result = f(s)
531+
assert_series_equal(result, expected)
532+
533+
expected = Series([1.0, 1.333333, 2.222222, 4.148148])
534+
for f in [lambda s: mom.ewma(s, com=2.0, adjust=False),
535+
lambda s: mom.ewma(s, com=2.0, adjust=False, ignore_na=False),
536+
lambda s: mom.ewma(s, com=2.0, adjust=False, ignore_na=True),
537+
]:
538+
result = f(s)
539+
assert_series_equal(result, expected)
540+
523541
def test_ewma_nan_handling(self):
524542
s = Series([1.] + [np.nan] * 5 + [1.])
543+
result = mom.ewma(s, com=5)
544+
assert_almost_equal(result, [1.] * len(s))
525545

546+
s = Series([np.nan] * 2 + [1.] + [np.nan] * 2 + [1.])
526547
result = mom.ewma(s, com=5)
527-
assert_almost_equal(result, [1] * len(s))
548+
assert_almost_equal(result, [np.nan] * 2 + [1.] * 4)
549+
550+
# GH 7603
551+
s0 = Series([np.nan, 1., 101.])
552+
s1 = Series([1., np.nan, 101.])
553+
s2 = Series([np.nan, 1., np.nan, np.nan, 101., np.nan])
554+
com = 2.
555+
alpha = 1. / (1. + com)
556+
557+
def simple_wma(s, w):
558+
return (s.multiply(w).cumsum() / w.cumsum()).fillna(method='ffill')
559+
560+
for (s, adjust, ignore_na, w) in [
561+
(s0, True, False, [np.nan, (1.0 - alpha), 1.]),
562+
(s0, True, True, [np.nan, (1.0 - alpha), 1.]),
563+
(s0, False, False, [np.nan, (1.0 - alpha), alpha]),
564+
(s0, False, True, [np.nan, (1.0 - alpha), alpha]),
565+
(s1, True, False, [(1.0 - alpha)**2, np.nan, 1.]),
566+
(s1, True, True, [(1.0 - alpha), np.nan, 1.]),
567+
(s1, False, False, [(1.0 - alpha)**2, np.nan, alpha]),
568+
(s1, False, True, [(1.0 - alpha), np.nan, alpha]),
569+
(s2, True, False, [np.nan, (1.0 - alpha)**3, np.nan, np.nan, 1., np.nan]),
570+
(s2, True, True, [np.nan, (1.0 - alpha), np.nan, np.nan, 1., np.nan]),
571+
(s2, False, False, [np.nan, (1.0 - alpha)**3, np.nan, np.nan, alpha, np.nan]),
572+
(s2, False, True, [np.nan, (1.0 - alpha), np.nan, np.nan, alpha, np.nan]),
573+
]:
574+
expected = simple_wma(s, Series(w))
575+
result = mom.ewma(s, com=com, adjust=adjust, ignore_na=ignore_na)
576+
assert_series_equal(result, expected)
577+
if ignore_na is False:
578+
# check that ignore_na defaults to False
579+
result = mom.ewma(s, com=com, adjust=adjust)
580+
assert_series_equal(result, expected)
528581

529582
def test_ewmvar(self):
530583
self._check_ew(mom.ewmvar)

0 commit comments

Comments
 (0)