Skip to content

Commit ef5aeae

Browse files
committed
Merge pull request pandas-dev#7898 from seth-p/ewm_min_periods_off_by_one
BUG: ewm*() interpretation of min_periods is off by one
2 parents 0d3229d + 0192f53 commit ef5aeae

File tree

3 files changed

+66
-8
lines changed

3 files changed

+66
-8
lines changed

doc/source/v0.15.0.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ API changes
6868

6969
rolling_min(s, window=10, min_periods=5)
7070

71-
- :func:`ewma`, :func:`ewmastd`, :func:`ewmavar`, :func:`ewmacorr`, and :func:`ewmacov`
71+
- :func:`ewma`, :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcorr`, and :func:`ewmcov`
7272
now have an optional ``ignore_na`` argument.
7373
When ``ignore_na=False`` (the default), missing values are taken into account in the weights calculation.
7474
When ``ignore_na=True`` (which reproduces the pre-0.15.0 behavior), missing values are ignored in the weights calculation.
@@ -80,6 +80,11 @@ API changes
8080
ewma(Series([1., None, 100.]), com=2.5, ignore_na=True) # pre-0.15.0 behavior
8181
ewma(Series([1., None, 100.]), com=2.5, ignore_na=False) # default
8282

83+
- :func:`ewma`, :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcorr`, and :func:`ewmcov`
84+
now set the first ``min_periods-1`` entries of the result to ``NaN`` (for ``min_periods>1``).
85+
Previously the first ``min_periods`` entries of the result were set to ``NaN``.
86+
The new behavior accords with the existing documentation. (:issue:`7884`)
87+
8388
- Bug in passing a ``DatetimeIndex`` with a timezone that was not being retained in DataFrame construction from a dict (:issue:`7822`)
8489

8590
In prior versions this would drop the timezone.

pandas/stats/moments.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -463,8 +463,9 @@ def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None,
463463

464464
def _ewma(v):
465465
result = algos.ewma(v, com, int(adjust), int(ignore_na))
466-
first_index = _first_valid_index(v)
467-
result[first_index: first_index + min_periods] = NaN
466+
if min_periods > 1:
467+
first_index = _first_valid_index(v)
468+
result[first_index: first_index + min_periods - 1] = NaN
468469
return result
469470

470471
return_hook, values = _process_data_structure(arg)

pandas/stats/tests/test_moments.py

Lines changed: 57 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -629,8 +629,37 @@ def _check_ew_ndarray(self, func, preserve_nan=False):
629629
arr = randn(50)
630630
arr[:10] = np.NaN
631631
arr[-10:] = np.NaN
632-
633-
# ??? check something
632+
s = Series(arr)
633+
634+
# check min_periods
635+
# GH 7898
636+
result = func(s, 50, min_periods=2)
637+
self.assertTrue(np.isnan(result.values[:11]).all())
638+
self.assertFalse(np.isnan(result.values[11:]).any())
639+
640+
for min_periods in (0, 1):
641+
result = func(s, 50, min_periods=min_periods)
642+
if func == mom.ewma:
643+
self.assertTrue(np.isnan(result.values[:10]).all())
644+
self.assertFalse(np.isnan(result.values[10:]).any())
645+
else:
646+
# ewmstd, ewmvol, ewmvar *should* require at least two values,
647+
# but currently require only one, for some reason
648+
self.assertTrue(np.isnan(result.values[:10]).all())
649+
self.assertFalse(np.isnan(result.values[10:]).any())
650+
651+
# check series of length 0
652+
result = func(Series([]), 50, min_periods=min_periods)
653+
assert_series_equal(result, Series([]))
654+
655+
# check series of length 1
656+
result = func(Series([1.]), 50, min_periods=min_periods)
657+
if func == mom.ewma:
658+
assert_series_equal(result, Series([1.]))
659+
else:
660+
# ewmstd, ewmvol, ewmvar *should* require at least two values,
661+
# so should return NaN, but currently require one, so return 0.
662+
assert_series_equal(result, Series([0.]))
634663

635664
# pass in ints
636665
result2 = func(np.arange(50), span=10)
@@ -752,9 +781,32 @@ def _check_binary_ew(self, func):
752781
B[-10:] = np.NaN
753782

754783
result = func(A, B, 20, min_periods=5)
755-
756-
self.assertTrue(np.isnan(result.values[:15]).all())
757-
self.assertFalse(np.isnan(result.values[15:]).any())
784+
self.assertTrue(np.isnan(result.values[:14]).all())
785+
self.assertFalse(np.isnan(result.values[14:]).any())
786+
787+
# GH 7898
788+
for min_periods in (0, 1, 2):
789+
result = func(A, B, 20, min_periods=min_periods)
790+
# binary functions (ewmcov, ewmcorr) *should* require at least two values
791+
if (func == mom.ewmcov) and (min_periods <= 1):
792+
# currently ewmcov requires only one value, for some reason.
793+
self.assertTrue(np.isnan(result.values[:10]).all())
794+
self.assertFalse(np.isnan(result.values[10:]).any())
795+
else:
796+
self.assertTrue(np.isnan(result.values[:11]).all())
797+
self.assertFalse(np.isnan(result.values[11:]).any())
798+
799+
# check series of length 0
800+
result = func(Series([]), Series([]), 50, min_periods=min_periods)
801+
assert_series_equal(result, Series([]))
802+
803+
# check series of length 1
804+
result = func(Series([1.]), Series([1.]), 50, min_periods=min_periods)
805+
if (func == mom.ewmcov) and (min_periods <= 1):
806+
# currently ewmcov requires only one value, for some reason.
807+
assert_series_equal(result, Series([0.]))
808+
else:
809+
assert_series_equal(result, Series([np.NaN]))
758810

759811
self.assertRaises(Exception, func, A, randn(50), 20, min_periods=5)
760812

0 commit comments

Comments
 (0)