Skip to content

BUG: ewm*() interpretation of min_periods is off by one #7898

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 2, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ API changes

rolling_min(s, window=10, min_periods=5)

- :func:`ewma`, :func:`ewmastd`, :func:`ewmavar`, :func:`ewmacorr`, and :func:`ewmacov`
- :func:`ewma`, :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcorr`, and :func:`ewmcov`
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just removing gratuitous "a" from function names, and adding ewmvol.

now have an optional ``ignore_na`` argument.
When ``ignore_na=False`` (the default), missing values are taken into account in the weights calculation.
When ``ignore_na=True`` (which reproduces the pre-0.15.0 behavior), missing values are ignored in the weights calculation.
Expand All @@ -80,6 +80,11 @@ API changes
ewma(Series([1., None, 100.]), com=2.5, ignore_na=True) # pre-0.15.0 behavior
ewma(Series([1., None, 100.]), com=2.5, ignore_na=False) # default

- :func:`ewma`, :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcorr`, and :func:`ewmcov`
now set to ``NaN`` the first ``min_periods-1`` entries of the result (for ``min_periods>1``).
Previously the first ``min_periods`` entries of the result were set to ``NaN``.
The new behavior accords with the existing documentation. (:issue:`7884`)

- Bug in passing a ``DatetimeIndex`` with a timezone that was not being retained in DataFrame construction from a dict (:issue:`7822`)

In prior versions this would drop the timezone.
Expand Down
5 changes: 3 additions & 2 deletions pandas/stats/moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,8 +463,9 @@ def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None,

def _ewma(v):
result = algos.ewma(v, com, int(adjust), int(ignore_na))
first_index = _first_valid_index(v)
result[first_index: first_index + min_periods] = NaN
if min_periods > 1:
first_index = _first_valid_index(v)
result[first_index: first_index + min_periods - 1] = NaN
return result

return_hook, values = _process_data_structure(arg)
Expand Down
62 changes: 57 additions & 5 deletions pandas/stats/tests/test_moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,8 +629,37 @@ def _check_ew_ndarray(self, func, preserve_nan=False):
arr = randn(50)
arr[:10] = np.NaN
arr[-10:] = np.NaN

# ??? check something
s = Series(arr)

# check min_periods
# GH 7898
result = func(s, 50, min_periods=2)
self.assertTrue(np.isnan(result.values[:11]).all())
self.assertFalse(np.isnan(result.values[11:]).any())

for min_periods in (0, 1):
result = func(s, 50, min_periods=min_periods)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you check series of length 0 and 1 as well?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

if func == mom.ewma:
self.assertTrue(np.isnan(result.values[:10]).all())
self.assertFalse(np.isnan(result.values[10:]).any())
else:
# ewmstd, ewmvol, ewmvar *should* require at least two values,
# but currently require only one, for some reason
self.assertTrue(np.isnan(result.values[:10]).all())
self.assertFalse(np.isnan(result.values[10:]).any())

# check series of length 0
result = func(Series([]), 50, min_periods=min_periods)
assert_series_equal(result, Series([]))

# check series of length 1
result = func(Series([1.]), 50, min_periods=min_periods)
if func == mom.ewma:
assert_series_equal(result, Series([1.]))
else:
# ewmstd, ewmvol, ewmvar *should* require at least two values,
# so should return NaN, but currently require one, so return 0.
assert_series_equal(result, Series([0.]))

# pass in ints
result2 = func(np.arange(50), span=10)
Expand Down Expand Up @@ -752,9 +781,32 @@ def _check_binary_ew(self, func):
B[-10:] = np.NaN

result = func(A, B, 20, min_periods=5)

self.assertTrue(np.isnan(result.values[:15]).all())
self.assertFalse(np.isnan(result.values[15:]).any())
self.assertTrue(np.isnan(result.values[:14]).all())
self.assertFalse(np.isnan(result.values[14:]).any())

# GH 7898
for min_periods in (0, 1, 2):
result = func(A, B, 20, min_periods=min_periods)
# binary functions (ewmcov, ewmcorr) *should* require at least two values
if (func == mom.ewmcov) and (min_periods <= 1):
# currently ewmcov requires only one value, for some reason.
self.assertTrue(np.isnan(result.values[:10]).all())
self.assertFalse(np.isnan(result.values[10:]).any())
else:
self.assertTrue(np.isnan(result.values[:11]).all())
self.assertFalse(np.isnan(result.values[11:]).any())

# check series of length 0
result = func(Series([]), Series([]), 50, min_periods=min_periods)
assert_series_equal(result, Series([]))

# check series of length 1
result = func(Series([1.]), Series([1.]), 50, min_periods=min_periods)
if (func == mom.ewmcov) and (min_periods <= 1):
# currently ewmcov requires only one value, for some reason.
assert_series_equal(result, Series([0.]))
else:
assert_series_equal(result, Series([np.NaN]))

self.assertRaises(Exception, func, A, randn(50), 20, min_periods=5)

Expand Down