From a8c2753ad15e6ee580ef0543450a9094324f7a3b Mon Sep 17 00:00:00 2001 From: Alex Alekseyev Date: Sun, 28 Feb 2016 20:58:07 -0500 Subject: [PATCH] ENH: Allow exponentially weighted functions to specify alpha directly Closes #10789. Adds domain checks for exponentially weighted functions. --- doc/source/computation.rst | 27 ++++++----- doc/source/whatsnew/v0.18.0.txt | 1 + pandas/core/generic.py | 7 +-- pandas/core/window.py | 71 +++++++++++++++++------------ pandas/stats/moments.py | 62 +++++++++++++++----------- pandas/tests/test_window.py | 79 ++++++++++++++++++++++++++++++--- 6 files changed, 171 insertions(+), 76 deletions(-) diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 2b8cf7e41431b..a495020d704f1 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -733,24 +733,29 @@ therefore there is an assumption that :math:`x_0` is not an ordinary value but rather an exponentially weighted moment of the infinite series up to that point. -One must have :math:`0 < \alpha \leq 1`, but rather than pass :math:`\alpha` -directly, it's easier to think about either the **span**, **center of mass -(com)** or **halflife** of an EW moment: +One must have :math:`0 < \alpha \leq 1`, and while since version 0.18.0 +it has been possible to pass :math:`\alpha` directly, it's often easier +to think about either the **span**, **center of mass (com)** or **half-life** +of an EW moment: .. math:: \alpha = \begin{cases} - \frac{2}{s + 1}, & s = \text{span}\\ - \frac{1}{1 + c}, & c = \text{center of mass}\\ - 1 - \exp^{\frac{\log 0.5}{h}}, & h = \text{half life} + \frac{2}{s + 1}, & \text{for span}\ s \geq 1\\ + \frac{1}{1 + c}, & \text{for center of mass}\ c \geq 0\\ + 1 - \exp\left(\frac{\log 0.5}{h}\right), & \text{for half-life}\ h > 0 \end{cases} -One must specify precisely one of the three to the EW functions. **Span** -corresponds to what is commonly called a "20-day EW moving average" for -example. 
**Center of mass** has a more physical interpretation. For example, -**span** = 20 corresponds to **com** = 9.5. **Halflife** is the period of -time for the exponential weight to reduce to one half. +One must specify precisely one of **span**, **center of mass**, **half-life** +and **alpha** to the EW functions: + +- **Span** corresponds to what is commonly called an "N-day EW moving average". +- **Center of mass** has a more physical interpretation and can be thought of + in terms of span: :math:`c = (s - 1) / 2`. +- **Half-life** is the period of time for the exponential weight to reduce to + one half. +- **Alpha** specifies the smoothing factor directly. Here is an example for a univariate time series: diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 77c8a5a585b51..c0a6a8c598ea7 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -947,6 +947,7 @@ Other API Changes - More helpful error message when constructing a ``DataFrame`` with empty data but with indices (:issue:`8020`) - ``.describe()`` will now properly handle bool dtype as a categorical (:issue:`6625`) - More helpful error message invalid ``.transform`` with user defined input (:issue:`10165`) +- Exponentially weighted functions now allow specifying alpha directly (:issue:`10789`) and raise ``ValueError`` if parameters violate ``0 < alpha <= 1`` (:issue:`12492`) .. 
_whatsnew_0180.deprecations: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c9473900d1d5e..1684768eec2c4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5165,11 +5165,12 @@ def expanding(self, min_periods=1, freq=None, center=False, axis=0): cls.expanding = expanding @Appender(rwindow.ewm.__doc__) - def ewm(self, com=None, span=None, halflife=None, min_periods=0, - freq=None, adjust=True, ignore_na=False, axis=0): + def ewm(self, com=None, span=None, halflife=None, alpha=None, + min_periods=0, freq=None, adjust=True, ignore_na=False, + axis=0): axis = self._get_axis_number(axis) return rwindow.ewm(self, com=com, span=span, halflife=halflife, - min_periods=min_periods, freq=freq, + alpha=alpha, min_periods=min_periods, freq=freq, adjust=adjust, ignore_na=ignore_na, axis=axis) cls.ewm = ewm diff --git a/pandas/core/window.py b/pandas/core/window.py index 167599678d166..31874a96f8111 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -1038,13 +1038,21 @@ class EWM(_Rolling): Parameters ---------- - com : float. optional - Center of mass: :math:`\alpha = 1 / (1 + com)`, + com : float, optional + Specify decay in terms of center of mass, + :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0` span : float, optional - Specify decay in terms of span, :math:`\alpha = 2 / (span + 1)` + Specify decay in terms of span, + :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1` halflife : float, optional - Specify decay in terms of halflife, - :math:`\alpha = 1 - exp(log(0.5) / halflife)` + Specify decay in terms of half-life, + :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{ for } halflife > 0` + alpha : float, optional + Specify smoothing factor :math:`\alpha` directly, + :math:`0 < \alpha \leq 1` + + .. versionadded:: 0.18.0 + min_periods : int, default 0 Minimum number of observations in window required to have a value (otherwise result is NA). 
@@ -1063,16 +1071,10 @@ class EWM(_Rolling): Notes ----- - Either center of mass, span or halflife must be specified - - EWMA is sometimes specified using a "span" parameter `s`, we have that the - decay parameter :math:`\alpha` is related to the span as - :math:`\alpha = 2 / (s + 1) = 1 / (1 + c)` - - where `c` is the center of mass. Given a span, the associated center of - mass is :math:`c = (s - 1) / 2` - - So a "20-day EWMA" would have center 9.5. + Exactly one of center of mass, span, half-life, and alpha must be provided. + Allowed values and relationship between the parameters are specified in the + parameter descriptions above; see the link at the end of this section for + a detailed explanation. The `freq` keyword is used to conform time series data to a specified frequency by resampling the data. This is done with the default parameters @@ -1096,14 +1098,15 @@ class EWM(_Rolling): (if adjust is True), and 1-alpha and alpha (if adjust is False). More details can be found at - http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-moment-functions + http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows """ _attributes = ['com', 'min_periods', 'freq', 'adjust', 'ignore_na', 'axis'] - def __init__(self, obj, com=None, span=None, halflife=None, min_periods=0, - freq=None, adjust=True, ignore_na=False, axis=0): + def __init__(self, obj, com=None, span=None, halflife=None, alpha=None, + min_periods=0, freq=None, adjust=True, ignore_na=False, + axis=0): self.obj = obj - self.com = _get_center_of_mass(com, span, halflife) + self.com = _get_center_of_mass(com, span, halflife, alpha) self.min_periods = min_periods self.freq = freq self.adjust = adjust @@ -1320,20 +1323,32 @@ def dataframe_from_int_dict(data, frame_template): return _flex_binary_moment(arg2, arg1, f) -def _get_center_of_mass(com, span, halflife): - valid_count = len([x for x in [com, span, halflife] if x is not None]) +def 
_get_center_of_mass(com, span, halflife, alpha): + valid_count = len([x for x in [com, span, halflife, alpha] + if x is not None]) if valid_count > 1: - raise Exception("com, span, and halflife are mutually exclusive") - - if span is not None: - # convert span to center of mass + raise ValueError("com, span, halflife, and alpha " + "are mutually exclusive") + + # Convert to center of mass; domain checks ensure 0 < alpha <= 1 + if com is not None: + if com < 0: + raise ValueError("com must satisfy: com >= 0") + elif span is not None: + if span < 1: + raise ValueError("span must satisfy: span >= 1") com = (span - 1) / 2. elif halflife is not None: - # convert halflife to center of mass + if halflife <= 0: + raise ValueError("halflife must satisfy: halflife > 0") decay = 1 - np.exp(np.log(0.5) / halflife) com = 1 / decay - 1 - elif com is None: - raise Exception("Must pass one of com, span, or halflife") + elif alpha is not None: + if alpha <= 0 or alpha > 1: + raise ValueError("alpha must satisfy: 0 < alpha <= 1") + com = (1.0 - alpha) / alpha + else: + raise ValueError("Must pass one of com, span, halflife, or alpha") return float(com) diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index c875a9d49039b..46d30ab7fe313 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -67,13 +67,21 @@ """ -_ewm_kw = r"""com : float. 
optional - Center of mass: :math:`\alpha = 1 / (1 + com)`, +_ewm_kw = r"""com : float, optional + Specify decay in terms of center of mass, + :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0` span : float, optional - Specify decay in terms of span, :math:`\alpha = 2 / (span + 1)` + Specify decay in terms of span, + :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1` halflife : float, optional - Specify decay in terms of halflife, - :math:`\alpha = 1 - exp(log(0.5) / halflife)` + Specify decay in terms of half-life, + :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{ for } halflife > 0` +alpha : float, optional + Specify smoothing factor :math:`\alpha` directly, + :math:`0 < \alpha \leq 1` + + .. versionadded:: 0.18.0 + min_periods : int, default 0 Minimum number of observations in window required to have a value (otherwise result is NA). @@ -92,16 +100,10 @@ _ewm_notes = r""" Notes ----- -Either center of mass, span or halflife must be specified - -EWMA is sometimes specified using a "span" parameter `s`, we have that the -decay parameter :math:`\alpha` is related to the span as -:math:`\alpha = 2 / (s + 1) = 1 / (1 + c)` - -where `c` is the center of mass. Given a span, the associated center of mass is -:math:`c = (s - 1) / 2` - -So a "20-day EWMA" would have center 9.5. +Exactly one of center of mass, span, half-life, and alpha must be provided. +Allowed values and relationship between the parameters are specified in the +parameter descriptions above; see the link at the end of this section for +a detailed explanation. When adjust is True (default), weighted averages are calculated using weights (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1. @@ -121,7 +123,7 @@ True), and 1-alpha and alpha (if adjust is False). 
More details can be found at -http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-moment-functions +http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows """ _expanding_kw = """min_periods : int, default None @@ -323,14 +325,15 @@ def rolling_corr(arg1, arg2=None, window=None, pairwise=None, **kwargs): @Substitution("Exponentially-weighted moving average", _unary_arg, _ewm_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) -def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None, - adjust=True, how=None, ignore_na=False): +def ewma(arg, com=None, span=None, halflife=None, alpha=None, min_periods=0, + freq=None, adjust=True, how=None, ignore_na=False): return ensure_compat('ewm', 'mean', arg, com=com, span=span, halflife=halflife, + alpha=alpha, min_periods=min_periods, freq=freq, adjust=adjust, @@ -341,14 +344,15 @@ def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None, @Substitution("Exponentially-weighted moving variance", _unary_arg, _ewm_kw + _bias_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) -def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, - freq=None, how=None, ignore_na=False, adjust=True): +def ewmvar(arg, com=None, span=None, halflife=None, alpha=None, min_periods=0, + bias=False, freq=None, how=None, ignore_na=False, adjust=True): return ensure_compat('ewm', 'var', arg, com=com, span=span, halflife=halflife, + alpha=alpha, min_periods=min_periods, freq=freq, adjust=adjust, @@ -361,14 +365,15 @@ def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, @Substitution("Exponentially-weighted moving std", _unary_arg, _ewm_kw + _bias_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) -def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, - freq=None, how=None, ignore_na=False, adjust=True): +def ewmstd(arg, com=None, span=None, 
halflife=None, alpha=None, min_periods=0, + bias=False, freq=None, how=None, ignore_na=False, adjust=True): return ensure_compat('ewm', 'std', arg, com=com, span=span, halflife=halflife, + alpha=alpha, min_periods=min_periods, freq=freq, adjust=adjust, @@ -383,9 +388,9 @@ def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, @Substitution("Exponentially-weighted moving covariance", _binary_arg_flex, _ewm_kw + _pairwise_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) -def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, - bias=False, freq=None, pairwise=None, how=None, ignore_na=False, - adjust=True): +def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, alpha=None, + min_periods=0, bias=False, freq=None, pairwise=None, how=None, + ignore_na=False, adjust=True): if arg2 is None: arg2 = arg1 pairwise = True if pairwise is None else pairwise @@ -401,6 +406,7 @@ def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, com=com, span=span, halflife=halflife, + alpha=alpha, min_periods=min_periods, bias=bias, freq=freq, @@ -414,8 +420,9 @@ def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, @Substitution("Exponentially-weighted moving correlation", _binary_arg_flex, _ewm_kw + _pairwise_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) -def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, - freq=None, pairwise=None, how=None, ignore_na=False, adjust=True): +def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, alpha=None, + min_periods=0, freq=None, pairwise=None, how=None, ignore_na=False, + adjust=True): if arg2 is None: arg2 = arg1 pairwise = True if pairwise is None else pairwise @@ -430,6 +437,7 @@ def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, com=com, span=span, halflife=halflife, + alpha=alpha, min_periods=min_periods, freq=freq, how=how, diff --git 
a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 1b3351ae903bc..0d3b7e967f3ce 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -15,7 +15,7 @@ notnull, concat) from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, - assert_index_equal) + assert_index_equal, assert_numpy_array_equal) import pandas.core.datetools as datetools import pandas.stats.moments as mom import pandas.core.window as rwindow @@ -1249,8 +1249,8 @@ def test_ewma_span_com_args(self): B = mom.ewma(self.arr, span=20) assert_almost_equal(A, B) - self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20) - self.assertRaises(Exception, mom.ewma, self.arr) + self.assertRaises(ValueError, mom.ewma, self.arr, com=9.5, span=20) + self.assertRaises(ValueError, mom.ewma, self.arr) def test_ewma_halflife_arg(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -1258,13 +1258,78 @@ def test_ewma_halflife_arg(self): B = mom.ewma(self.arr, halflife=10.0) assert_almost_equal(A, B) - self.assertRaises(Exception, mom.ewma, self.arr, span=20, + self.assertRaises(ValueError, mom.ewma, self.arr, span=20, halflife=50) - self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, + self.assertRaises(ValueError, mom.ewma, self.arr, com=9.5, halflife=50) - self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20, + self.assertRaises(ValueError, mom.ewma, self.arr, com=9.5, span=20, halflife=50) - self.assertRaises(Exception, mom.ewma, self.arr) + self.assertRaises(ValueError, mom.ewma, self.arr) + + def test_ewma_alpha_old_api(self): + # GH 10789 + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + a = mom.ewma(self.arr, alpha=0.61722699889169674) + b = mom.ewma(self.arr, com=0.62014947789973052) + c = mom.ewma(self.arr, span=2.240298955799461) + d = mom.ewma(self.arr, halflife=0.721792864318) + assert_numpy_array_equal(a, b) + 
assert_numpy_array_equal(a, c) + assert_numpy_array_equal(a, d) + + def test_ewma_alpha_arg_old_api(self): + # GH 10789 + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.assertRaises(ValueError, mom.ewma, self.arr) + self.assertRaises(ValueError, mom.ewma, self.arr, + com=10.0, alpha=0.5) + self.assertRaises(ValueError, mom.ewma, self.arr, + span=10.0, alpha=0.5) + self.assertRaises(ValueError, mom.ewma, self.arr, + halflife=10.0, alpha=0.5) + + def test_ewm_alpha(self): + # GH 10789 + s = Series(self.arr) + a = s.ewm(alpha=0.61722699889169674).mean() + b = s.ewm(com=0.62014947789973052).mean() + c = s.ewm(span=2.240298955799461).mean() + d = s.ewm(halflife=0.721792864318).mean() + assert_series_equal(a, b) + assert_series_equal(a, c) + assert_series_equal(a, d) + + def test_ewm_alpha_arg(self): + # GH 10789 + s = Series(self.arr) + self.assertRaises(ValueError, s.ewm) + self.assertRaises(ValueError, s.ewm, com=10.0, alpha=0.5) + self.assertRaises(ValueError, s.ewm, span=10.0, alpha=0.5) + self.assertRaises(ValueError, s.ewm, halflife=10.0, alpha=0.5) + + def test_ewm_domain_checks(self): + # GH 12492 + s = Series(self.arr) + # com must satisfy: com >= 0 + self.assertRaises(ValueError, s.ewm, com=-0.1) + s.ewm(com=0.0) + s.ewm(com=0.1) + # span must satisfy: span >= 1 + self.assertRaises(ValueError, s.ewm, span=-0.1) + self.assertRaises(ValueError, s.ewm, span=0.0) + self.assertRaises(ValueError, s.ewm, span=0.9) + s.ewm(span=1.0) + s.ewm(span=1.1) + # halflife must satisfy: halflife > 0 + self.assertRaises(ValueError, s.ewm, halflife=-0.1) + self.assertRaises(ValueError, s.ewm, halflife=0.0) + s.ewm(halflife=0.1) + # alpha must satisfy: 0 < alpha <= 1 + self.assertRaises(ValueError, s.ewm, alpha=-0.1) + self.assertRaises(ValueError, s.ewm, alpha=0.0) + s.ewm(alpha=0.1) + s.ewm(alpha=1.0) + self.assertRaises(ValueError, s.ewm, alpha=1.1) def test_ew_empty_arrays(self): arr = np.array([], dtype=np.float64)