Skip to content

Commit 7e9f66e

Browse files
authored
ENH: Add support for calculating EWMA with a time component (#34839)
1 parent 9e44618 commit 7e9f66e

File tree

8 files changed

+233
-24
lines changed

8 files changed

+233
-24
lines changed

asv_bench/benchmarks/rolling.py

+7
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,18 @@ class EWMMethods:
9191
def setup(self, constructor, window, dtype, method):
9292
N = 10 ** 5
9393
arr = (100 * np.random.random(N)).astype(dtype)
94+
times = pd.date_range("1900", periods=N, freq="23s")
9495
self.ewm = getattr(pd, constructor)(arr).ewm(halflife=window)
96+
self.ewm_times = getattr(pd, constructor)(arr).ewm(
97+
halflife="1 Day", times=times
98+
)
9599

96100
def time_ewm(self, constructor, window, dtype, method):
97101
getattr(self.ewm, method)()
98102

103+
def time_ewm_times(self, constructor, window, dtype, method):
    """
    Benchmark ``mean`` on the time-aware EWM object.

    ``setup`` stores the object built with ``halflife="1 Day"`` and
    ``times=...`` on ``self.ewm_times``; the benchmark must call that object,
    not ``self.ewm``, otherwise the time-weighted code path is never timed.
    The parametrized arguments are unused here.
    """
    self.ewm_times.mean()
105+
99106

100107
class VariableWindowMethods(Methods):
101108
params = (

doc/source/user_guide/computation.rst

+19
Original file line numberDiff line numberDiff line change
@@ -1095,6 +1095,25 @@ and **alpha** to the EW functions:
10951095
one half.
10961096
* **Alpha** specifies the smoothing factor directly.
10971097

1098+
.. versionadded:: 1.1.0
1099+
1100+
You can also specify ``halflife`` in terms of a timedelta convertible unit to specify the amount of
1101+
time it takes for an observation to decay to half its value when also specifying a sequence
1102+
of ``times``.
1103+
1104+
.. ipython:: python
1105+
1106+
df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
1107+
df
1108+
times = ['2020-01-01', '2020-01-03', '2020-01-10', '2020-01-15', '2020-01-17']
1109+
df.ewm(halflife='4 days', times=pd.DatetimeIndex(times)).mean()
1110+
1111+
The following formula is used to compute exponentially weighted mean with an input vector of times:
1112+
1113+
.. math::
1114+
1115+
y_t = \frac{\sum_{i=0}^t 0.5^\frac{t_{t} - t_{i}}{\lambda} x_{i}}{\sum_{i=0}^t 0.5^\frac{t_{t} - t_{i}}{\lambda}}
1116+
10981117
Here is an example for a univariate time series:
10991118

11001119
.. ipython:: python

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@ Other enhancements
329329
- :meth:`DataFrame.to_excel` can now also write OpenOffice spreadsheet (.ods) files (:issue:`27222`)
330330
- :meth:`~Series.explode` now accepts ``ignore_index`` to reset the index, similarly to :meth:`pd.concat` or :meth:`DataFrame.sort_values` (:issue:`34932`).
331331
- :meth:`read_csv` now accepts string values like "0", "0.0", "1", "1.0" as convertible to the nullable boolean dtype (:issue:`34859`)
332+
- :class:`pandas.core.window.ExponentialMovingWindow` now supports a ``times`` argument that allows ``mean`` to be calculated with observations spaced by the timestamps in ``times`` (:issue:`34839`)
332333

333334
.. ---------------------------------------------------------------------------
334335

pandas/_libs/window/aggregations.pyx

+53-8
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ from libc.stdlib cimport malloc, free
88

99
import numpy as np
1010
cimport numpy as cnp
11-
from numpy cimport ndarray, int64_t, float64_t, float32_t
11+
from numpy cimport ndarray, int64_t, float64_t, float32_t, uint8_t
1212
cnp.import_array()
1313

1414

@@ -1752,6 +1752,51 @@ def roll_weighted_var(float64_t[:] values, float64_t[:] weights,
17521752
# ----------------------------------------------------------------------
17531753
# Exponentially weighted moving average
17541754

1755+
def ewma_time(ndarray[float64_t] vals, int minp, ndarray[int64_t] times,
              int64_t halflife):
    """
    Compute exponentially-weighted moving average using halflife and time
    distances.

    At position ``i`` the result is the weighted mean of every non-NaN
    observation seen so far, where the observation at ``times[j]`` carries
    the weight ``0.5 ** ((times[i] - times[j]) / halflife)``.

    Parameters
    ----------
    vals : ndarray[float64]
        Observations. A NaN entry contributes nothing and the previously
        computed result is carried forward to that position.
    minp : int
        Minimum number of non-NaN observations required before a result is
        emitted; earlier positions are set to NaN.
    times : ndarray[int64]
        Time of each observation, expected in the same integer unit as
        ``halflife``.
    halflife : int64
        Time distance over which the weight of an observation decays to
        one half.

    Returns
    -------
    ndarray
        float64 array with the same length as ``vals``.
    """
    cdef:
        Py_ssize_t i, num_not_nan = 0, N = len(vals)
        bint is_not_nan
        float64_t last_result
        ndarray[uint8_t] mask = np.zeros(N, dtype=np.uint8)
        ndarray[float64_t] weights, observations, output = np.empty(N, dtype=np.float64)

    if N == 0:
        return output

    # Boolean view that shares memory with ``mask``: flags written inside the
    # loop are visible through it, so it is created once instead of twice per
    # iteration.
    valid = mask.view(np.bool_)

    last_result = vals[0]

    for i in range(N):
        # NaN is the only value that does not compare equal to itself.
        is_not_nan = vals[i] == vals[i]
        num_not_nan += is_not_nan
        if is_not_nan:
            mask[i] = 1
            # Re-weight all valid observations relative to the current time.
            weights = 0.5 ** ((times[i] - times[valid]) / halflife)
            observations = vals[valid]
            last_result = np.sum(weights * observations) / np.sum(weights)

        if num_not_nan >= minp:
            output[i] = last_result
        else:
            output[i] = NaN

    return output
1799+
17551800

17561801
def ewma(float64_t[:] vals, float64_t com, bint adjust, bint ignore_na, int minp):
17571802
"""
@@ -1761,9 +1806,9 @@ def ewma(float64_t[:] vals, float64_t com, bint adjust, bint ignore_na, int minp
17611806
----------
17621807
vals : ndarray (float64 type)
17631808
com : float64
1764-
adjust: int
1765-
ignore_na: bool
1766-
minp: int
1809+
adjust : bool
1810+
ignore_na : bool
1811+
minp : int
17671812
17681813
Returns
17691814
-------
@@ -1831,10 +1876,10 @@ def ewmcov(float64_t[:] input_x, float64_t[:] input_y,
18311876
input_x : ndarray (float64 type)
18321877
input_y : ndarray (float64 type)
18331878
com : float64
1834-
adjust: int
1835-
ignore_na: bool
1836-
minp: int
1837-
bias: int
1879+
adjust : int
1880+
ignore_na : bool
1881+
minp : int
1882+
bias : int
18381883
18391884
Returns
18401885
-------

pandas/core/generic.py

+2
Original file line numberDiff line numberDiff line change
@@ -10518,6 +10518,7 @@ def ewm(
1051810518
adjust=True,
1051910519
ignore_na=False,
1052010520
axis=0,
10521+
times=None,
1052110522
):
1052210523
axis = self._get_axis_number(axis)
1052310524
return ExponentialMovingWindow(
@@ -10530,6 +10531,7 @@ def ewm(
1053010531
adjust=adjust,
1053110532
ignore_na=ignore_na,
1053210533
axis=axis,
10534+
times=times,
1053310535
)
1053410536

1053510537
cls.ewm = ewm

pandas/core/window/ewm.py

+85-14
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,21 @@
1+
import datetime
12
from functools import partial
23
from textwrap import dedent
34
from typing import Optional, Union
45

56
import numpy as np
67

8+
from pandas._libs.tslibs import Timedelta
79
import pandas._libs.window.aggregations as window_aggregations
8-
from pandas._typing import FrameOrSeries
10+
from pandas._typing import FrameOrSeries, TimedeltaConvertibleTypes
911
from pandas.compat.numpy import function as nv
1012
from pandas.util._decorators import Appender, Substitution, doc
1113

14+
from pandas.core.dtypes.common import is_datetime64_ns_dtype
1215
from pandas.core.dtypes.generic import ABCDataFrame
1316

1417
from pandas.core.base import DataError
15-
import pandas.core.common as com
18+
import pandas.core.common as common
1619
from pandas.core.window.common import _doc_template, _shared_docs, zsqrt
1720
from pandas.core.window.rolling import _flex_binary_moment, _Rolling
1821

@@ -32,7 +35,7 @@ def get_center_of_mass(
3235
halflife: Optional[float],
3336
alpha: Optional[float],
3437
) -> float:
35-
valid_count = com.count_not_none(comass, span, halflife, alpha)
38+
valid_count = common.count_not_none(comass, span, halflife, alpha)
3639
if valid_count > 1:
3740
raise ValueError("comass, span, halflife, and alpha are mutually exclusive")
3841

@@ -76,10 +79,17 @@ class ExponentialMovingWindow(_Rolling):
7679
span : float, optional
7780
Specify decay in terms of span,
7881
:math:`\alpha = 2 / (span + 1)`, for :math:`span \geq 1`.
79-
halflife : float, optional
82+
halflife : float, str, timedelta, optional
8083
Specify decay in terms of half-life,
8184
:math:`\alpha = 1 - \exp\left(-\ln(2) / halflife\right)`, for
8285
:math:`halflife > 0`.
86+
87+
If ``times`` is specified, the time unit (str or timedelta) over which an
88+
observation decays to half its value. Only applicable to ``mean()``
89+
and the halflife value will not apply to the other functions.
90+
91+
.. versionadded:: 1.1.0
92+
8393
alpha : float, optional
8494
Specify smoothing factor :math:`\alpha` directly,
8595
:math:`0 < \alpha \leq 1`.
@@ -124,6 +134,18 @@ class ExponentialMovingWindow(_Rolling):
124134
axis : {0, 1}, default 0
125135
The axis to use. The value 0 identifies the rows, and 1
126136
identifies the columns.
137+
times : str, np.ndarray, Series, default None
138+
139+
.. versionadded:: 1.1.0
140+
141+
Times corresponding to the observations. Must be monotonically increasing and
142+
``datetime64[ns]`` dtype.
143+
144+
If str, the name of the column in the DataFrame representing the times.
145+
146+
If 1-D array like, a sequence with the same shape as the observations.
147+
148+
Only applicable to ``mean()``.
127149
128150
Returns
129151
-------
@@ -159,6 +181,17 @@ class ExponentialMovingWindow(_Rolling):
159181
2 1.615385
160182
3 1.615385
161183
4 3.670213
184+
185+
Specifying ``times`` with a timedelta ``halflife`` when computing mean.
186+
187+
>>> times = ['2020-01-01', '2020-01-03', '2020-01-10', '2020-01-15', '2020-01-17']
188+
>>> df.ewm(halflife='4 days', times=pd.DatetimeIndex(times)).mean()
189+
B
190+
0 0.000000
191+
1 0.585786
192+
2 1.523889
193+
3 1.523889
194+
4 3.233686
162195
"""
163196

164197
_attributes = ["com", "min_periods", "adjust", "ignore_na", "axis"]
@@ -168,20 +201,49 @@ def __init__(
168201
obj,
169202
com: Optional[float] = None,
170203
span: Optional[float] = None,
171-
halflife: Optional[float] = None,
204+
halflife: Optional[Union[float, TimedeltaConvertibleTypes]] = None,
172205
alpha: Optional[float] = None,
173206
min_periods: int = 0,
174207
adjust: bool = True,
175208
ignore_na: bool = False,
176209
axis: int = 0,
210+
times: Optional[Union[str, np.ndarray, FrameOrSeries]] = None,
177211
):
212+
self.com: Optional[float]
178213
self.obj = obj
179-
self.com = get_center_of_mass(com, span, halflife, alpha)
180214
self.min_periods = max(int(min_periods), 1)
181215
self.adjust = adjust
182216
self.ignore_na = ignore_na
183217
self.axis = axis
184218
self.on = None
219+
if times is not None:
220+
if isinstance(times, str):
221+
times = self._selected_obj[times]
222+
if not is_datetime64_ns_dtype(times):
223+
raise ValueError("times must be datetime64[ns] dtype.")
224+
if len(times) != len(obj):
225+
raise ValueError("times must be the same length as the object.")
226+
if not isinstance(halflife, (str, datetime.timedelta)):
227+
raise ValueError(
228+
"halflife must be a string or datetime.timedelta object"
229+
)
230+
self.times = np.asarray(times.astype(np.int64))
231+
self.halflife = Timedelta(halflife).value
232+
# Halflife is no longer applicable when calculating COM
233+
# But allow COM to still be calculated if the user passes other decay args
234+
if common.count_not_none(com, span, alpha) > 0:
235+
self.com = get_center_of_mass(com, span, None, alpha)
236+
else:
237+
self.com = None
238+
else:
239+
if halflife is not None and isinstance(halflife, (str, datetime.timedelta)):
240+
raise ValueError(
241+
"halflife can only be a timedelta convertible argument if "
242+
"times is not None."
243+
)
244+
self.times = None
245+
self.halflife = None
246+
self.com = get_center_of_mass(com, span, halflife, alpha)
185247

186248
@property
187249
def _constructor(self):
@@ -277,14 +339,23 @@ def mean(self, *args, **kwargs):
277339
Arguments and keyword arguments to be passed into func.
278340
"""
279341
nv.validate_window_func("mean", args, kwargs)
280-
window_func = self._get_roll_func("ewma")
281-
window_func = partial(
282-
window_func,
283-
com=self.com,
284-
adjust=self.adjust,
285-
ignore_na=self.ignore_na,
286-
minp=self.min_periods,
287-
)
342+
if self.times is not None:
343+
window_func = self._get_roll_func("ewma_time")
344+
window_func = partial(
345+
window_func,
346+
minp=self.min_periods,
347+
times=self.times,
348+
halflife=self.halflife,
349+
)
350+
else:
351+
window_func = self._get_roll_func("ewma")
352+
window_func = partial(
353+
window_func,
354+
com=self.com,
355+
adjust=self.adjust,
356+
ignore_na=self.ignore_na,
357+
minp=self.min_periods,
358+
)
288359
return self._apply(window_func)
289360

290361
@Substitution(name="ewm", func_name="std")

pandas/tests/window/conftest.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from datetime import datetime
1+
from datetime import datetime, timedelta
22

33
import numpy as np
44
from numpy.random import randn
@@ -302,3 +302,9 @@ def series():
302302
def which(request):
303303
"""Turn parametrized which as fixture for series and frame"""
304304
return request.param
305+
306+
307+
@pytest.fixture(params=["1 day", timedelta(days=1)])
308+
def halflife_with_times(request):
309+
"""Halflife argument for EWM when times is specified."""
310+
return request.param

0 commit comments

Comments
 (0)