Skip to content

Commit 74b59a3

Browse files
authored
ENH: ExponentialMovingWindow.sum (#43871)
1 parent 679e058 commit 74b59a3

File tree

8 files changed

+207
-100
lines changed

8 files changed

+207
-100
lines changed

doc/source/reference/window.rst

+1
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,7 @@ Exponentially-weighted window functions
8888
:toctree: api/
8989

9090
ExponentialMovingWindow.mean
91+
ExponentialMovingWindow.sum
9192
ExponentialMovingWindow.std
9293
ExponentialMovingWindow.var
9394
ExponentialMovingWindow.corr

doc/source/whatsnew/v1.4.0.rst

+1-1
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,7 @@ Other enhancements
128128
- Added support for nullable boolean and integer types in :meth:`DataFrame.to_stata`, :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`40855`)
129129
- :meth:`DataFrame.__pos__`, :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`)
130130
- The error raised when an optional dependency can't be imported now includes the original exception, for easier investigation (:issue:`43882`)
131-
-
131+
- Added :meth:`.ExponentialMovingWindow.sum` (:issue:`13297`)
132132

133133
.. ---------------------------------------------------------------------------
134134

pandas/_libs/window/aggregations.pyi

+2-1
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ def roll_weighted_var(
100100
minp: int, # int64_t
101101
ddof: int, # unsigned int
102102
) -> np.ndarray: ... # np.ndarray[np.float64]
103-
def ewma(
103+
def ewm(
104104
vals: np.ndarray, # const float64_t[:]
105105
start: np.ndarray, # const int64_t[:]
106106
end: np.ndarray, # const int64_t[:]
@@ -109,6 +109,7 @@ def ewma(
109109
adjust: bool,
110110
ignore_na: bool,
111111
deltas: np.ndarray, # const float64_t[:]
112+
normalize: bool,
112113
) -> np.ndarray: ... # np.ndarray[np.float64]
113114
def ewmcov(
114115
input_x: np.ndarray, # const float64_t[:]

pandas/_libs/window/aggregations.pyx

+30-27
Original file line number | Diff line number | Diff line change
@@ -1604,13 +1604,13 @@ def roll_weighted_var(const float64_t[:] values, const float64_t[:] weights,
16041604

16051605

16061606
# ----------------------------------------------------------------------
1607-
# Exponentially weighted moving average
1607+
# Exponentially weighted moving window functions
16081608

1609-
def ewma(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end,
1610-
int minp, float64_t com, bint adjust, bint ignore_na,
1611-
const float64_t[:] deltas=None) -> np.ndarray:
1609+
def ewm(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end,
1610+
int minp, float64_t com, bint adjust, bint ignore_na,
1611+
const float64_t[:] deltas=None, bint normalize=True) -> np.ndarray:
16121612
"""
1613-
Compute exponentially-weighted moving average using center-of-mass.
1613+
Compute exponentially-weighted moving average or sum using center-of-mass.
16141614

16151615
Parameters
16161616
----------
@@ -1623,6 +1623,8 @@ def ewma(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end,
16231623
ignore_na : bool
16241624
deltas : ndarray (float64 type), optional. If None, implicitly assumes equally
16251625
spaced points (used when `times` is not passed)
1626+
normalize : bool, optional.
1627+
If True, calculate the mean. If False, calculate the sum.
16261628

16271629
Returns
16281630
-------
@@ -1634,7 +1636,7 @@ def ewma(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end,
16341636
const float64_t[:] sub_vals
16351637
const float64_t[:] sub_deltas=None
16361638
ndarray[float64_t] sub_output, output = np.empty(N, dtype=np.float64)
1637-
float64_t alpha, old_wt_factor, new_wt, weighted_avg, old_wt, cur
1639+
float64_t alpha, old_wt_factor, new_wt, weighted, old_wt, cur
16381640
bint is_observation, use_deltas
16391641

16401642
if N == 0:
@@ -1657,48 +1659,49 @@ def ewma(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end,
16571659
win_size = len(sub_vals)
16581660
sub_output = np.empty(win_size, dtype=np.float64)
16591661

1660-
weighted_avg = sub_vals[0]
1661-
is_observation = weighted_avg == weighted_avg
1662+
weighted = sub_vals[0]
1663+
is_observation = weighted == weighted
16621664
nobs = int(is_observation)
1663-
sub_output[0] = weighted_avg if nobs >= minp else NaN
1665+
sub_output[0] = weighted if nobs >= minp else NaN
16641666
old_wt = 1.
16651667

16661668
with nogil:
16671669
for i in range(1, win_size):
16681670
cur = sub_vals[i]
16691671
is_observation = cur == cur
16701672
nobs += is_observation
1671-
if weighted_avg == weighted_avg:
1673+
if weighted == weighted:
16721674

16731675
if is_observation or not ignore_na:
1674-
if use_deltas:
1675-
old_wt *= old_wt_factor ** sub_deltas[i - 1]
1676+
if normalize:
1677+
if use_deltas:
1678+
old_wt *= old_wt_factor ** sub_deltas[i - 1]
1679+
else:
1680+
old_wt *= old_wt_factor
16761681
else:
1677-
old_wt *= old_wt_factor
1682+
weighted = old_wt_factor * weighted
16781683
if is_observation:
1679-
1680-
# avoid numerical errors on constant series
1681-
if weighted_avg != cur:
1682-
weighted_avg = ((old_wt * weighted_avg) +
1683-
(new_wt * cur)) / (old_wt + new_wt)
1684-
if adjust:
1685-
old_wt += new_wt
1684+
if normalize:
1685+
# avoid numerical errors on constant series
1686+
if weighted != cur:
1687+
weighted = old_wt * weighted + new_wt * cur
1688+
weighted /= (old_wt + new_wt)
1689+
if adjust:
1690+
old_wt += new_wt
1691+
else:
1692+
old_wt = 1.
16861693
else:
1687-
old_wt = 1.
1694+
weighted += cur
16881695
elif is_observation:
1689-
weighted_avg = cur
1696+
weighted = cur
16901697

1691-
sub_output[i] = weighted_avg if nobs >= minp else NaN
1698+
sub_output[i] = weighted if nobs >= minp else NaN
16921699

16931700
output[s:e] = sub_output
16941701

16951702
return output
16961703

16971704

1698-
# ----------------------------------------------------------------------
1699-
# Exponentially weighted moving covariance
1700-
1701-
17021705
def ewmcov(const float64_t[:] input_x, const int64_t[:] start, const int64_t[:] end,
17031706
int minp, const float64_t[:] input_y, float64_t com, bint adjust,
17041707
bint ignore_na, bint bias) -> np.ndarray:

pandas/core/window/ewm.py

+73-12
Original file line number | Diff line number | Diff line change
@@ -46,8 +46,8 @@
4646
window_agg_numba_parameters,
4747
)
4848
from pandas.core.window.numba_ import (
49-
generate_ewma_numba_table_func,
50-
generate_numba_ewma_func,
49+
generate_numba_ewm_func,
50+
generate_numba_ewm_table_func,
5151
)
5252
from pandas.core.window.online import (
5353
EWMMeanState,
@@ -469,17 +469,21 @@ def aggregate(self, func, *args, **kwargs):
469469
def mean(self, *args, engine=None, engine_kwargs=None, **kwargs):
470470
if maybe_use_numba(engine):
471471
if self.method == "single":
472-
ewma_func = generate_numba_ewma_func(
473-
engine_kwargs, self._com, self.adjust, self.ignore_na, self._deltas
474-
)
475-
numba_cache_key = (lambda x: x, "ewma")
472+
func = generate_numba_ewm_func
473+
numba_cache_key = (lambda x: x, "ewm_mean")
476474
else:
477-
ewma_func = generate_ewma_numba_table_func(
478-
engine_kwargs, self._com, self.adjust, self.ignore_na, self._deltas
479-
)
480-
numba_cache_key = (lambda x: x, "ewma_table")
475+
func = generate_numba_ewm_table_func
476+
numba_cache_key = (lambda x: x, "ewm_mean_table")
477+
ewm_func = func(
478+
engine_kwargs=engine_kwargs,
479+
com=self._com,
480+
adjust=self.adjust,
481+
ignore_na=self.ignore_na,
482+
deltas=self._deltas,
483+
normalize=True,
484+
)
481485
return self._apply(
482-
ewma_func,
486+
ewm_func,
483487
numba_cache_key=numba_cache_key,
484488
)
485489
elif engine in ("cython", None):
@@ -489,11 +493,68 @@ def mean(self, *args, engine=None, engine_kwargs=None, **kwargs):
489493

490494
deltas = None if self.times is None else self._deltas
491495
window_func = partial(
492-
window_aggregations.ewma,
496+
window_aggregations.ewm,
497+
com=self._com,
498+
adjust=self.adjust,
499+
ignore_na=self.ignore_na,
500+
deltas=deltas,
501+
normalize=True,
502+
)
503+
return self._apply(window_func)
504+
else:
505+
raise ValueError("engine must be either 'numba' or 'cython'")
506+
507+
@doc(
508+
template_header,
509+
create_section_header("Parameters"),
510+
args_compat,
511+
window_agg_numba_parameters,
512+
kwargs_compat,
513+
create_section_header("Returns"),
514+
template_returns,
515+
create_section_header("See Also"),
516+
template_see_also,
517+
create_section_header("Notes"),
518+
numba_notes.replace("\n", "", 1),
519+
window_method="ewm",
520+
aggregation_description="(exponential weighted moment) sum",
521+
agg_method="sum",
522+
)
523+
def sum(self, *args, engine=None, engine_kwargs=None, **kwargs):
524+
if not self.adjust:
525+
raise NotImplementedError("sum is not implemented with adjust=False")
526+
if maybe_use_numba(engine):
527+
if self.method == "single":
528+
func = generate_numba_ewm_func
529+
numba_cache_key = (lambda x: x, "ewm_sum")
530+
else:
531+
func = generate_numba_ewm_table_func
532+
numba_cache_key = (lambda x: x, "ewm_sum_table")
533+
ewm_func = func(
534+
engine_kwargs=engine_kwargs,
535+
com=self._com,
536+
adjust=self.adjust,
537+
ignore_na=self.ignore_na,
538+
deltas=self._deltas,
539+
normalize=False,
540+
)
541+
return self._apply(
542+
ewm_func,
543+
numba_cache_key=numba_cache_key,
544+
)
545+
elif engine in ("cython", None):
546+
if engine_kwargs is not None:
547+
raise ValueError("cython engine does not accept engine_kwargs")
548+
nv.validate_window_func("sum", args, kwargs)
549+
550+
deltas = None if self.times is None else self._deltas
551+
window_func = partial(
552+
window_aggregations.ewm,
493553
com=self._com,
494554
adjust=self.adjust,
495555
ignore_na=self.ignore_na,
496556
deltas=deltas,
557+
normalize=False,
497558
)
498559
return self._apply(window_func)
499560
else:

0 commit comments

Comments
 (0)