From 63d30ef2b9803cb3c1046a3e9ec1065c2c891748 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 27 Jun 2021 20:33:11 -0700 Subject: [PATCH 1/4] ENH: Add table method for EWM.mean --- pandas/core/window/ewm.py | 34 ++++++++++++--- pandas/core/window/numba_.py | 80 ++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 6 deletions(-) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index c1d532d94eb83..ee31dcde81b7a 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -40,7 +40,10 @@ ExponentialMovingWindowIndexer, GroupbyIndexer, ) -from pandas.core.window.numba_ import generate_numba_ewma_func +from pandas.core.window.numba_ import ( + generate_ewma_numba_table_func, + generate_numba_ewma_func, +) from pandas.core.window.online import ( EWMMeanState, generate_online_numba_ewma_func, @@ -200,6 +203,16 @@ class ExponentialMovingWindow(BaseWindow): If 1-D array like, a sequence with the same shape as the observations. Only applicable to ``mean()``. + method : str {'single', 'table'}, default 'single' + Execute the rolling operation per single column or row (``'single'``) + or over the entire object (``'table'``). + + This argument is only implemented when specifying ``engine='numba'`` + in the method call. + + Only applicable to ``mean()`` + + .. versionadded:: 1.3.0 Returns ------- @@ -258,6 +271,7 @@ class ExponentialMovingWindow(BaseWindow): "ignore_na", "axis", "times", + "method", ] def __init__( @@ -272,6 +286,7 @@ def __init__( ignore_na: bool = False, axis: Axis = 0, times: str | np.ndarray | FrameOrSeries | None = None, + method: str = "single", *, selection=None, ): @@ -281,7 +296,7 @@ def __init__( on=None, center=False, closed=None, - method="single", + method=method, axis=axis, selection=selection, ) @@ -437,12 +452,19 @@ def aggregate(self, func, *args, **kwargs): ) def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): if maybe_use_numba(engine): - ewma_func = generate_numba_ewma_func( - engine_kwargs, self._com, self.adjust, self.ignore_na, self._deltas - ) + if self.method == "single": + ewma_func = generate_numba_ewma_func( + engine_kwargs, self._com, self.adjust, self.ignore_na, self._deltas + ) + numba_cache_key = (lambda x: x, "ewma") + else: + ewma_func = generate_ewma_numba_table_func( + engine_kwargs, self._com, self.adjust, self.ignore_na, self._deltas + ) + numba_cache_key = (lambda x: x, "ewma_table") return self._apply( ewma_func, - numba_cache_key=(lambda x: x, "ewma"), + numba_cache_key=numba_cache_key, ) elif engine in ("cython", None): if engine_kwargs is not None: diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index d00be0ea840a8..6714abc4af840 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -248,3 +248,83 @@ def nan_agg_with_axis(table): return result return nan_agg_with_axis + + +def generate_ewma_numba_table_func( + engine_kwargs: dict[str, bool] | None, + com: float, + adjust: bool, + ignore_na: bool, + deltas: np.ndarray, +): + """ + Generate a numba jitted ewma function applied table wise specified + by values from engine_kwargs. + + Parameters + ---------- + engine_kwargs : dict + dictionary of arguments to be passed into numba.jit + com : float + adjust : bool + ignore_na : bool + deltas : numpy.ndarray + + Returns + ------- + Numba function + """ + nopython, nogil, parallel = get_jit_arguments(engine_kwargs) + + cache_key = (lambda x: x, "ewma_table") + if cache_key in NUMBA_FUNC_CACHE: + return NUMBA_FUNC_CACHE[cache_key] + + numba = import_optional_dependency("numba") + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def ewma_table( + values: np.ndarray, + begin: np.ndarray, + end: np.ndarray, + minimum_periods: int, + ) -> np.ndarray: + alpha = 1.0 / (1.0 + com) + old_wt_factor = 1.0 - alpha + new_wt = 1.0 if adjust else alpha + old_wt = np.ones(values.shape[0]) + + result = np.empty(values.shape) + weighted_avg = values[0] + nobs = (~np.isnan(weighted_avg)).astype(np.int64) + result[0] = np.where(nobs >= minimum_periods, weighted_avg, np.nan) + + for i in range(1, len(values)): + cur = values[i] + is_observations = ~np.isnan(cur) + nobs += is_observations.astype(np.int64) + for j in numba.prange(len(cur)): + if not np.isnan(weighted_avg[j]): + if is_observations[j] or not ignore_na: + + # note that len(deltas) = len(vals) - 1 and deltas[i] is to be + # used in conjunction with vals[i+1] + old_wt[j] *= old_wt_factor ** deltas[j - 1] + if is_observations[j]: + # avoid numerical errors on constant series + if weighted_avg[j] != cur[j]: + weighted_avg[j] = ( + (old_wt[j] * weighted_avg[j]) + (new_wt * cur[j]) + ) / (old_wt[j] + new_wt) + if adjust: + old_wt[j] += new_wt + else: + old_wt[j] = 1.0 + elif is_observations[j]: + weighted_avg[j] = cur[j] + + result[i] = np.where(nobs >= minimum_periods, weighted_avg, np.nan) + + return result + + return ewma_table From d875d8e2d26a5eaca5f77dbea18768020aa60a88 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 27 Jun 2021 22:48:02 -0700 Subject: [PATCH 2/4] Fix some tests and add whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/core/generic.py | 2 ++ pandas/core/window/numba_.py | 2 +- pandas/tests/window/test_numba.py | 13 +++++++++++++ 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 49c168cd5eb84..c044d29890f25 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -240,7 +240,7 @@ For example: Other enhancements ^^^^^^^^^^^^^^^^^^ -- :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, and :meth:`Series.expanding` now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview ` for performance and functional benefits (:issue:`15095`, :issue:`38995`) +- :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.ewm`, :meth:`DataFrame.ewm`, :meth:`Series.expanding` now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview ` for performance and functional benefits (:issue:`15095`, :issue:`38995`) - :class:`.ExponentialMovingWindow` now support a ``online`` method that can perform ``mean`` calculations in an online fashion. See :ref:`Window Overview ` (:issue:`41673`) - Added :meth:`MultiIndex.dtypes` (:issue:`37062`) - Added ``end`` and ``end_day`` options for the ``origin`` argument in :meth:`DataFrame.resample` (:issue:`37804`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 82895ab9eb67a..da96fef2bf025 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10905,6 +10905,7 @@ def ewm( ignore_na: bool_t = False, axis: Axis = 0, times: str | np.ndarray | FrameOrSeries | None = None, + method: str = "single", ) -> ExponentialMovingWindow: axis = self._get_axis_number(axis) # error: Value of type variable "FrameOrSeries" of "ExponentialMovingWindow" @@ -10920,6 +10921,7 @@ def ewm( ignore_na=ignore_na, axis=axis, times=times, + method=method, ) # ---------------------------------------------------------------------- diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 6714abc4af840..b6be9a8abe075 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -295,7 +295,7 @@ def ewma_table( old_wt = np.ones(values.shape[0]) result = np.empty(values.shape) - weighted_avg = values[0] + weighted_avg = values[0].copy() nobs = (~np.isnan(weighted_avg)).astype(np.int64) result[0] = np.where(nobs >= minimum_periods, weighted_avg, np.nan) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index b79c367d482ae..581c1a530c155 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -304,3 +304,16 @@ def test_table_method_expanding_methods( engine_kwargs=engine_kwargs, engine="numba" ) tm.assert_frame_equal(result, expected) + + def test_table_method_ewm(self, axis, nogil, parallel, nopython): + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + df = DataFrame(np.eye(3)) + + result = df.ewm(com=1, method="table", axis=axis).mean( + engine_kwargs=engine_kwargs, engine="numba" + ) + expected = df.ewm(com=1, method="single", axis=axis).mean( + engine_kwargs=engine_kwargs, engine="numba" + ) + tm.assert_frame_equal(result, expected) From b0fe2fbf72cfc8b494d2a1f0f5e53fbde8da8976 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 27 Jun 2021 22:50:00 -0700 Subject: [PATCH 3/4] Add issue number --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index c044d29890f25..95915578c4ca8 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -240,7 +240,7 @@ For example: Other enhancements ^^^^^^^^^^^^^^^^^^ -- :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.ewm`, :meth:`DataFrame.ewm`, :meth:`Series.expanding` now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview ` for performance and functional benefits (:issue:`15095`, :issue:`38995`) +- :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.ewm`, :meth:`DataFrame.ewm`, :meth:`Series.expanding` now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview ` for performance and functional benefits (:issue:`15095`, :issue:`38995`, :issue:`42273`) - :class:`.ExponentialMovingWindow` now support a ``online`` method that can perform ``mean`` calculations in an online fashion. See :ref:`Window Overview ` (:issue:`41673`) - Added :meth:`MultiIndex.dtypes` (:issue:`37062`) - Added ``end`` and ``end_day`` options for the ``origin`` argument in :meth:`DataFrame.resample` (:issue:`37804`) From b14ea969420f184459630508dea1cb100cb2b492 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 28 Jun 2021 21:54:00 -0700 Subject: [PATCH 4/4] Add asv benchmark --- asv_bench/benchmarks/rolling.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index d35770b720f7a..97294fc02834b 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -296,5 +296,8 @@ def time_apply(self, method): table_method_func, raw=True, engine="numba" ) + def time_ewm_mean(self, method): + self.df.ewm(1, method=method).mean(engine="numba") + from .pandas_vb_common import setup # noqa: F401 isort:skip