Skip to content

ENH: Numba engine for EWM.mean #41267

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ Other enhancements
- Add support for dict-like names in :meth:`MultiIndex.set_names` and :meth:`MultiIndex.rename` (:issue:`20421`)
- :func:`pandas.read_excel` can now auto detect .xlsb files (:issue:`35416`)
- :class:`pandas.ExcelWriter` now accepts an ``if_sheet_exists`` parameter to control the behaviour of append mode when writing to existing sheets (:issue:`40230`)
- :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`)
- :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.ExponentialMovingWindow.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`, :issue:`41267`)
- :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
- :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
- :meth:`DataFrame.applymap` can now accept kwargs to pass on to func (:issue:`39987`)
Expand Down
85 changes: 30 additions & 55 deletions pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,18 @@
args_compat,
create_section_header,
kwargs_compat,
numba_notes,
template_header,
template_returns,
template_see_also,
window_agg_numba_parameters,
)
from pandas.core.window.indexers import (
BaseIndexer,
ExponentialMovingWindowIndexer,
GroupbyIndexer,
)
from pandas.core.window.numba_ import generate_numba_groupby_ewma_func
from pandas.core.window.numba_ import generate_numba_ewma_func
from pandas.core.window.rolling import (
BaseWindow,
BaseWindowGroupby,
Expand Down Expand Up @@ -372,26 +374,41 @@ def aggregate(self, func, *args, **kwargs):
template_header,
create_section_header("Parameters"),
args_compat,
window_agg_numba_parameters,
kwargs_compat,
create_section_header("Returns"),
template_returns,
create_section_header("See Also"),
template_see_also[:-1],
template_see_also,
create_section_header("Notes"),
numba_notes.replace("\n", "", 1),
window_method="ewm",
aggregation_description="(exponential weighted moment) mean",
agg_method="mean",
)
def mean(self, *args, **kwargs):
    # No inline docstring here: the decorator arguments that precede this
    # method (template_header, template_returns, ...) appear to assemble it.

    # Hand any extra positional/keyword arguments to the validator first.
    nv.validate_window_func("mean", args, kwargs)

    # Bind this object's EWM settings to the ewma routine from
    # window_aggregations so _apply receives a pre-configured callable.
    ewma_kernel = partial(
        window_aggregations.ewma,
        com=self._com,
        adjust=self.adjust,
        ignore_na=self.ignore_na,
        deltas=self._deltas,
    )
    return self._apply(ewma_kernel)
def mean(self, *args, engine=None, engine_kwargs=None, **kwargs):
    # No inline docstring here: the decorator arguments that precede this
    # method (template_header, window_agg_numba_parameters, numba_notes, ...)
    # appear to assemble it.

    # --- numba path -------------------------------------------------------
    # NOTE(review): *args / **kwargs are not passed to
    # nv.validate_window_func on this path; only the cython path below
    # validates them. Confirm that this is intended.
    if maybe_use_numba(engine):
        numba_kernel = generate_numba_ewma_func(
            engine_kwargs, self._com, self.adjust, self.ignore_na, self._deltas
        )
        return self._apply(
            numba_kernel,
            numba_cache_key=(lambda x: x, "ewma"),
        )

    # --- anything that is neither numba nor cython/None is rejected -------
    if engine not in ("cython", None):
        raise ValueError("engine must be either 'numba' or 'cython'")

    # --- cython path ------------------------------------------------------
    if engine_kwargs is not None:
        raise ValueError("cython engine does not accept engine_kwargs")

    nv.validate_window_func("mean", args, kwargs)

    # Bind this object's EWM settings to the ewma routine from
    # window_aggregations so _apply receives a pre-configured callable.
    cython_kernel = partial(
        window_aggregations.ewma,
        com=self._com,
        adjust=self.adjust,
        ignore_na=self.ignore_na,
        deltas=self._deltas,
    )
    return self._apply(cython_kernel)

@doc(
template_header,
Expand Down Expand Up @@ -635,45 +652,3 @@ def _get_window_indexer(self) -> GroupbyIndexer:
window_indexer=ExponentialMovingWindowIndexer,
)
return window_indexer

def mean(self, engine=None, engine_kwargs=None):
    """
    Compute the mean of this groupby EWM window with the requested engine.

    Parameters
    ----------
    engine : str, default None
        * ``'cython'`` : Runs mean through C-extensions from cython.
        * ``'numba'`` : Runs mean through JIT compiled code from numba.
        * ``None`` : Defaults to ``'cython'`` or globally setting
          ``compute.use_numba``

        .. versionadded:: 1.2.0

    engine_kwargs : dict, default None
        * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
        * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
          and ``parallel`` dictionary keys. The values must either be ``True`` or
          ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
          ``{'nopython': True, 'nogil': False, 'parallel': False}``.

        .. versionadded:: 1.2.0

    Returns
    -------
    Series or DataFrame
        Return type is determined by the caller.

    Raises
    ------
    ValueError
        If ``engine_kwargs`` is given together with the cython engine, or if
        ``engine`` is neither ``'numba'``, ``'cython'`` nor ``None``.
    """
    # numba path: build the jitted kernel from this object's EWM settings.
    if maybe_use_numba(engine):
        numba_kernel = generate_numba_groupby_ewma_func(
            engine_kwargs, self._com, self.adjust, self.ignore_na, self._deltas
        )
        return self._apply(
            numba_kernel,
            numba_cache_key=(lambda x: x, "groupby_ewma"),
        )

    # Anything that is neither numba nor cython/None is rejected.
    if engine not in ("cython", None):
        raise ValueError("engine must be either 'numba' or 'cython'")

    # cython path: no engine options are accepted; defer to the parent class.
    if engine_kwargs is not None:
        raise ValueError("cython engine does not accept engine_kwargs")
    return super().mean()
16 changes: 8 additions & 8 deletions pandas/core/window/numba_.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,15 @@ def roll_apply(
return roll_apply


def generate_numba_groupby_ewma_func(
def generate_numba_ewma_func(
engine_kwargs: Optional[Dict[str, bool]],
com: float,
adjust: bool,
ignore_na: bool,
deltas: np.ndarray,
):
"""
Generate a numba jitted groupby ewma function specified by values
Generate a numba jitted ewma function specified by values
from engine_kwargs.

Parameters
Expand All @@ -106,30 +106,30 @@ def generate_numba_groupby_ewma_func(
"""
nopython, nogil, parallel = get_jit_arguments(engine_kwargs)

cache_key = (lambda x: x, "groupby_ewma")
cache_key = (lambda x: x, "ewma")
if cache_key in NUMBA_FUNC_CACHE:
return NUMBA_FUNC_CACHE[cache_key]

numba = import_optional_dependency("numba")

@numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
def groupby_ewma(
def ewma(
values: np.ndarray,
begin: np.ndarray,
end: np.ndarray,
minimum_periods: int,
) -> np.ndarray:
result = np.empty(len(values))
alpha = 1.0 / (1.0 + com)
old_wt_factor = 1.0 - alpha
new_wt = 1.0 if adjust else alpha

for i in numba.prange(len(begin)):
start = begin[i]
stop = end[i]
window = values[start:stop]
sub_result = np.empty(len(window))

old_wt_factor = 1.0 - alpha
new_wt = 1.0 if adjust else alpha

weighted_avg = window[0]
nobs = int(not np.isnan(weighted_avg))
sub_result[0] = weighted_avg if nobs >= minimum_periods else np.nan
Expand Down Expand Up @@ -166,7 +166,7 @@ def groupby_ewma(

return result

return groupby_ewma
return ewma


def generate_numba_table_func(
Expand Down
40 changes: 27 additions & 13 deletions pandas/tests/window/test_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,30 +123,44 @@ def func_2(x):


@td.skip_if_no("numba", "0.46.0")
class TestGroupbyEWMMean:
def test_invalid_engine(self):
class TestEWMMean:
@pytest.mark.parametrize(
"grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
)
def test_invalid_engine(self, grouper):
df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)})
with pytest.raises(ValueError, match="engine must be either"):
df.groupby("A").ewm(com=1.0).mean(engine="foo")
grouper(df).ewm(com=1.0).mean(engine="foo")

def test_invalid_engine_kwargs(self):
@pytest.mark.parametrize(
"grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
)
def test_invalid_engine_kwargs(self, grouper):
df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)})
with pytest.raises(ValueError, match="cython engine does not"):
df.groupby("A").ewm(com=1.0).mean(
grouper(df).ewm(com=1.0).mean(
engine="cython", engine_kwargs={"nopython": True}
)

def test_cython_vs_numba(self, nogil, parallel, nopython, ignore_na, adjust):
@pytest.mark.parametrize(
"grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
)
def test_cython_vs_numba(
self, grouper, nogil, parallel, nopython, ignore_na, adjust
):
df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)})
gb_ewm = df.groupby("A").ewm(com=1.0, adjust=adjust, ignore_na=ignore_na)
ewm = grouper(df).ewm(com=1.0, adjust=adjust, ignore_na=ignore_na)

engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
result = gb_ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
expected = gb_ewm.mean(engine="cython")
result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
expected = ewm.mean(engine="cython")

tm.assert_frame_equal(result, expected)

def test_cython_vs_numba_times(self, nogil, parallel, nopython, ignore_na):
@pytest.mark.parametrize(
"grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
)
def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_na):
# GH 40951
halflife = "23 days"
times = to_datetime(
Expand All @@ -160,13 +174,13 @@ def test_cython_vs_numba_times(self, nogil, parallel, nopython, ignore_na):
]
)
df = DataFrame({"A": ["a", "b", "a", "b", "b", "a"], "B": [0, 0, 1, 1, 2, 2]})
gb_ewm = df.groupby("A").ewm(
ewm = grouper(df).ewm(
halflife=halflife, adjust=True, ignore_na=ignore_na, times=times
)

engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
result = gb_ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
expected = gb_ewm.mean(engine="cython")
result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
expected = ewm.mean(engine="cython")

tm.assert_frame_equal(result, expected)

Expand Down