Skip to content

ENH: Add compute.use_numba configuration for automatically using numba #35182

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jul 15, 2020
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,7 @@ Other enhancements
- :meth:`read_csv` now accepts string values like "0", "0.0", "1", "1.0" as convertible to the nullable boolean dtype (:issue:`34859`)
- :class:`pandas.core.window.ExponentialMovingWindow` now supports a ``times`` argument that allows ``mean`` to be calculated with observations spaced by the timestamps in ``times`` (:issue:`34839`)
- :meth:`DataFrame.agg` and :meth:`Series.agg` now accept named aggregation for renaming the output columns/indexes. (:issue:`26513`)
- ``compute.use_numba`` now exists as a configuration option that utilizes the numba engine when available (:issue:`33966`)

.. ---------------------------------------------------------------------------

Expand Down
17 changes: 17 additions & 0 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,20 @@ def use_numexpr_cb(key):
expressions.set_use_numexpr(cf.get_option(key))


# Description text passed to ``cf.register_option`` for the ``use_numba``
# option under the "compute" prefix.
use_numba_doc = """
: bool
Use the numba engine option for select operations if it is installed,
the default is False
Valid values: False,True
"""


def use_numba_cb(key):
    """
    Push the current value of a config option into the numba utilities.

    Registered as the ``cb`` callback of the ``use_numba`` option.

    Parameters
    ----------
    key : str
        Name of the option whose current value is read with
        ``cf.get_option`` and forwarded to ``set_use_numba``.
    """
    # Kept as a function-local import, matching the original.
    # NOTE(review): presumably this avoids an import cycle - confirm.
    from pandas.core.util.numba_ import set_use_numba

    enabled = cf.get_option(key)
    set_use_numba(enabled)


with cf.config_prefix("compute"):
cf.register_option(
"use_bottleneck",
Expand All @@ -63,6 +77,9 @@ def use_numexpr_cb(key):
cf.register_option(
"use_numexpr", True, use_numexpr_doc, validator=is_bool, cb=use_numexpr_cb
)
cf.register_option(
"use_numba", False, use_numba_doc, validator=is_bool, cb=use_numba_cb
)
#
# options from the "display" namespace

Expand Down
23 changes: 10 additions & 13 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
from pandas.core.util.numba_ import (
NUMBA_FUNC_CACHE,
generate_numba_func,
maybe_use_numba,
split_for_numba,
)

Expand Down Expand Up @@ -227,9 +228,7 @@ def apply(self, func, *args, **kwargs):
@doc(
_agg_template, examples=_agg_examples_doc, klass="Series",
)
def aggregate(
self, func=None, *args, engine="cython", engine_kwargs=None, **kwargs
):
def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):

relabeling = func is None
columns = None
Expand Down Expand Up @@ -480,7 +479,7 @@ def _aggregate_named(self, func, *args, **kwargs):

@Substitution(klass="Series")
@Appender(_transform_template)
def transform(self, func, *args, engine="cython", engine_kwargs=None, **kwargs):
def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
func = self._get_cython_func(func) or func

if not isinstance(func, str):
Expand Down Expand Up @@ -512,7 +511,7 @@ def _transform_general(
Transform with a non-str `func`.
"""

if engine == "numba":
if maybe_use_numba(engine):
numba_func, cache_key = generate_numba_func(
func, engine_kwargs, kwargs, "groupby_transform"
)
Expand All @@ -522,7 +521,7 @@ def _transform_general(
results = []
for name, group in self:
object.__setattr__(group, "name", name)
if engine == "numba":
if maybe_use_numba(engine):
values, index = split_for_numba(group)
res = numba_func(values, index, *args)
if cache_key not in NUMBA_FUNC_CACHE:
Expand Down Expand Up @@ -931,13 +930,11 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
@doc(
_agg_template, examples=_agg_examples_doc, klass="DataFrame",
)
def aggregate(
self, func=None, *args, engine="cython", engine_kwargs=None, **kwargs
):
def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):

relabeling, func, columns, order = reconstruct_func(func, **kwargs)

if engine == "numba":
if maybe_use_numba(engine):
return self._python_agg_general(
func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs
)
Expand Down Expand Up @@ -1378,7 +1375,7 @@ def _transform_general(
applied = []
obj = self._obj_with_exclusions
gen = self.grouper.get_iterator(obj, axis=self.axis)
if engine == "numba":
if maybe_use_numba(engine):
numba_func, cache_key = generate_numba_func(
func, engine_kwargs, kwargs, "groupby_transform"
)
Expand All @@ -1388,7 +1385,7 @@ def _transform_general(
for name, group in gen:
object.__setattr__(group, "name", name)

if engine == "numba":
if maybe_use_numba(engine):
values, index = split_for_numba(group)
res = numba_func(values, index, *args)
if cache_key not in NUMBA_FUNC_CACHE:
Expand Down Expand Up @@ -1439,7 +1436,7 @@ def _transform_general(

@Substitution(klass="DataFrame")
@Appender(_transform_template)
def transform(self, func, *args, engine="cython", engine_kwargs=None, **kwargs):
def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):

# optimized transforms
func = self._get_cython_func(func) or func
Expand Down
9 changes: 6 additions & 3 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class providing the base-class of operations.
from pandas.core.indexes.api import CategoricalIndex, Index, MultiIndex
from pandas.core.series import Series
from pandas.core.sorting import get_group_index_sorter
from pandas.core.util.numba_ import maybe_use_numba

_common_see_also = """
See Also
Expand Down Expand Up @@ -286,9 +287,10 @@ class providing the base-class of operations.
.. versionchanged:: 1.1.0
*args
Positional arguments to pass to func
engine : str, default 'cython'
engine : str, default None
* ``'cython'`` : Runs the function through C-extensions from cython.
* ``'numba'`` : Runs the function through JIT compiled code from numba.
* ``None`` : Defaults to ``'cython'`` or the global setting ``compute.use_numba``.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you list the option here compute.use_numba


.. versionadded:: 1.1.0
engine_kwargs : dict, default None
Expand Down Expand Up @@ -393,9 +395,10 @@ class providing the base-class of operations.
.. versionchanged:: 1.1.0
*args
Positional arguments to pass to func
engine : str, default 'cython'
engine : str, default None
* ``'cython'`` : Runs the function through C-extensions from cython.
* ``'numba'`` : Runs the function through JIT compiled code from numba.
* ``None`` : Defaults to ``'cython'`` or the global setting ``compute.use_numba``.

.. versionadded:: 1.1.0
engine_kwargs : dict, default None
Expand Down Expand Up @@ -1063,7 +1066,7 @@ def _python_agg_general(
# agg_series below assumes ngroups > 0
continue

if engine == "numba":
if maybe_use_numba(engine):
result, counts = self.grouper.agg_series(
obj,
func,
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
from pandas.core.util.numba_ import (
NUMBA_FUNC_CACHE,
generate_numba_func,
maybe_use_numba,
split_for_numba,
)

Expand Down Expand Up @@ -620,7 +621,7 @@ def agg_series(
# Caller is responsible for checking ngroups != 0
assert self.ngroups != 0

if engine == "numba":
if maybe_use_numba(engine):
return self._aggregate_series_pure_python(
obj, func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs
)
Expand Down Expand Up @@ -678,7 +679,7 @@ def _aggregate_series_pure_python(
**kwargs,
):

if engine == "numba":
if maybe_use_numba(engine):
numba_func, cache_key = generate_numba_func(
func, engine_kwargs, kwargs, "groupby_agg"
)
Expand All @@ -691,7 +692,7 @@ def _aggregate_series_pure_python(
splitter = get_splitter(obj, group_index, ngroups, axis=0)

for label, group in splitter:
if engine == "numba":
if maybe_use_numba(engine):
values, index = split_for_numba(group)
res = numba_func(values, index, *args)
if cache_key not in NUMBA_FUNC_CACHE:
Expand Down
13 changes: 13 additions & 0 deletions pandas/core/util/numba_.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,22 @@
from pandas.compat._optional import import_optional_dependency
from pandas.errors import NumbaUtilError

# Module-wide flag read by ``maybe_use_numba`` when the caller passes
# ``engine=None``; written only through ``set_use_numba``.
GLOBAL_USE_NUMBA: bool = False
# Cache keyed by ``(function, operation name)`` tuples.
# NOTE(review): values are presumably the numba-jitted callables - confirm.
NUMBA_FUNC_CACHE: Dict[Tuple[Callable, str], Callable] = dict()


def maybe_use_numba(engine: Optional[str]) -> bool:
    """
    Signal whether to use numba routines.

    Parameters
    ----------
    engine : str or None
        Engine requested by the caller.

    Returns
    -------
    bool
        True when ``engine`` is ``"numba"``; the value of the module-level
        ``GLOBAL_USE_NUMBA`` flag when ``engine`` is None; False otherwise.
    """
    # An explicit request always wins over the global flag.
    if engine == "numba":
        return True
    # Only an unspecified engine defers to the global flag.
    if engine is None:
        return GLOBAL_USE_NUMBA
    return False


def set_use_numba(enable: bool = False) -> None:
    """
    Set the module-level ``GLOBAL_USE_NUMBA`` flag.

    Parameters
    ----------
    enable : bool, default False
        New value for the flag. When truthy, the optional ``numba``
        dependency is checked via ``import_optional_dependency`` first.

    Notes
    -----
    The dependency check runs before the assignment, so an exception raised
    by it leaves the flag unchanged.
    """
    global GLOBAL_USE_NUMBA
    if not enable:
        # Turning the flag off needs no dependency check.
        GLOBAL_USE_NUMBA = enable
        return
    import_optional_dependency("numba")
    GLOBAL_USE_NUMBA = enable


def check_kwargs_and_nopython(
kwargs: Optional[Dict] = None, nopython: Optional[bool] = None
) -> None:
Expand Down
37 changes: 16 additions & 21 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
import pandas.core.common as com
from pandas.core.construction import extract_array
from pandas.core.indexes.api import Index, MultiIndex, ensure_index
from pandas.core.util.numba_ import NUMBA_FUNC_CACHE
from pandas.core.util.numba_ import NUMBA_FUNC_CACHE, maybe_use_numba
from pandas.core.window.common import (
WindowGroupByMixin,
_doc_template,
Expand Down Expand Up @@ -1298,10 +1298,11 @@ def count(self):
objects instead.
If you are just applying a NumPy reduction function this will
achieve much better performance.
engine : str, default 'cython'
engine : str, default None
* ``'cython'`` : Runs rolling apply through C-extensions from cython.
* ``'numba'`` : Runs rolling apply through JIT compiled code from numba.
Only available when ``raw`` is set to ``True``.
* ``None`` : Defaults to ``'cython'`` or the global setting ``compute.use_numba``.

.. versionadded:: 1.0.0

Expand Down Expand Up @@ -1357,18 +1358,7 @@ def apply(
if not is_bool(raw):
raise ValueError("raw parameter must be `True` or `False`")

if engine == "cython":
if engine_kwargs is not None:
raise ValueError("cython engine does not accept engine_kwargs")
# Cython apply functions handle center, so don't need to use
# _apply's center handling
window = self._get_window()
offset = calculate_center_offset(window) if self.center else 0
apply_func = self._generate_cython_apply_func(
args, kwargs, raw, offset, func
)
center = False
elif engine == "numba":
if maybe_use_numba(engine):
if raw is False:
raise ValueError("raw must be `True` when using the numba engine")
cache_key = (func, "rolling_apply")
Expand All @@ -1380,6 +1370,17 @@ def apply(
args, kwargs, func, engine_kwargs
)
center = self.center
elif engine in ("cython", None):
if engine_kwargs is not None:
raise ValueError("cython engine does not accept engine_kwargs")
# Cython apply functions handle center, so don't need to use
# _apply's center handling
window = self._get_window()
offset = calculate_center_offset(window) if self.center else 0
apply_func = self._generate_cython_apply_func(
args, kwargs, raw, offset, func
)
center = False
else:
raise ValueError("engine must be either 'numba' or 'cython'")

Expand Down Expand Up @@ -2053,13 +2054,7 @@ def count(self):
@Substitution(name="rolling")
@Appender(_shared_docs["apply"])
def apply(
self,
func,
raw=False,
engine="cython",
engine_kwargs=None,
args=None,
kwargs=None,
self, func, raw=False, engine=None, engine_kwargs=None, args=None, kwargs=None,
):
return super().apply(
func,
Expand Down
17 changes: 16 additions & 1 deletion pandas/tests/groupby/aggregate/test_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pandas.errors import NumbaUtilError
import pandas.util._test_decorators as td

from pandas import DataFrame
from pandas import DataFrame, option_context
import pandas._testing as tm
from pandas.core.util.numba_ import NUMBA_FUNC_CACHE

Expand Down Expand Up @@ -113,3 +113,18 @@ def func_2(values, index):
result = grouped.agg(func_1, engine="numba", engine_kwargs=engine_kwargs)
expected = grouped.agg(lambda x: np.mean(x) - 3.4, engine="cython")
tm.assert_equal(result, expected)


@td.skip_if_no("numba", "0.46.0")
def test_use_global_config():
    """
    ``engine=None`` under ``compute.use_numba=True`` must give the same
    groupby aggregation result as an explicit ``engine="numba"``.
    """

    def func_1(values, index):
        return np.mean(values) - 3.4

    frame = DataFrame(
        {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1],
    )
    by_key = frame.groupby(0)
    # Reference result: numba requested explicitly, global option untouched.
    expected = by_key.agg(func_1, engine="numba")
    with option_context("compute.use_numba", True):
        result = by_key.agg(func_1, engine=None)
    tm.assert_frame_equal(expected, result)
17 changes: 16 additions & 1 deletion pandas/tests/groupby/transform/test_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pandas.errors import NumbaUtilError
import pandas.util._test_decorators as td

from pandas import DataFrame
from pandas import DataFrame, option_context
import pandas._testing as tm
from pandas.core.util.numba_ import NUMBA_FUNC_CACHE

Expand Down Expand Up @@ -112,3 +112,18 @@ def func_2(values, index):
result = grouped.transform(func_1, engine="numba", engine_kwargs=engine_kwargs)
expected = grouped.transform(lambda x: x + 1, engine="cython")
tm.assert_equal(result, expected)


@td.skip_if_no("numba", "0.46.0")
def test_use_global_config():
    """
    ``engine=None`` under ``compute.use_numba=True`` must give the same
    groupby transform result as an explicit ``engine="numba"``.
    """

    def func_1(values, index):
        return values + 1

    frame = DataFrame(
        {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1],
    )
    by_key = frame.groupby(0)
    # Reference result: numba requested explicitly, global option untouched.
    expected = by_key.transform(func_1, engine="numba")
    with option_context("compute.use_numba", True):
        result = by_key.transform(func_1, engine=None)
    tm.assert_frame_equal(expected, result)
12 changes: 12 additions & 0 deletions pandas/tests/util/test_numba.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import pytest

import pandas.util._test_decorators as td

from pandas import option_context


@td.skip_if_installed("numba")
def test_numba_not_installed_option_context():
    """
    Entering ``option_context("compute.use_numba", True)`` must raise an
    ImportError matching "Missing optional" when numba is not installed.
    """
    # Single ``with`` statement; equivalent to nesting the two managers.
    with pytest.raises(ImportError, match="Missing optional"), option_context(
        "compute.use_numba", True
    ):
        pass
14 changes: 13 additions & 1 deletion pandas/tests/window/test_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pandas.util._test_decorators as td

from pandas import Series
from pandas import Series, option_context
import pandas._testing as tm
from pandas.core.util.numba_ import NUMBA_FUNC_CACHE

Expand Down Expand Up @@ -75,3 +75,15 @@ def func_2(x):
)
expected = roll.apply(func_1, engine="cython", raw=True)
tm.assert_series_equal(result, expected)


@td.skip_if_no("numba", "0.46.0")
def test_use_global_config():
    """
    ``engine=None`` under ``compute.use_numba=True`` must give the same
    rolling apply result as an explicit ``engine="numba"``.
    """

    def f(x):
        return np.mean(x) + 2

    ser = Series(range(10))
    with option_context("compute.use_numba", True):
        result = ser.rolling(2).apply(f, engine=None, raw=True)
    # Reference result is computed outside the option context on purpose.
    expected = ser.rolling(2).apply(f, engine="numba", raw=True)
    tm.assert_series_equal(expected, result)