Skip to content

Commit f2f24a9

Browse files
ENH: Support third-party execution engines in Series.map (#61467)
1 parent 97897e1 commit f2f24a9

File tree

6 files changed

+136
-62
lines changed

6 files changed

+136
-62
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ Other enhancements
7373
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
7474
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
7575
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
76+
- :meth:`Series.map` now accepts an ``engine`` parameter to allow execution with a third-party execution engine (:issue:`61125`)
7677
- :meth:`Series.str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
7778
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
7879
- :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)

pandas/core/series.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4328,6 +4328,7 @@ def map(
43284328
self,
43294329
func: Callable | Mapping | Series | None = None,
43304330
na_action: Literal["ignore"] | None = None,
4331+
engine: Callable | None = None,
43314332
**kwargs,
43324333
) -> Series:
43334334
"""
@@ -4344,6 +4345,25 @@ def map(
43444345
na_action : {None, 'ignore'}, default None
43454346
If 'ignore', propagate NaN values, without passing them to the
43464347
mapping correspondence.
4348+
engine : decorator, optional
4349+
Choose the execution engine to use to run the function. Only used for
4350+
functions. If ``map`` is called with a mapping or ``Series``, an
4351+
exception will be raised. If ``engine`` is not provided the function will
4352+
be executed by the regular Python interpreter.
4353+
4354+
Options include JIT compilers such as Numba, Bodo or Blosc2, which in some
4355+
cases can speed up the execution. To use an executor you can provide the
4356+
decorators ``numba.jit``, ``numba.njit``, ``bodo.jit`` or ``blosc2.jit``.
4357+
You can also provide the decorator with parameters, like
4358+
``numba.jit(nogil=True)``.
4359+
4360+
Not all functions can be executed with all execution engines. In general,
4361+
JIT compilers will require type stability in the function (no variable
4362+
should change data type during the execution). And not all pandas and
4363+
NumPy APIs are supported. Check the engine documentation for limitations.
4364+
4365+
.. versionadded:: 3.0.0
4366+
43474367
**kwargs
43484368
Additional keyword arguments to pass as keyword arguments to
43494369
`func`.
@@ -4423,6 +4443,25 @@ def map(
44234443
else:
44244444
raise ValueError("The `func` parameter is required")
44254445

4446+
if engine is not None:
4447+
if not callable(func):
4448+
raise ValueError(
4449+
"The engine argument can only be specified when func is a function"
4450+
)
4451+
if not hasattr(engine, "__pandas_udf__"):
4452+
raise ValueError(f"Not a valid engine: {engine!r}")
4453+
result = engine.__pandas_udf__.map( # type: ignore[attr-defined]
4454+
data=self,
4455+
func=func,
4456+
args=(),
4457+
kwargs=kwargs,
4458+
decorator=engine,
4459+
skip_na=na_action == "ignore",
4460+
)
4461+
if not isinstance(result, Series):
4462+
result = Series(result, index=self.index, name=self.name)
4463+
return result.__finalize__(self, method="map")
4464+
44264465
if callable(func):
44274466
func = functools.partial(func, **kwargs)
44284467
new_values = self._map_values(func, na_action=na_action)

pandas/tests/apply/conftest.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import numpy as np
2+
import pytest
3+
4+
from pandas import (
5+
DataFrame,
6+
Series,
7+
)
8+
from pandas.api.executors import BaseExecutionEngine
9+
10+
11+
class MockExecutionEngine(BaseExecutionEngine):
12+
"""
13+
Execution Engine to test if the execution engine interface receives and
14+
uses all parameters provided by the user.
15+
16+
This engine behaves like the default Python engine by delegating to it; no extra
17+
functionality is implemented here.
18+
19+
When testing, this will be called when this engine is provided, and then the
20+
same pandas.map and pandas.apply function will be called, but without engine,
21+
executing the default behavior from the python engine.
22+
"""
23+
24+
def map(data, func, args, kwargs, decorator, skip_na):
25+
kwargs_to_pass = kwargs if isinstance(data, DataFrame) else {}
26+
return data.map(func, na_action="ignore" if skip_na else None, **kwargs_to_pass)
27+
28+
def apply(data, func, args, kwargs, decorator, axis):
29+
if isinstance(data, Series):
30+
return data.apply(func, convert_dtype=True, args=args, by_row=False)
31+
elif isinstance(data, DataFrame):
32+
return data.apply(
33+
func,
34+
axis=axis,
35+
raw=False,
36+
result_type=None,
37+
args=args,
38+
by_row="compat",
39+
**kwargs,
40+
)
41+
else:
42+
assert isinstance(data, np.ndarray)
43+
44+
def wrap_function(func):
45+
# https://github.com/numpy/numpy/issues/8352
46+
def wrapper(*args, **kwargs):
47+
result = func(*args, **kwargs)
48+
if isinstance(result, str):
49+
result = np.array(result, dtype=object)
50+
return result
51+
52+
return wrapper
53+
54+
return np.apply_along_axis(wrap_function(func), axis, data, *args, **kwargs)
55+
56+
57+
class MockEngineDecorator:
58+
__pandas_udf__ = MockExecutionEngine
59+
60+
61+
@pytest.fixture(params=[None, MockEngineDecorator])
62+
def engine(request):
63+
return request.param

pandas/tests/apply/test_frame_apply.py

Lines changed: 1 addition & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -17,63 +17,11 @@
1717
date_range,
1818
)
1919
import pandas._testing as tm
20-
from pandas.api.executors import BaseExecutionEngine
20+
from pandas.tests.apply.conftest import MockEngineDecorator
2121
from pandas.tests.frame.common import zip_frames
2222
from pandas.util.version import Version
2323

2424

25-
class MockExecutionEngine(BaseExecutionEngine):
26-
"""
27-
Execution Engine to test if the execution engine interface receives and
28-
uses all parameters provided by the user.
29-
30-
Making this engine work as the default Python engine by calling it, no extra
31-
functionality is implemented here.
32-
33-
When testing, this will be called when this engine is provided, and then the
34-
same pandas.map and pandas.apply function will be called, but without engine,
35-
executing the default behavior from the python engine.
36-
"""
37-
38-
def map(data, func, args, kwargs, decorator, skip_na):
39-
kwargs_to_pass = kwargs if isinstance(data, DataFrame) else {}
40-
return data.map(
41-
func, action_na="ignore" if skip_na else False, **kwargs_to_pass
42-
)
43-
44-
def apply(data, func, args, kwargs, decorator, axis):
45-
if isinstance(data, Series):
46-
return data.apply(func, convert_dtype=True, args=args, by_row=False)
47-
elif isinstance(data, DataFrame):
48-
return data.apply(
49-
func,
50-
axis=axis,
51-
raw=False,
52-
result_type=None,
53-
args=args,
54-
by_row="compat",
55-
**kwargs,
56-
)
57-
else:
58-
assert isinstance(data, np.ndarray)
59-
60-
def wrap_function(func):
61-
# https://github.com/numpy/numpy/issues/8352
62-
def wrapper(*args, **kwargs):
63-
result = func(*args, **kwargs)
64-
if isinstance(result, str):
65-
result = np.array(result, dtype=object)
66-
return result
67-
68-
return wrapper
69-
70-
return np.apply_along_axis(wrap_function(func), axis, data, *args, **kwargs)
71-
72-
73-
class MockEngineDecorator:
74-
__pandas_udf__ = MockExecutionEngine
75-
76-
7725
@pytest.fixture
7826
def int_frame_const_col():
7927
"""

pandas/tests/apply/test_series_apply.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -376,13 +376,13 @@ def test_demo():
376376

377377

378378
@pytest.mark.parametrize("func", [str, lambda x: str(x)])
379-
def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row):
379+
def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row, engine):
380380
# test that we are evaluating row-by-row first if by_row="compat"
381381
# else vectorized evaluation
382382
result = string_series.apply(func, by_row=by_row)
383383

384384
if by_row:
385-
expected = string_series.map(func)
385+
expected = string_series.map(func, engine=engine)
386386
tm.assert_series_equal(result, expected)
387387
else:
388388
assert result == str(string_series)

pandas/tests/series/methods/test_map.py

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
)
2222
import pandas._testing as tm
2323

24+
# The fixture is mostly used in pandas/tests/apply, so it's defined in that
25+
# conftest, which is out of scope here. So we need to import it manually.
26+
from pandas.tests.apply.conftest import engine # noqa: F401
27+
2428

2529
def test_series_map_box_timedelta():
2630
# GH#11349
@@ -32,16 +36,20 @@ def f(x):
3236
ser.map(f)
3337

3438

35-
def test_map_callable(datetime_series):
39+
def test_map_callable(datetime_series, engine): # noqa: F811
3640
with np.errstate(all="ignore"):
37-
tm.assert_series_equal(datetime_series.map(np.sqrt), np.sqrt(datetime_series))
41+
tm.assert_series_equal(
42+
datetime_series.map(np.sqrt, engine=engine), np.sqrt(datetime_series)
43+
)
3844

3945
# map function element-wise
40-
tm.assert_series_equal(datetime_series.map(math.exp), np.exp(datetime_series))
46+
tm.assert_series_equal(
47+
datetime_series.map(math.exp, engine=engine), np.exp(datetime_series)
48+
)
4149

4250
# empty series
4351
s = Series(dtype=object, name="foo", index=Index([], name="bar"))
44-
rs = s.map(lambda x: x)
52+
rs = s.map(lambda x: x, engine=engine)
4553
tm.assert_series_equal(s, rs)
4654

4755
# check all metadata (GH 9322)
@@ -52,7 +60,7 @@ def test_map_callable(datetime_series):
5260

5361
# index but no data
5462
s = Series(index=[1, 2, 3], dtype=np.float64)
55-
rs = s.map(lambda x: x)
63+
rs = s.map(lambda x: x, engine=engine)
5664
tm.assert_series_equal(s, rs)
5765

5866

@@ -269,10 +277,10 @@ def test_map_decimal(string_series):
269277
assert isinstance(result.iloc[0], Decimal)
270278

271279

272-
def test_map_na_exclusion():
280+
def test_map_na_exclusion(engine): # noqa: F811
273281
s = Series([1.5, np.nan, 3, np.nan, 5])
274282

275-
result = s.map(lambda x: x * 2, na_action="ignore")
283+
result = s.map(lambda x: x * 2, na_action="ignore", engine=engine)
276284
exp = s * 2
277285
tm.assert_series_equal(result, exp)
278286

@@ -628,3 +636,18 @@ def test_map_no_func_or_arg():
628636
def test_map_func_is_none():
629637
with pytest.raises(ValueError, match="The `func` parameter is required"):
630638
Series([1, 2]).map(func=None)
639+
640+
641+
@pytest.mark.parametrize("func", [{}, {1: 2}, Series([3, 4])])
642+
def test_map_engine_no_function(func):
643+
s = Series([1, 2])
644+
645+
with pytest.raises(ValueError, match="engine argument can only be specified"):
646+
s.map(func, engine="something")
647+
648+
649+
def test_map_engine_not_executor():
650+
s = Series([1, 2])
651+
652+
with pytest.raises(ValueError, match="Not a valid engine: 'something'"):
653+
s.map(lambda x: x, engine="something")

0 commit comments

Comments
 (0)