Skip to content

Commit 7ec827e

Browse files
[PoC] Allow JIT compilation with an internal API
1 parent 57fd502 commit 7ec827e

File tree

2 files changed

+91
-1
lines changed

2 files changed

+91
-1
lines changed

pandas/core/bodo_patched.py

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
"""
2+
This file is here as an example, this code will live in the Numba and
3+
Bodo libraries.
4+
"""
5+
from __future__ import annotations
6+
from collections.abc import Callable
7+
from typing import TYPE_CHECKING, Literal, Any
8+
9+
import pandas as pd
10+
import bodo
11+
12+
if TYPE_CHECKING:
13+
from pandas._typing import Axis, AggFuncType
14+
15+
def __pandas_udf__(
    jit_decorator: Callable,
    obj: pd.Series | pd.DataFrame,
    method: Literal["apply", "map"],
    func: AggFuncType,
    axis: Axis,
    raw: bool,
    result_type: Literal["expand", "reduce", "broadcast"] | None,
    args: tuple,
    kwargs: dict[str, Any],
    by_row: Literal[False, "compat"],
):
    """
    Run a pandas UDF method through a JIT-compiled wrapper.

    Only ``DataFrame.apply`` is handled. The call is wrapped in a small
    function decorated with ``jit_decorator`` and executed immediately.

    Parameters
    ----------
    jit_decorator : Callable
        Decorator applied to the wrapper that performs ``df.apply``.
    obj : Series or DataFrame
        Object the method is being called on.
    method : {"apply", "map"}
        Name of the pandas method being dispatched.
    func : function or str
        Function (or function name) forwarded to ``DataFrame.apply``.
    axis : {0, 1, "index", "columns"}
        Axis forwarded to ``DataFrame.apply``. When ``func`` is a string,
        only ``1`` / ``"columns"`` is accepted.
    raw : bool
        Must be falsy; ``raw=True`` is rejected.
    result_type : {"expand", "reduce", "broadcast"} or None
        Must be ``None``.
    args : tuple
        Must be empty.
    kwargs : dict
        Must be empty.
    by_row : {False, "compat"}
        Accepted for interface compatibility; not used by this
        implementation.

    Returns
    -------
    object
        Whatever the JIT-compiled ``df.apply(func, axis=axis)`` returns.

    Raises
    ------
    NotImplementedError
        If ``obj``/``method`` is anything other than ``DataFrame.apply``, or
        if any of the unsupported options described above is requested.
    """
    if not (isinstance(obj, pd.DataFrame) and method == "apply"):
        raise NotImplementedError(
            f"engine='bodo' not supported for {obj.__class__.__name__}.{method}"
        )

    if result_type is not None:
        raise NotImplementedError(
            "engine='bodo' not supported when result_type is not None"
        )

    if raw:
        raise NotImplementedError(
            "engine='bodo' not supported when raw=True"
        )

    # pandas treats ``axis="columns"`` as a synonym of ``axis=1``; accept
    # both spellings instead of rejecting the string alias.
    if isinstance(func, str) and axis not in (1, "columns"):
        raise NotImplementedError(
            "engine='bodo' only supports axis=1 when func is the name of a "
            "user-defined function"
        )

    if args or kwargs:
        raise NotImplementedError(
            "engine='bodo' not supported when args or kwargs are specified"
        )

    @jit_decorator
    def jit_func(df, func, axis):
        return df.apply(func, axis=axis)

    return jit_func(obj, func, axis)
56+
57+
# Attach the hook to ``bodo.jit`` as an attribute: ``DataFrame.apply`` checks
# the object passed as ``jit`` for ``__pandas_udf__`` and delegates to it.
bodo.jit.__pandas_udf__ = __pandas_udf__

pandas/core/frame.py

+34-1
Original file line numberDiff line numberDiff line change
@@ -10256,6 +10256,7 @@ def apply(
1025610256
by_row: Literal[False, "compat"] = "compat",
1025710257
engine: Literal["python", "numba"] = "python",
1025810258
engine_kwargs: dict[str, bool] | None = None,
10259+
jit: Callable | None = None,
1025910260
**kwargs,
1026010261
):
1026110262
"""
@@ -10345,6 +10346,12 @@ def apply(
1034510346
Pass keyword arguments to the engine.
1034610347
This is currently only used by the numba engine,
1034710348
see the documentation for the engine argument for more information.
10349+
10350+
jit : function, optional
10351+
Numba or Bodo decorator to JIT compile the execution. The main available
10352+
options are ``numba.jit``, ``numba.njit`` or ``bodo.jit``. Parameters can
10353+
be used in the same way as the decorators ``numba.jit(parallel=True)`` etc.
10354+
1034810355
**kwargs
1034910356
Additional keyword arguments to pass as keywords arguments to
1035010357
`func`.
@@ -10435,7 +10442,33 @@ def apply(
1043510442
0 1 2
1043610443
1 1 2
1043710444
2 1 2
10438-
"""
10445+
10446+
Advanced users can speed up their code by using a Just-in-time (JIT) compiler
10447+
with ``apply``. The main JIT compilers available for pandas are Numba and Bodo.
10448+
In general, JIT compilation is only possible when the function passed to
10449+
``apply`` has type stability (variables in the function do not change their
10450+
type during the execution).
10451+
10452+
>>> import bodo
10453+
>>> df.apply(lambda x: x.A + x.B, axis=1, jit=bodo.jit(parallel=True))
10454+
10455+
Note that JIT compilation is only recommended for functions that take a
10456+
significant amount of time to run. Fast functions are unlikely to run faster
10457+
with JIT compilation.
10458+
"""
10459+
if hasattr(jit, "__pandas_udf__"):
10460+
return jit.__pandas_udf__(
10461+
jit_decorator=jit,
10462+
obj=self,
10463+
method="apply",
10464+
func=func,
10465+
axis=axis,
10466+
raw=raw,
10467+
result_type=result_type,
10468+
by_row=by_row,
10469+
args=args,
10470+
kwargs=kwargs)
10471+
1043910472
from pandas.core.apply import frame_apply
1044010473

1044110474
op = frame_apply(

0 commit comments

Comments
 (0)