Add *args support for raw numba apply

auderson · auderson · commit 3a5fc90e8e2e · 2024-05-18T13:48:36.000+08:00
diff --git a/pandas/core/_numba/executor.py b/pandas/core/_numba/executor.py
@@ -24,7 +24,7 @@ def generate_apply_looper(func, nopython=True, nogil=True, parallel=False):
     nb_compat_func = numba.extending.register_jitable(func)
 
     @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
-    def nb_looper(values, axis):
+    def nb_looper(values, axis, *args):
         # Operate on the first row/col in order to get
         # the output shape
         if axis == 0:
@@ -33,7 +33,7 @@ def nb_looper(values, axis):
         else:
             first_elem = values[0]
             dim0 = values.shape[0]
-        res0 = nb_compat_func(first_elem)
+        res0 = nb_compat_func(first_elem, *args)
         # Use np.asarray to get shape for
         # https://github.com/numba/numba/issues/4202#issuecomment-1185981507
         buf_shape = (dim0,) + np.atleast_1d(np.asarray(res0)).shape
@@ -44,11 +44,11 @@ def nb_looper(values, axis):
         if axis == 1:
             buff[0] = res0
             for i in numba.prange(1, values.shape[0]):
-                buff[i] = nb_compat_func(values[i])
+                buff[i] = nb_compat_func(values[i], *args)
         else:
             buff[:, 0] = res0
             for j in numba.prange(1, values.shape[1]):
-                buff[:, j] = nb_compat_func(values[:, j])
+                buff[:, j] = nb_compat_func(values[:, j], *args)
         return buff
 
     return nb_looper
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -51,6 +51,7 @@
 from pandas.core._numba.executor import generate_apply_looper
 import pandas.core.common as com
 from pandas.core.construction import ensure_wrapped_if_datetimelike
+from pandas.core.util.numba_ import get_jit_arguments
 
 if TYPE_CHECKING:
     from collections.abc import (
@@ -972,17 +973,15 @@ def wrapper(*args, **kwargs):
             return wrapper
 
         if engine == "numba":
-            engine_kwargs = {} if engine_kwargs is None else engine_kwargs
-
             # error: Argument 1 to "__call__" of "_lru_cache_wrapper" has
             # incompatible type "Callable[..., Any] | str | list[Callable
             # [..., Any] | str] | dict[Hashable,Callable[..., Any] | str |
             # list[Callable[..., Any] | str]]"; expected "Hashable"
             nb_looper = generate_apply_looper(
                 self.func,  # type: ignore[arg-type]
-                **engine_kwargs,
+                **get_jit_arguments(engine_kwargs, self.kwargs),
             )
-            result = nb_looper(self.values, self.axis)
+            result = nb_looper(self.values, self.axis, *self.args)
             # If we made the result 2-D, squeeze it back to 1-D
             result = np.squeeze(result)
         else:
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
@@ -1718,3 +1718,19 @@ def test_agg_dist_like_and_nonunique_columns():
     result = df.agg({"A": "count"})
     expected = df["A"].count()
     tm.assert_series_equal(result, expected)
+
+
+def test_numba_raw_apply_with_args():
+    # GH:58712
+    pytest.importorskip("numba")  # optional dependency: skip instead of erroring
+    df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+    result = df.apply(lambda x, a, b: x + a + b, args=(1, 2), engine="numba", raw=True)
+    # result is always float dtype, see core/_numba/executor.py:generate_apply_looper
+    expected = df + 3.0
+    tm.assert_frame_equal(result, expected)
+
+    with pytest.raises(
+        pd.errors.NumbaUtilError,
+        match="numba does not support kwargs with nopython=True",
+    ):
+        df.apply(lambda x, a, b: x + a + b, args=(1,), b=2, engine="numba", raw=True)