@@ -79,6 +79,7 @@ class providing the base-class of operations.
79
79
)
80
80
81
81
from pandas .core import nanops
82
+ from pandas .core ._numba import executor
82
83
import pandas .core .algorithms as algorithms
83
84
from pandas .core .arrays import (
84
85
BaseMaskedArray ,
@@ -1259,6 +1260,44 @@ def _numba_prep(self, func, data):
1259
1260
sorted_data ,
1260
1261
)
1261
1262
1263
@final
def _numba_agg_general(
    self,
    func: Callable,
    engine_kwargs: dict[str, bool] | None,
    numba_cache_key_str: str,
):
    """
    Perform groupby with a standard numerical aggregation function (e.g. mean)
    with Numba.

    Parameters
    ----------
    func : Callable
        Kernel forwarded to ``executor.generate_shared_aggregator`` and to
        ``self._numba_prep``; also the first element of the cache key.
    engine_kwargs : dict[str, bool] or None
        Forwarded unchanged to ``executor.generate_shared_aggregator``.
    numba_cache_key_str : str
        Second element of the ``NUMBA_FUNC_CACHE`` key; also forwarded to
        ``executor.generate_shared_aggregator``.

    Returns
    -------
    Same container type as the selected object
        Built with ``data._constructor`` and indexed by
        ``self.grouper.result_index``. A 1-dimensional selection keeps its
        ``name``; a 2-dimensional selection keeps its ``columns``.

    Raises
    ------
    NotImplementedError
        If ``self.as_index`` is falsy or ``self.axis == 1``.
    """
    # Reject unsupported configurations before doing any preparation work.
    if not self.as_index:
        raise NotImplementedError(
            "as_index=False is not supported. Use .reset_index() instead."
        )
    if self.axis == 1:
        raise NotImplementedError("axis=1 is not supported.")

    with self._group_selection_context():
        data = self._selected_obj

    # The aggregation always runs on a 2-D object; a 1-D selection is
    # promoted here and flattened again once the result is available.
    df = data if data.ndim == 2 else data.to_frame()

    # The third value returned by _numba_prep (the sorted index) is not
    # needed for an aggregation, so it is deliberately discarded.
    starts, ends, _, sorted_data = self._numba_prep(func, df)

    aggregator = executor.generate_shared_aggregator(
        func, engine_kwargs, numba_cache_key_str
    )
    # NOTE(review): the trailing 0 is presumably the kernel's min_periods
    # argument -- confirm against the executor/kernel signature.
    result = aggregator(sorted_data, starts, ends, 0)

    # Remember the aggregator under (func, key string); an entry that is
    # already present is left untouched.
    cache_key = (func, numba_cache_key_str)
    if cache_key not in NUMBA_FUNC_CACHE:
        NUMBA_FUNC_CACHE[cache_key] = aggregator

    index = self.grouper.result_index
    if data.ndim == 1:
        # Undo the to_frame() promotion: flatten the result and keep the name.
        result_kwargs = {"name": data.name}
        result = result.ravel()
    else:
        result_kwargs = {"columns": data.columns}
    return data._constructor(result, index=index, **result_kwargs)
1300
+
1262
1301
@final
1263
1302
def _transform_with_numba (self , data , func , * args , engine_kwargs = None , ** kwargs ):
1264
1303
"""
@@ -1827,7 +1866,12 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
1827
1866
@final
1828
1867
@Substitution (name = "groupby" )
1829
1868
@Substitution (see_also = _common_see_also )
1830
- def mean (self , numeric_only : bool | lib .NoDefault = lib .no_default ):
1869
+ def mean (
1870
+ self ,
1871
+ numeric_only : bool | lib .NoDefault = lib .no_default ,
1872
+ engine : str = "cython" ,
1873
+ engine_kwargs : dict [str , bool ] | None = None ,
1874
+ ):
1831
1875
"""
1832
1876
Compute mean of groups, excluding missing values.
1833
1877
@@ -1837,6 +1881,23 @@ def mean(self, numeric_only: bool | lib.NoDefault = lib.no_default):
1837
1881
Include only float, int, boolean columns. If None, will attempt to use
1838
1882
everything, then use only numeric data.
1839
1883
1884
+ engine : str, default 'cython'
1885
+ * ``'cython'`` : Runs the operation through C-extensions from cython.
1886
+ * ``'numba'`` : Runs the operation through JIT compiled code from numba.
1887
+ * ``None`` : Defaults to ``'cython'`` or globally setting
1888
+ ``compute.use_numba``
1889
+
1890
+ .. versionadded:: 1.4.0
1891
+
1892
+ engine_kwargs : dict, default None
1893
+ * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
1894
+ * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
1895
+ and ``parallel`` dictionary keys. The values must either be ``True`` or
1896
+ ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
1897
+ ``{{'nopython': True, 'nogil': False, 'parallel': False}}``
1898
+
1899
+ .. versionadded:: 1.4.0
1900
+
1840
1901
Returns
1841
1902
-------
1842
1903
pandas.Series or pandas.DataFrame
@@ -1877,12 +1938,17 @@ def mean(self, numeric_only: bool | lib.NoDefault = lib.no_default):
1877
1938
"""
1878
1939
numeric_only = self ._resolve_numeric_only (numeric_only )
1879
1940
1880
- result = self ._cython_agg_general (
1881
- "mean" ,
1882
- alt = lambda x : Series (x ).mean (numeric_only = numeric_only ),
1883
- numeric_only = numeric_only ,
1884
- )
1885
- return result .__finalize__ (self .obj , method = "groupby" )
1941
+ if maybe_use_numba (engine ):
1942
+ from pandas .core ._numba .kernels import sliding_mean
1943
+
1944
+ return self ._numba_agg_general (sliding_mean , engine_kwargs , "groupby_mean" )
1945
+ else :
1946
+ result = self ._cython_agg_general (
1947
+ "mean" ,
1948
+ alt = lambda x : Series (x ).mean (numeric_only = numeric_only ),
1949
+ numeric_only = numeric_only ,
1950
+ )
1951
+ return result .__finalize__ (self .obj , method = "groupby" )
1886
1952
1887
1953
@final
1888
1954
@Substitution (name = "groupby" )
0 commit comments