@@ -1272,6 +1272,7 @@ def _numba_agg_general(
1272
1272
func : Callable ,
1273
1273
engine_kwargs : dict [str , bool ] | None ,
1274
1274
numba_cache_key_str : str ,
1275
+ * aggregator_args ,
1275
1276
):
1276
1277
"""
1277
1278
Perform groupby with a standard numerical aggregation function (e.g. mean)
@@ -1291,7 +1292,7 @@ def _numba_agg_general(
1291
1292
aggregator = executor .generate_shared_aggregator (
1292
1293
func , engine_kwargs , numba_cache_key_str
1293
1294
)
1294
- result = aggregator (sorted_data , starts , ends , 0 )
1295
+ result = aggregator (sorted_data , starts , ends , 0 , * aggregator_args )
1295
1296
1296
1297
cache_key = (func , numba_cache_key_str )
1297
1298
if cache_key not in NUMBA_FUNC_CACHE :
@@ -1989,7 +1990,12 @@ def median(self, numeric_only: bool | lib.NoDefault = lib.no_default):
1989
1990
@final
1990
1991
@Substitution (name = "groupby" )
1991
1992
@Appender (_common_see_also )
1992
- def std (self , ddof : int = 1 ):
1993
+ def std (
1994
+ self ,
1995
+ ddof : int = 1 ,
1996
+ engine : str | None = None ,
1997
+ engine_kwargs : dict [str , bool ] | None = None ,
1998
+ ):
1993
1999
"""
1994
2000
Compute standard deviation of groups, excluding missing values.
1995
2001
@@ -2000,23 +2006,52 @@ def std(self, ddof: int = 1):
2000
2006
ddof : int, default 1
2001
2007
Degrees of freedom.
2002
2008
2009
+ engine : str, default None
2010
+ * ``'cython'`` : Runs the operation through C-extensions from cython.
2011
+ * ``'numba'`` : Runs the operation through JIT compiled code from numba.
2012
+ * ``None`` : Defaults to ``'cython'`` or the global setting
2013
+ ``compute.use_numba``
2014
+
2015
+ .. versionadded:: 1.4.0
2016
+
2017
+ engine_kwargs : dict, default None
2018
+ * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
2019
+ * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
2020
+ and ``parallel`` dictionary keys. The values must be either ``True`` or
2021
+ ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
2022
+ ``{{'nopython': True, 'nogil': False, 'parallel': False}}``
2023
+
2024
+ .. versionadded:: 1.4.0
2025
+
2003
2026
Returns
2004
2027
-------
2005
2028
Series or DataFrame
2006
2029
Standard deviation of values within each group.
2007
2030
"""
2008
- return self ._get_cythonized_result (
2009
- libgroupby .group_var ,
2010
- needs_counts = True ,
2011
- cython_dtype = np .dtype (np .float64 ),
2012
- post_processing = lambda vals , inference : np .sqrt (vals ),
2013
- ddof = ddof ,
2014
- )
2031
+ if maybe_use_numba (engine ):
2032
+ from pandas .core ._numba .kernels import sliding_var
2033
+
2034
+ return np .sqrt (
2035
+ self ._numba_agg_general (sliding_var , engine_kwargs , "groupby_std" , ddof )
2036
+ )
2037
+ else :
2038
+ return self ._get_cythonized_result (
2039
+ libgroupby .group_var ,
2040
+ needs_counts = True ,
2041
+ cython_dtype = np .dtype (np .float64 ),
2042
+ post_processing = lambda vals , inference : np .sqrt (vals ),
2043
+ ddof = ddof ,
2044
+ )
2015
2045
2016
2046
@final
2017
2047
@Substitution (name = "groupby" )
2018
2048
@Appender (_common_see_also )
2019
- def var (self , ddof : int = 1 ):
2049
+ def var (
2050
+ self ,
2051
+ ddof : int = 1 ,
2052
+ engine : str | None = None ,
2053
+ engine_kwargs : dict [str , bool ] | None = None ,
2054
+ ):
2020
2055
"""
2021
2056
Compute variance of groups, excluding missing values.
2022
2057
@@ -2027,20 +2062,46 @@ def var(self, ddof: int = 1):
2027
2062
ddof : int, default 1
2028
2063
Degrees of freedom.
2029
2064
2065
+ engine : str, default None
2066
+ * ``'cython'`` : Runs the operation through C-extensions from cython.
2067
+ * ``'numba'`` : Runs the operation through JIT compiled code from numba.
2068
+ * ``None`` : Defaults to ``'cython'`` or the global setting
2069
+ ``compute.use_numba``
2070
+
2071
+ .. versionadded:: 1.4.0
2072
+
2073
+ engine_kwargs : dict, default None
2074
+ * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
2075
+ * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
2076
+ and ``parallel`` dictionary keys. The values must be either ``True`` or
2077
+ ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
2078
+ ``{{'nopython': True, 'nogil': False, 'parallel': False}}``
2079
+
2080
+ .. versionadded:: 1.4.0
2081
+
2030
2082
Returns
2031
2083
-------
2032
2084
Series or DataFrame
2033
2085
Variance of values within each group.
2034
2086
"""
2035
- if ddof == 1 :
2036
- numeric_only = self ._resolve_numeric_only (lib .no_default )
2037
- return self ._cython_agg_general (
2038
- "var" , alt = lambda x : Series (x ).var (ddof = ddof ), numeric_only = numeric_only
2087
+ if maybe_use_numba (engine ):
2088
+ from pandas .core ._numba .kernels import sliding_var
2089
+
2090
+ return self ._numba_agg_general (
2091
+ sliding_var , engine_kwargs , "groupby_var" , ddof
2039
2092
)
2040
2093
else :
2041
- func = lambda x : x .var (ddof = ddof )
2042
- with self ._group_selection_context ():
2043
- return self ._python_agg_general (func )
2094
+ if ddof == 1 :
2095
+ numeric_only = self ._resolve_numeric_only (lib .no_default )
2096
+ return self ._cython_agg_general (
2097
+ "var" ,
2098
+ alt = lambda x : Series (x ).var (ddof = ddof ),
2099
+ numeric_only = numeric_only ,
2100
+ )
2101
+ else :
2102
+ func = lambda x : x .var (ddof = ddof )
2103
+ with self ._group_selection_context ():
2104
+ return self ._python_agg_general (func )
2044
2105
2045
2106
@final
2046
2107
@Substitution (name = "groupby" )
0 commit comments