@@ -1502,7 +1502,7 @@ def _python_apply_general(
1502
1502
)
1503
1503
1504
1504
@final
1505
- def _python_agg_general (self , func , * args , ** kwargs ):
1505
+ def _python_agg_general (self , func , * args , raise_on_typeerror = False , ** kwargs ):
1506
1506
func = com .is_builtin_func (func )
1507
1507
f = lambda x : func (x , * args , ** kwargs )
1508
1508
@@ -1520,6 +1520,8 @@ def _python_agg_general(self, func, *args, **kwargs):
1520
1520
# if this function is invalid for this dtype, we will ignore it.
1521
1521
result = self .grouper .agg_series (obj , f )
1522
1522
except TypeError :
1523
+ if raise_on_typeerror :
1524
+ raise
1523
1525
warn_dropping_nuisance_columns_deprecated (type (self ), "agg" )
1524
1526
continue
1525
1527
@@ -1593,7 +1595,12 @@ def _agg_py_fallback(
1593
1595
1594
1596
@final
1595
1597
def _cython_agg_general (
1596
- self , how : str , alt : Callable , numeric_only : bool , min_count : int = - 1
1598
+ self ,
1599
+ how : str ,
1600
+ alt : Callable ,
1601
+ numeric_only : bool ,
1602
+ min_count : int = - 1 ,
1603
+ ignore_failures : bool = True ,
1597
1604
):
1598
1605
# Note: we never get here with how="ohlc" for DataFrameGroupBy;
1599
1606
# that goes through SeriesGroupBy
@@ -1629,7 +1636,7 @@ def array_func(values: ArrayLike) -> ArrayLike:
1629
1636
1630
1637
# TypeError -> we may have an exception in trying to aggregate
1631
1638
# continue and exclude the block
1632
- new_mgr = data .grouped_reduce (array_func , ignore_failures = True )
1639
+ new_mgr = data .grouped_reduce (array_func , ignore_failures = ignore_failures )
1633
1640
1634
1641
if not is_ser and len (new_mgr ) < len (data ):
1635
1642
warn_dropping_nuisance_columns_deprecated (type (self ), how )
@@ -2041,6 +2048,7 @@ def std(
2041
2048
ddof : int = 1 ,
2042
2049
engine : str | None = None ,
2043
2050
engine_kwargs : dict [str , bool ] | None = None ,
2051
+ numeric_only : bool | lib .NoDefault = lib .no_default ,
2044
2052
):
2045
2053
"""
2046
2054
Compute standard deviation of groups, excluding missing values.
@@ -2069,6 +2077,11 @@ def std(
2069
2077
2070
2078
.. versionadded:: 1.4.0
2071
2079
2080
+ numeric_only : bool, default True
2081
+ Include only `float`, `int` or `boolean` data.
2082
+
2083
+ .. versionadded:: 1.5.0
2084
+
2072
2085
Returns
2073
2086
-------
2074
2087
Series or DataFrame
@@ -2081,8 +2094,9 @@ def std(
2081
2094
else :
2082
2095
return self ._get_cythonized_result (
2083
2096
libgroupby .group_var ,
2084
- needs_counts = True ,
2085
2097
cython_dtype = np .dtype (np .float64 ),
2098
+ numeric_only = numeric_only ,
2099
+ needs_counts = True ,
2086
2100
post_processing = lambda vals , inference : np .sqrt (vals ),
2087
2101
ddof = ddof ,
2088
2102
)
@@ -2095,6 +2109,7 @@ def var(
2095
2109
ddof : int = 1 ,
2096
2110
engine : str | None = None ,
2097
2111
engine_kwargs : dict [str , bool ] | None = None ,
2112
+ numeric_only : bool | lib .NoDefault = lib .no_default ,
2098
2113
):
2099
2114
"""
2100
2115
Compute variance of groups, excluding missing values.
@@ -2123,6 +2138,11 @@ def var(
2123
2138
2124
2139
.. versionadded:: 1.4.0
2125
2140
2141
+ numeric_only : bool, default True
2142
+ Include only `float`, `int` or `boolean` data.
2143
+
2144
+ .. versionadded:: 1.5.0
2145
+
2126
2146
Returns
2127
2147
-------
2128
2148
Series or DataFrame
@@ -2133,22 +2153,25 @@ def var(
2133
2153
2134
2154
return self ._numba_agg_general (sliding_var , engine_kwargs , ddof )
2135
2155
else :
2156
+ numeric_only_bool = self ._resolve_numeric_only (numeric_only )
2136
2157
if ddof == 1 :
2137
- numeric_only = self ._resolve_numeric_only (lib .no_default )
2138
2158
return self ._cython_agg_general (
2139
2159
"var" ,
2140
2160
alt = lambda x : Series (x ).var (ddof = ddof ),
2141
- numeric_only = numeric_only ,
2161
+ numeric_only = numeric_only_bool ,
2162
+ ignore_failures = numeric_only is lib .no_default ,
2142
2163
)
2143
2164
else :
2144
2165
func = lambda x : x .var (ddof = ddof )
2145
2166
with self ._group_selection_context ():
2146
- return self ._python_agg_general (func )
2167
+ return self ._python_agg_general (
2168
+ func , raise_on_typeerror = not numeric_only_bool
2169
+ )
2147
2170
2148
2171
@final
2149
2172
@Substitution (name = "groupby" )
2150
2173
@Appender (_common_see_also )
2151
- def sem (self , ddof : int = 1 ):
2174
+ def sem (self , ddof : int = 1 , numeric_only : bool | lib . NoDefault = lib . no_default ):
2152
2175
"""
2153
2176
Compute standard error of the mean of groups, excluding missing values.
2154
2177
@@ -2159,12 +2182,17 @@ def sem(self, ddof: int = 1):
2159
2182
ddof : int, default 1
2160
2183
Degrees of freedom.
2161
2184
2185
+ numeric_only : bool, default True
2186
+ Include only `float`, `int` or `boolean` data.
2187
+
2188
+ .. versionadded:: 1.5.0
2189
+
2162
2190
Returns
2163
2191
-------
2164
2192
Series or DataFrame
2165
2193
Standard error of the mean of values within each group.
2166
2194
"""
2167
- result = self .std (ddof = ddof )
2195
+ result = self .std (ddof = ddof , numeric_only = numeric_only )
2168
2196
if result .ndim == 1 :
2169
2197
result /= np .sqrt (self .count ())
2170
2198
else :
@@ -2979,7 +3007,12 @@ def nth(
2979
3007
return result
2980
3008
2981
3009
@final
2982
- def quantile (self , q = 0.5 , interpolation : str = "linear" ):
3010
+ def quantile (
3011
+ self ,
3012
+ q = 0.5 ,
3013
+ interpolation : str = "linear" ,
3014
+ numeric_only : bool | lib .NoDefault = lib .no_default ,
3015
+ ):
2983
3016
"""
2984
3017
Return group values at the given quantile, a la numpy.percentile.
2985
3018
@@ -2989,6 +3022,10 @@ def quantile(self, q=0.5, interpolation: str = "linear"):
2989
3022
Value(s) between 0 and 1 providing the quantile(s) to compute.
2990
3023
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
2991
3024
Method to use when the desired quantile falls between two points.
3025
+ numeric_only : bool, default True
3026
+ Include only `float`, `int` or `boolean` data.
3027
+
3028
+ .. versionadded:: 1.5.0
2992
3029
2993
3030
Returns
2994
3031
-------
@@ -3013,6 +3050,7 @@ def quantile(self, q=0.5, interpolation: str = "linear"):
3013
3050
a 2.0
3014
3051
b 3.0
3015
3052
"""
3053
+ numeric_only_bool = self ._resolve_numeric_only (numeric_only )
3016
3054
3017
3055
def pre_processor (vals : ArrayLike ) -> tuple [np .ndarray , np .dtype | None ]:
3018
3056
if is_object_dtype (vals ):
@@ -3106,9 +3144,15 @@ def blk_func(values: ArrayLike) -> ArrayLike:
3106
3144
obj = self ._obj_with_exclusions
3107
3145
is_ser = obj .ndim == 1
3108
3146
mgr = self ._get_data_to_aggregate ()
3109
-
3110
- res_mgr = mgr .grouped_reduce (blk_func , ignore_failures = True )
3111
- if not is_ser and len (res_mgr .items ) != len (mgr .items ):
3147
+ data = mgr .get_numeric_data () if numeric_only_bool else mgr
3148
+ ignore_failures = numeric_only_bool
3149
+ res_mgr = data .grouped_reduce (blk_func , ignore_failures = ignore_failures )
3150
+
3151
+ if (
3152
+ numeric_only is lib .no_default
3153
+ and not is_ser
3154
+ and len (res_mgr .items ) != len (mgr .items )
3155
+ ):
3112
3156
warn_dropping_nuisance_columns_deprecated (type (self ), "quantile" )
3113
3157
3114
3158
if len (res_mgr .items ) == 0 :
0 commit comments