@@ -97,10 +97,8 @@ class providing the base-class of operations.
97
97
BaseMaskedArray ,
98
98
BooleanArray ,
99
99
Categorical ,
100
- DatetimeArray ,
101
100
ExtensionArray ,
102
101
FloatingArray ,
103
- TimedeltaArray ,
104
102
)
105
103
from pandas .core .base import (
106
104
PandasObject ,
@@ -1993,30 +1991,12 @@ def std(
1993
1991
1994
1992
return np .sqrt (self ._numba_agg_general (sliding_var , engine_kwargs , ddof ))
1995
1993
else :
1996
-
1997
- def _preprocessing (values ):
1998
- if isinstance (values , BaseMaskedArray ):
1999
- return values ._data , None
2000
- return values , None
2001
-
2002
- def _postprocessing (vals , inference , result_mask = None ) -> ArrayLike :
2003
- if result_mask is not None :
2004
- if result_mask .ndim == 2 :
2005
- result_mask = result_mask [:, 0 ]
2006
- return FloatingArray (np .sqrt (vals ), result_mask .view (np .bool_ ))
2007
- return np .sqrt (vals )
2008
-
2009
- result = self ._get_cythonized_result (
2010
- libgroupby .group_var ,
2011
- cython_dtype = np .dtype (np .float64 ),
1994
+ return self ._cython_agg_general (
1995
+ "std" ,
1996
+ alt = lambda x : Series (x ).std (ddof = ddof ),
2012
1997
numeric_only = numeric_only ,
2013
- needs_counts = True ,
2014
- pre_processing = _preprocessing ,
2015
- post_processing = _postprocessing ,
2016
1998
ddof = ddof ,
2017
- how = "std" ,
2018
1999
)
2019
- return result
2020
2000
2021
2001
@final
2022
2002
@Substitution (name = "groupby" )
@@ -2245,18 +2225,12 @@ def sem(self, ddof: int = 1, numeric_only: bool = False):
2245
2225
f"{ type (self ).__name__ } .sem called with "
2246
2226
f"numeric_only={ numeric_only } and dtype { self .obj .dtype } "
2247
2227
)
2248
- result = self .std (ddof = ddof , numeric_only = numeric_only )
2249
-
2250
- if result .ndim == 1 :
2251
- result /= np .sqrt (self .count ())
2252
- else :
2253
- cols = result .columns .difference (self .exclusions ).unique ()
2254
- counts = self .count ()
2255
- result_ilocs = result .columns .get_indexer_for (cols )
2256
- count_ilocs = counts .columns .get_indexer_for (cols )
2257
-
2258
- result .iloc [:, result_ilocs ] /= np .sqrt (counts .iloc [:, count_ilocs ])
2259
- return result
2228
+ return self ._cython_agg_general (
2229
+ "sem" ,
2230
+ alt = lambda x : Series (x ).sem (ddof = ddof ),
2231
+ numeric_only = numeric_only ,
2232
+ ddof = ddof ,
2233
+ )
2260
2234
2261
2235
@final
2262
2236
@Substitution (name = "groupby" )
@@ -3734,7 +3708,6 @@ def _get_cythonized_result(
3734
3708
base_func : Callable ,
3735
3709
cython_dtype : np .dtype ,
3736
3710
numeric_only : bool = False ,
3737
- needs_counts : bool = False ,
3738
3711
pre_processing = None ,
3739
3712
post_processing = None ,
3740
3713
how : str = "any_all" ,
@@ -3750,8 +3723,6 @@ def _get_cythonized_result(
3750
3723
Type of the array that will be modified by the Cython call.
3751
3724
numeric_only : bool, default False
3752
3725
Whether only numeric datatypes should be computed
3753
- needs_counts : bool, default False
3754
- Whether the counts should be a part of the Cython call
3755
3726
pre_processing : function, default None
3756
3727
Function to be applied to `values` prior to passing to Cython.
3757
3728
Function should return a tuple where the first element is the
@@ -3798,14 +3769,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
3798
3769
3799
3770
inferences = None
3800
3771
3801
- if needs_counts :
3802
- counts = np .zeros (ngroups , dtype = np .int64 )
3803
- func = partial (func , counts = counts )
3804
-
3805
- is_datetimelike = values .dtype .kind in ["m" , "M" ]
3806
3772
vals = values
3807
- if is_datetimelike and how == "std" :
3808
- vals = vals .view ("i8" )
3809
3773
if pre_processing :
3810
3774
vals , inferences = pre_processing (vals )
3811
3775
@@ -3814,11 +3778,10 @@ def blk_func(values: ArrayLike) -> ArrayLike:
3814
3778
vals = vals .reshape ((- 1 , 1 ))
3815
3779
func = partial (func , values = vals )
3816
3780
3817
- if how != "std" or isinstance (values , BaseMaskedArray ):
3818
- mask = isna (values ).view (np .uint8 )
3819
- if mask .ndim == 1 :
3820
- mask = mask .reshape (- 1 , 1 )
3821
- func = partial (func , mask = mask )
3781
+ mask = isna (values ).view (np .uint8 )
3782
+ if mask .ndim == 1 :
3783
+ mask = mask .reshape (- 1 , 1 )
3784
+ func = partial (func , mask = mask )
3822
3785
3823
3786
result_mask = None
3824
3787
if isinstance (values , BaseMaskedArray ):
@@ -3827,10 +3790,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
3827
3790
func = partial (func , result_mask = result_mask )
3828
3791
3829
3792
# Call func to modify result in place
3830
- if how == "std" :
3831
- func (** kwargs , is_datetimelike = is_datetimelike )
3832
- else :
3833
- func (** kwargs )
3793
+ func (** kwargs )
3834
3794
3835
3795
if values .ndim == 1 :
3836
3796
assert result .shape [1 ] == 1 , result .shape
@@ -3842,15 +3802,6 @@ def blk_func(values: ArrayLike) -> ArrayLike:
3842
3802
if post_processing :
3843
3803
result = post_processing (result , inferences , result_mask = result_mask )
3844
3804
3845
- if how == "std" and is_datetimelike :
3846
- values = cast ("DatetimeArray | TimedeltaArray" , values )
3847
- unit = values .unit
3848
- with warnings .catch_warnings ():
3849
- # suppress "RuntimeWarning: invalid value encountered in cast"
3850
- warnings .filterwarnings ("ignore" )
3851
- result = result .astype (np .int64 , copy = False )
3852
- result = result .view (f"m8[{ unit } ]" )
3853
-
3854
3805
return result .T
3855
3806
3856
3807
# Operate block-wise instead of column-by-column
0 commit comments