@@ -95,7 +95,6 @@ class providing the base-class of operations.
95
95
from pandas .core ._numba import executor
96
96
from pandas .core .arrays import (
97
97
BaseMaskedArray ,
98
- BooleanArray ,
99
98
Categorical ,
100
99
ExtensionArray ,
101
100
FloatingArray ,
@@ -1545,6 +1544,8 @@ def array_func(values: ArrayLike) -> ArrayLike:
1545
1544
# and non-applicable functions
1546
1545
# try to python agg
1547
1546
# TODO: shouldn't min_count matter?
1547
+ if how in ["any" , "all" ]:
1548
+ raise # TODO: re-raise as TypeError?
1548
1549
result = self ._agg_py_fallback (values , ndim = data .ndim , alt = alt )
1549
1550
1550
1551
return result
@@ -1694,45 +1695,6 @@ def _obj_1d_constructor(self) -> Callable:
1694
1695
assert isinstance (self .obj , Series )
1695
1696
return self .obj ._constructor
1696
1697
1697
@final
def _bool_agg(self, val_test: Literal["any", "all"], skipna: bool):
    """
    Dispatch a grouped ``any`` / ``all`` reduction to the Cython kernel.

    Parameters
    ----------
    val_test : {"any", "all"}
        Forwarded to ``libgroupby.group_any_all`` to select the reduction.
    skipna : bool
        Forwarded to the kernel; additionally, when True, missing entries
        of object-dtype inputs are replaced by ``True`` before the cast to
        bool so that the cast does not raise on ``pd.NA`` (GH#37501).

    Returns
    -------
    Whatever ``self._get_cythonized_result`` returns for this reduction.
    """

    def _as_int8_flags(arr: ArrayLike) -> tuple[np.ndarray, type]:
        # Convert one block of values into the int8 view of a boolean
        # array, and report ``bool`` as the dtype to restore afterwards.
        if is_object_dtype(arr.dtype) and skipna:
            # GH#37501: don't raise on pd.NA when skipna=True
            missing = isna(arr)
            if missing.any():
                # The caller computes its own mask from the original
                # values, so overwriting a copy here is safe.
                arr = arr.copy()
                arr[missing] = True
        elif isinstance(arr, BaseMaskedArray):
            # Work on the raw data of a masked array.
            arr = arr._data
        flags = arr.astype(bool, copy=False)
        return flags.view(np.int8), bool

    def _flags_to_result(
        result: np.ndarray,
        inference: type,
        result_mask,
    ) -> ArrayLike:
        # ``result_mask`` is passed by keyword by the caller; keep the name.
        if result_mask is None:
            return result.astype(inference, copy=False)
        return BooleanArray(result.astype(bool, copy=False), result_mask)

    int8_dtype = np.dtype(np.int8)
    return self._get_cythonized_result(
        libgroupby.group_any_all,
        numeric_only=False,
        cython_dtype=int8_dtype,
        pre_processing=_as_int8_flags,
        post_processing=_flags_to_result,
        val_test=val_test,
        skipna=skipna,
    )
1735
-
1736
1698
@final
1737
1699
@Substitution (name = "groupby" )
1738
1700
@Appender (_common_see_also )
@@ -1751,7 +1713,11 @@ def any(self, skipna: bool = True):
1751
1713
DataFrame or Series of boolean values, where a value is True if any element
1752
1714
is True within its respective group, False otherwise.
1753
1715
"""
1754
- return self ._bool_agg ("any" , skipna )
1716
+ return self ._cython_agg_general (
1717
+ "any" ,
1718
+ alt = lambda x : Series (x ).any (skipna = skipna ),
1719
+ skipna = skipna ,
1720
+ )
1755
1721
1756
1722
@final
1757
1723
@Substitution (name = "groupby" )
@@ -1771,7 +1737,11 @@ def all(self, skipna: bool = True):
1771
1737
DataFrame or Series of boolean values, where a value is True if all elements
1772
1738
are True within its respective group, False otherwise.
1773
1739
"""
1774
- return self ._bool_agg ("all" , skipna )
1740
+ return self ._cython_agg_general (
1741
+ "all" ,
1742
+ alt = lambda x : Series (x ).all (skipna = skipna ),
1743
+ skipna = skipna ,
1744
+ )
1775
1745
1776
1746
@final
1777
1747
@Substitution (name = "groupby" )
@@ -3702,116 +3672,6 @@ def cummax(
3702
3672
"cummax" , numeric_only = numeric_only , skipna = skipna
3703
3673
)
3704
3674
3705
@final
def _get_cythonized_result(
    self,
    base_func: Callable,
    cython_dtype: np.dtype,
    numeric_only: bool = False,
    pre_processing=None,
    post_processing=None,
    how: str = "any_all",
    **kwargs,
):
    """
    Run a Cython group kernel block-wise and wrap the aggregated output.

    Parameters
    ----------
    base_func : callable
        Kernel to invoke. It is called with the keyword arguments
        ``labels``, ``out``, ``values``, ``mask`` and ``result_mask``
        plus everything in ``**kwargs``, and is expected to fill ``out``
        (and ``result_mask`` when one is supplied) in place.
    cython_dtype : np.dtype
        dtype of the output buffer; the input values are cast to this
        dtype before the kernel is called.
    numeric_only : bool, default False
        Forwarded to ``self._get_data_to_aggregate``.
    pre_processing : callable, optional
        Called with the (transposed) block values; must return a tuple
        ``(values_for_kernel, inference)``. ``inference`` is handed back
        to ``post_processing`` unchanged.
    post_processing : callable, optional
        Called as ``post_processing(result, inference,
        result_mask=result_mask)``; its return value replaces the kernel
        output for the block.
    how : str, default "any_all"
        Forwarded as ``name`` to ``self._get_data_to_aggregate``.
    **kwargs
        Extra keyword arguments for ``base_func``. A key that collides
        with one of the keywords listed above takes precedence.

    Returns
    -------
    The result of ``self._wrap_aggregated_output`` applied to the reduced
    data.

    Raises
    ------
    ValueError
        If ``pre_processing`` or ``post_processing`` is truthy but not
        callable.
    """
    if post_processing and not callable(post_processing):
        raise ValueError("'post_processing' must be a callable!")
    if pre_processing and not callable(pre_processing):
        raise ValueError("'pre_processing' must be a callable!")

    ids, _, ngroups = self.grouper.group_info

    def blk_func(values: ArrayLike) -> ArrayLike:
        values = values.T
        is_1d = values.ndim == 1
        ncols = 1 if is_1d else values.shape[1]

        # Output buffer the kernel writes into: one row per group.
        out: ArrayLike = np.zeros((ngroups, ncols), dtype=cython_dtype)

        inference = None
        kernel_values = values
        if pre_processing:
            kernel_values, inference = pre_processing(kernel_values)

        kernel_values = kernel_values.astype(cython_dtype, copy=False)
        if kernel_values.ndim == 1:
            kernel_values = kernel_values.reshape((-1, 1))

        # The missing-value mask is computed from the values as they were
        # before pre-processing.
        na_mask = isna(values).view(np.uint8)
        if na_mask.ndim == 1:
            na_mask = na_mask.reshape(-1, 1)

        # Only masked arrays get a mask for the result.
        out_mask = (
            np.zeros(out.shape, dtype=np.bool_)
            if isinstance(values, BaseMaskedArray)
            else None
        )

        # Caller-supplied kwargs come last so they override on collision.
        call_kwargs = {
            "labels": ids,
            "out": out,
            "values": kernel_values,
            "mask": na_mask,
            "result_mask": out_mask,
            **kwargs,
        }
        # The kernel modifies ``out`` in place.
        base_func(**call_kwargs)

        if is_1d:
            # Drop the dummy column axis used for the kernel call.
            assert out.shape[1] == 1, out.shape
            out = out[:, 0]
            if out_mask is not None:
                assert out_mask.shape[1] == 1, out_mask.shape
                out_mask = out_mask[:, 0]

        if post_processing:
            out = post_processing(out, inference, result_mask=out_mask)

        return out.T

    # Operate block-wise instead of column-by-column
    data_mgr = self._get_data_to_aggregate(numeric_only=numeric_only, name=how)
    reduced_mgr = data_mgr.grouped_reduce(blk_func)
    wrapped = self._wrap_agged_manager(reduced_mgr)
    return self._wrap_aggregated_output(wrapped)
3814
-
3815
3675
@final
3816
3676
@Substitution (name = "groupby" )
3817
3677
def shift (
0 commit comments