@@ -1187,7 +1187,7 @@ def _wrap_transformed_output(
1187
1187
result .index = self .obj .index
1188
1188
return result
1189
1189
1190
- def _wrap_applied_output (self , data , values , not_indexed_same : bool = False ):
1190
+ def _wrap_applied_output (self , data , values : list , not_indexed_same : bool = False ):
1191
1191
raise AbstractMethodError (self )
1192
1192
1193
1193
def _resolve_numeric_only (self , numeric_only : bool | lib .NoDefault ) -> bool :
@@ -1667,11 +1667,8 @@ def result_to_bool(
1667
1667
1668
1668
return self ._get_cythonized_result (
1669
1669
libgroupby .group_any_all ,
1670
- aggregate = True ,
1671
1670
numeric_only = False ,
1672
1671
cython_dtype = np .dtype (np .int8 ),
1673
- needs_values = True ,
1674
- needs_2d = True ,
1675
1672
needs_mask = True ,
1676
1673
needs_nullable = True ,
1677
1674
pre_processing = objs_to_bool ,
@@ -1867,10 +1864,7 @@ def std(self, ddof: int = 1):
1867
1864
"""
1868
1865
return self ._get_cythonized_result (
1869
1866
libgroupby .group_var ,
1870
- aggregate = True ,
1871
1867
needs_counts = True ,
1872
- needs_values = True ,
1873
- needs_2d = True ,
1874
1868
cython_dtype = np .dtype (np .float64 ),
1875
1869
post_processing = lambda vals , inference : np .sqrt (vals ),
1876
1870
ddof = ddof ,
@@ -2281,10 +2275,14 @@ def _fill(self, direction: Literal["ffill", "bfill"], limit=None):
2281
2275
limit = - 1
2282
2276
2283
2277
ids , _ , _ = self .grouper .group_info
2278
+ sorted_labels = np .argsort (ids , kind = "mergesort" ).astype (np .intp , copy = False )
2279
+ if direction == "bfill" :
2280
+ sorted_labels = sorted_labels [::- 1 ]
2284
2281
2285
2282
col_func = partial (
2286
2283
libgroupby .group_fillna_indexer ,
2287
2284
labels = ids ,
2285
+ sorted_labels = sorted_labels ,
2288
2286
direction = direction ,
2289
2287
limit = limit ,
2290
2288
dropna = self .dropna ,
@@ -3014,18 +3012,12 @@ def _get_cythonized_result(
3014
3012
self ,
3015
3013
base_func : Callable ,
3016
3014
cython_dtype : np .dtype ,
3017
- aggregate : bool = False ,
3018
3015
numeric_only : bool | lib .NoDefault = lib .no_default ,
3019
3016
needs_counts : bool = False ,
3020
- needs_values : bool = False ,
3021
- needs_2d : bool = False ,
3022
3017
needs_nullable : bool = False ,
3023
- min_count : int | None = None ,
3024
3018
needs_mask : bool = False ,
3025
- needs_ngroups : bool = False ,
3026
3019
pre_processing = None ,
3027
3020
post_processing = None ,
3028
- fill_value = None ,
3029
3021
** kwargs ,
3030
3022
):
3031
3023
"""
@@ -3036,26 +3028,13 @@ def _get_cythonized_result(
3036
3028
base_func : callable, Cythonized function to be called
3037
3029
cython_dtype : np.dtype
3038
3030
Type of the array that will be modified by the Cython call.
3039
- aggregate : bool, default False
3040
- Whether the result should be aggregated to match the number of
3041
- groups
3042
3031
numeric_only : bool, default True
3043
3032
Whether only numeric datatypes should be computed
3044
3033
needs_counts : bool, default False
3045
3034
Whether the counts should be a part of the Cython call
3046
- needs_values : bool, default False
3047
- Whether the values should be a part of the Cython call
3048
- signature
3049
- needs_2d : bool, default False
3050
- Whether the values and result of the Cython call signature
3051
- are 2-dimensional.
3052
- min_count : int, default None
3053
- When not None, min_count for the Cython call
3054
3035
needs_mask : bool, default False
3055
3036
Whether boolean mask needs to be part of the Cython call
3056
3037
signature
3057
- needs_ngroups : bool, default False
3058
- Whether number of groups is part of the Cython call signature
3059
3038
needs_nullable : bool, default False
3060
3039
Whether a bool specifying if the input is nullable is part
3061
3040
of the Cython call signature
@@ -3073,8 +3052,6 @@ def _get_cythonized_result(
3073
3052
second argument, i.e. the signature should be
3074
3053
(ndarray, Type). If `needs_nullable=True`, a third argument should be
3075
3054
`nullable`, to allow for processing specific to nullable values.
3076
- fill_value : any, default None
3077
- The scalar value to use for newly introduced missing values.
3078
3055
**kwargs : dict
3079
3056
Extra arguments to be passed back to Cython funcs
3080
3057
@@ -3086,13 +3063,8 @@ def _get_cythonized_result(
3086
3063
3087
3064
if post_processing and not callable (post_processing ):
3088
3065
raise ValueError ("'post_processing' must be a callable!" )
3089
- if pre_processing :
3090
- if not callable (pre_processing ):
3091
- raise ValueError ("'pre_processing' must be a callable!" )
3092
- if not needs_values :
3093
- raise ValueError (
3094
- "Cannot use 'pre_processing' without specifying 'needs_values'!"
3095
- )
3066
+ if pre_processing and not callable (pre_processing ):
3067
+ raise ValueError ("'pre_processing' must be a callable!" )
3096
3068
3097
3069
grouper = self .grouper
3098
3070
@@ -3101,29 +3073,14 @@ def _get_cythonized_result(
3101
3073
3102
3074
how = base_func .__name__
3103
3075
base_func = partial (base_func , labels = ids )
3104
- if needs_ngroups :
3105
- base_func = partial (base_func , ngroups = ngroups )
3106
- if min_count is not None :
3107
- base_func = partial (base_func , min_count = min_count )
3108
-
3109
- real_2d = how in ["group_any_all" , "group_var" ]
3110
3076
3111
3077
def blk_func (values : ArrayLike ) -> ArrayLike :
3112
3078
values = values .T
3113
3079
ncols = 1 if values .ndim == 1 else values .shape [1 ]
3114
3080
3115
- if aggregate :
3116
- result_sz = ngroups
3117
- else :
3118
- result_sz = values .shape [- 1 ]
3119
-
3120
3081
result : ArrayLike
3121
- result = np .zeros (result_sz * ncols , dtype = cython_dtype )
3122
- if needs_2d :
3123
- if real_2d :
3124
- result = result .reshape ((result_sz , ncols ))
3125
- else :
3126
- result = result .reshape (- 1 , 1 )
3082
+ result = np .zeros (ngroups * ncols , dtype = cython_dtype )
3083
+ result = result .reshape ((ngroups , ncols ))
3127
3084
3128
3085
func = partial (base_func , out = result )
3129
3086
@@ -3133,19 +3090,18 @@ def blk_func(values: ArrayLike) -> ArrayLike:
3133
3090
counts = np .zeros (self .ngroups , dtype = np .int64 )
3134
3091
func = partial (func , counts = counts )
3135
3092
3136
- if needs_values :
3137
- vals = values
3138
- if pre_processing :
3139
- vals , inferences = pre_processing (vals )
3093
+ vals = values
3094
+ if pre_processing :
3095
+ vals , inferences = pre_processing (vals )
3140
3096
3141
- vals = vals .astype (cython_dtype , copy = False )
3142
- if needs_2d and vals .ndim == 1 :
3143
- vals = vals .reshape ((- 1 , 1 ))
3144
- func = partial (func , values = vals )
3097
+ vals = vals .astype (cython_dtype , copy = False )
3098
+ if vals .ndim == 1 :
3099
+ vals = vals .reshape ((- 1 , 1 ))
3100
+ func = partial (func , values = vals )
3145
3101
3146
3102
if needs_mask :
3147
3103
mask = isna (values ).view (np .uint8 )
3148
- if needs_2d and mask .ndim == 1 :
3104
+ if mask .ndim == 1 :
3149
3105
mask = mask .reshape (- 1 , 1 )
3150
3106
func = partial (func , mask = mask )
3151
3107
@@ -3155,11 +3111,9 @@ def blk_func(values: ArrayLike) -> ArrayLike:
3155
3111
3156
3112
func (** kwargs ) # Call func to modify indexer values in place
3157
3113
3158
- if real_2d and values .ndim == 1 :
3114
+ if values .ndim == 1 :
3159
3115
assert result .shape [1 ] == 1 , result .shape
3160
3116
result = result [:, 0 ]
3161
- if needs_mask :
3162
- mask = mask [:, 0 ]
3163
3117
3164
3118
if post_processing :
3165
3119
pp_kwargs = {}
@@ -3168,17 +3122,10 @@ def blk_func(values: ArrayLike) -> ArrayLike:
3168
3122
3169
3123
result = post_processing (result , inferences , ** pp_kwargs )
3170
3124
3171
- if needs_2d and not real_2d :
3172
- if result .ndim == 2 :
3173
- assert result .shape [1 ] == 1
3174
- # error: No overload variant of "__getitem__" of "ExtensionArray"
3175
- # matches argument type "Tuple[slice, int]"
3176
- result = result [:, 0 ] # type: ignore[call-overload]
3177
-
3178
3125
return result .T
3179
3126
3180
3127
obj = self ._obj_with_exclusions
3181
- if obj .ndim == 2 and self .axis == 0 and needs_2d and real_2d :
3128
+ if obj .ndim == 2 and self .axis == 0 :
3182
3129
# Operate block-wise instead of column-by-column
3183
3130
mgr = obj ._mgr
3184
3131
if numeric_only :
@@ -3187,10 +3134,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
3187
3134
# setting ignore_failures=False for troubleshooting
3188
3135
res_mgr = mgr .grouped_reduce (blk_func , ignore_failures = False )
3189
3136
output = type (obj )(res_mgr )
3190
- if aggregate :
3191
- return self ._wrap_aggregated_output (output )
3192
- else :
3193
- return self ._wrap_transformed_output (output )
3137
+ return self ._wrap_aggregated_output (output )
3194
3138
3195
3139
error_msg = ""
3196
3140
for idx , obj in enumerate (self ._iterate_slices ()):
@@ -3222,10 +3166,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
3222
3166
if not output and error_msg != "" :
3223
3167
raise TypeError (error_msg )
3224
3168
3225
- if aggregate :
3226
- return self ._wrap_aggregated_output (output )
3227
- else :
3228
- return self ._wrap_transformed_output (output )
3169
+ return self ._wrap_aggregated_output (output )
3229
3170
3230
3171
@final
3231
3172
@Substitution (name = "groupby" )
0 commit comments