@@ -1277,6 +1277,7 @@ def result_to_bool(result: np.ndarray, inference: Type) -> np.ndarray:
1277
1277
return self ._get_cythonized_result (
1278
1278
"group_any_all" ,
1279
1279
aggregate = True ,
1280
+ numeric_only = False ,
1280
1281
cython_dtype = np .dtype (np .uint8 ),
1281
1282
needs_values = True ,
1282
1283
needs_mask = True ,
@@ -1433,18 +1434,16 @@ def std(self, ddof: int = 1):
1433
1434
Series or DataFrame
1434
1435
Standard deviation of values within each group.
1435
1436
"""
1436
- result = self .var (ddof = ddof )
1437
- if result .ndim == 1 :
1438
- result = np .sqrt (result )
1439
- else :
1440
- cols = result .columns .get_indexer_for (
1441
- result .columns .difference (self .exclusions ).unique ()
1442
- )
1443
- # TODO(GH-22046) - setting with iloc broken if labels are not unique
1444
- # .values to remove labels
1445
- result .iloc [:, cols ] = np .sqrt (result .iloc [:, cols ]).values
1446
-
1447
- return result
1437
+ return self ._get_cythonized_result (
1438
+ "group_var_float64" ,
1439
+ aggregate = True ,
1440
+ needs_counts = True ,
1441
+ needs_values = True ,
1442
+ needs_2d = True ,
1443
+ cython_dtype = np .dtype (np .float64 ),
1444
+ post_processing = lambda vals , inference : np .sqrt (vals ),
1445
+ ddof = ddof ,
1446
+ )
1448
1447
1449
1448
@Substitution (name = "groupby" )
1450
1449
@Appender (_common_see_also )
@@ -1778,6 +1777,7 @@ def _fill(self, direction, limit=None):
1778
1777
1779
1778
return self ._get_cythonized_result (
1780
1779
"group_fillna_indexer" ,
1780
+ numeric_only = False ,
1781
1781
needs_mask = True ,
1782
1782
cython_dtype = np .dtype (np .int64 ),
1783
1783
result_is_index = True ,
@@ -2078,6 +2078,7 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
2078
2078
return self ._get_cythonized_result (
2079
2079
"group_quantile" ,
2080
2080
aggregate = True ,
2081
+ numeric_only = False ,
2081
2082
needs_values = True ,
2082
2083
needs_mask = True ,
2083
2084
cython_dtype = np .dtype (np .float64 ),
@@ -2367,7 +2368,11 @@ def _get_cythonized_result(
2367
2368
how : str ,
2368
2369
cython_dtype : np .dtype ,
2369
2370
aggregate : bool = False ,
2371
+ numeric_only : bool = True ,
2372
+ needs_counts : bool = False ,
2370
2373
needs_values : bool = False ,
2374
+ needs_2d : bool = False ,
2375
+ min_count : Optional [int ] = None ,
2371
2376
needs_mask : bool = False ,
2372
2377
needs_ngroups : bool = False ,
2373
2378
result_is_index : bool = False ,
@@ -2386,9 +2391,18 @@ def _get_cythonized_result(
2386
2391
aggregate : bool, default False
2387
2392
Whether the result should be aggregated to match the number of
2388
2393
groups
2394
+ numeric_only : bool, default True
2395
+ Whether only numeric datatypes should be computed
2396
+ needs_counts : bool, default False
2397
+ Whether the counts should be a part of the Cython call
2389
2398
needs_values : bool, default False
2390
2399
Whether the values should be a part of the Cython call
2391
2400
signature
2401
+ needs_2d : bool, default False
2402
+ Whether the values and result of the Cython call signature
2403
+ are at least 2-dimensional.
2404
+ min_count : int, default None
2405
+ When not None, min_count for the Cython call
2392
2406
needs_mask : bool, default False
2393
2407
Whether boolean mask needs to be part of the Cython call
2394
2408
signature
@@ -2418,7 +2432,7 @@ def _get_cythonized_result(
2418
2432
if result_is_index and aggregate :
2419
2433
raise ValueError ("'result_is_index' and 'aggregate' cannot both be True!" )
2420
2434
if post_processing :
2421
- if not callable (pre_processing ):
2435
+ if not callable (post_processing ):
2422
2436
raise ValueError ("'post_processing' must be a callable!" )
2423
2437
if pre_processing :
2424
2438
if not callable (pre_processing ):
@@ -2438,21 +2452,39 @@ def _get_cythonized_result(
2438
2452
name = obj .name
2439
2453
values = obj ._values
2440
2454
2455
+ if numeric_only and not is_numeric_dtype (values ):
2456
+ continue
2457
+
2441
2458
if aggregate :
2442
2459
result_sz = ngroups
2443
2460
else :
2444
2461
result_sz = len (values )
2445
2462
2446
2463
result = np .zeros (result_sz , dtype = cython_dtype )
2447
- func = partial (base_func , result , labels )
2464
+ if needs_2d :
2465
+ result = result .reshape ((- 1 , 1 ))
2466
+ func = partial (base_func , result )
2467
+
2448
2468
inferences = None
2449
2469
2470
+ if needs_counts :
2471
+ counts = np .zeros (self .ngroups , dtype = np .int64 )
2472
+ func = partial (func , counts )
2473
+
2450
2474
if needs_values :
2451
2475
vals = values
2452
2476
if pre_processing :
2453
2477
vals , inferences = pre_processing (vals )
2478
+ if needs_2d :
2479
+ vals = vals .reshape ((- 1 , 1 ))
2480
+ vals = vals .astype (cython_dtype , copy = False )
2454
2481
func = partial (func , vals )
2455
2482
2483
+ func = partial (func , labels )
2484
+
2485
+ if min_count is not None :
2486
+ func = partial (func , min_count )
2487
+
2456
2488
if needs_mask :
2457
2489
mask = isna (values ).view (np .uint8 )
2458
2490
func = partial (func , mask )
@@ -2462,6 +2494,9 @@ def _get_cythonized_result(
2462
2494
2463
2495
func (** kwargs ) # Call func to modify indexer values in place
2464
2496
2497
+ if needs_2d :
2498
+ result = result .reshape (- 1 )
2499
+
2465
2500
if result_is_index :
2466
2501
result = algorithms .take_nd (values , result )
2467
2502
@@ -2512,6 +2547,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
2512
2547
2513
2548
return self ._get_cythonized_result (
2514
2549
"group_shift_indexer" ,
2550
+ numeric_only = False ,
2515
2551
cython_dtype = np .dtype (np .int64 ),
2516
2552
needs_ngroups = True ,
2517
2553
result_is_index = True ,
0 commit comments