7
7
from pandas .core .generic import NDFrame , PandasObject
8
8
from pandas .core .index import Index , MultiIndex
9
9
from pandas .core .internals import BlockManager
10
- from pandas .core .reshape import get_group_index
11
10
from pandas .core .series import Series
12
11
from pandas .core .panel import Panel
13
12
from pandas .util .decorators import cache_readonly
@@ -316,6 +315,14 @@ def mean(self):
316
315
"""
317
316
return self ._cython_agg_general ('mean' )
318
317
318
def std(self):
    """
    Compute standard deviation of groups, excluding missing values

    For multiple groupings, the result index will be a MultiIndex
    """
    # Dispatches to the shared Cython aggregation path under the 'std' key.
    return self._cython_agg_general('std')
325
+
319
326
def size (self ):
320
327
"""
321
328
Compute group sizes
@@ -356,8 +363,8 @@ def _cython_agg_general(self, how):
356
363
else :
357
364
continue
358
365
359
- result , counts = lib . group_aggregate (obj , label_list ,
360
- shape , how = how )
366
+ result , counts = cython_aggregate (obj , label_list ,
367
+ shape , how = how )
361
368
result = result .ravel ()
362
369
mask = counts .ravel () > 0
363
370
output [name ] = result [mask ]
@@ -1315,15 +1322,7 @@ def generate_groups(data, label_list, shape, axis=0, factory=lambda x: x):
1315
1322
-------
1316
1323
generator
1317
1324
"""
1318
- # indexer = np.lexsort(label_list[::-1])
1319
- group_index = get_group_index (label_list , shape )
1320
- na_mask = np .zeros (len (label_list [0 ]), dtype = bool )
1321
- for arr in label_list :
1322
- na_mask |= arr == - 1
1323
- group_index [na_mask ] = - 1
1324
- indexer = lib .groupsort_indexer (group_index .astype ('i4' ),
1325
- np .prod (shape ))
1326
-
1325
+ indexer = _get_group_sorter (label_list , shape )
1327
1326
sorted_labels = [labels .take (indexer ) for labels in label_list ]
1328
1327
1329
1328
if isinstance (data , BlockManager ):
@@ -1342,6 +1341,17 @@ def generate_groups(data, label_list, shape, axis=0, factory=lambda x: x):
1342
1341
for key , group in gen :
1343
1342
yield key , group
1344
1343
1344
def _get_group_sorter(label_list, shape):
    """
    Build the indexer that orders observations by their flat group index.

    Parameters
    ----------
    label_list : sequence of integer arrays
        One label array per grouping level, passed through unchanged to
        ``get_group_index``.
    shape : sequence of int
        Number of distinct labels per level; its product is handed to
        ``lib.groupsort_indexer`` as the second argument.

    Returns
    -------
    indexer
        Whatever ``lib.groupsort_indexer`` returns for the int32 group
        index (callers in this module pass it to ``labels.take``).
    """
    # get_group_index already writes -1 wherever any level has a negative
    # label, so no additional NA masking is required before sorting.
    group_index = get_group_index(label_list, shape)
    ngroups = np.prod(shape)
    return lib.groupsort_indexer(group_index.astype('i4'), ngroups)
1354
+
1345
1355
def _generate_groups (data , labels , shape , start , end , axis = 0 , which = 0 ,
1346
1356
factory = lambda x : x ):
1347
1357
axis_labels = labels [which ][start :end ]
@@ -1385,6 +1395,50 @@ def slicer(data, slob):
1385
1395
1386
1396
left = right
1387
1397
1398
def get_group_index(label_list, shape):
    """
    Flatten per-level integer labels into a single group index.

    Each observation's labels are treated as the coordinates of a cell in a
    row-major (C-ordered) array with dimensions ``shape``; the flat offset
    of that cell is the observation's group index.

    Parameters
    ----------
    label_list : sequence of integer array-likes, all of the same length
        One entry per grouping level. A negative label flags the
        observation as missing. Only the first ``len(shape)`` entries are
        used.
    shape : sequence of int
        Number of distinct labels in each level.

    Returns
    -------
    group_index : ndarray of int
        Flat group index per observation, -1 wherever any level's label is
        negative.

    Raises
    ------
    IndexError
        If ``label_list`` is empty or has fewer entries than ``shape``.
    """
    n = len(label_list[0])
    group_index = np.zeros(n, dtype=int)
    mask = np.zeros(n, dtype=bool)

    # Row-major strides, computed once: the stride of level i is the product
    # of the sizes of all later levels, and the last level has stride 1.
    strides = np.ones(len(shape), dtype=int)
    strides[:-1] = np.cumprod(shape[:0:-1], dtype=int)[::-1]

    for i, stride in enumerate(strides):
        # asarray lets plain Python sequences be used as label arrays.
        labels = np.asarray(label_list[i])
        group_index += labels * stride
        mask |= labels < 0

    # Any observation with a missing label in some level gets the sentinel.
    np.putmask(group_index, mask, -1)
    return group_index
1409
+
1410
+ #----------------------------------------------------------------------
1411
+ # Group aggregations in Cython
1412
+
1413
+
1414
def cython_aggregate(values, label_list, shape, how='add'):
    """
    Run one of the registered Cython group aggregations.

    Parameters
    ----------
    values : object
        Data to aggregate; handed to the kernel unchanged.
    label_list : sequence of integer arrays
        Per-level group labels, flattened with ``get_group_index``.
    shape : sequence of int
        Number of groups per level; also the shape of both outputs.
    how : str, default 'add'
        Key into ``_cython_functions``.

    Returns
    -------
    result : float64 ndarray of shape ``shape``
        Initialised to NaN before the kernel runs; passed through the
        matching entry of ``_cython_transforms`` when one exists.
    counts : int32 ndarray of shape ``shape``
        Initialised to zero before the kernel runs.

    Raises
    ------
    KeyError
        If ``how`` is not a key of ``_cython_functions``.
    """
    kernel = _cython_functions[how]

    # The kernels receive the flat group index as int32.
    flat_labels = get_group_index(label_list, shape).astype('i4')

    out = np.empty(shape, dtype=np.float64)
    out.fill(np.nan)
    counts = np.zeros(shape, dtype=np.int32)

    # The kernel is given flattened views and fills the outputs in place.
    kernel(out.ravel(), counts.ravel(), values, flat_labels)

    # Optional post-processing step (for example sqrt of the variance).
    if how in _cython_transforms:
        out = _cython_transforms[how](out)

    return out, counts
1430
+
1431
# Aggregation kernel for every ``how`` value accepted by cython_aggregate.
# 'std' deliberately shares the variance kernel; the square root is applied
# afterwards through _cython_transforms.
_cython_functions = dict(
    add=lib.group_add,
    mean=lib.group_mean,
    var=lib.group_var,
    std=lib.group_var,
)

# Post-processing applied to the kernel output, keyed by ``how``. Names
# without an entry are returned untransformed.
_cython_transforms = dict(std=np.sqrt)
1441
+
1388
1442
#----------------------------------------------------------------------
1389
1443
# sorting levels...cleverly?
1390
1444
0 commit comments