@@ -994,20 +994,24 @@ def _transform_should_cast(self, func_nm):
994
994
return (self .size ().fillna (0 ) > 0 ).any () and (func_nm not in
995
995
_cython_cast_blacklist )
996
996
997
- def _cython_transform (self , how , numeric_only = True ):
997
+ def _cython_transform (self , how , numeric_only = True , ** kwargs ):
998
998
output = collections .OrderedDict ()
999
999
for name , obj in self ._iterate_slices ():
1000
1000
is_numeric = is_numeric_dtype (obj .dtype )
1001
1001
if numeric_only and not is_numeric :
1002
1002
continue
1003
1003
1004
1004
try :
1005
- result , names = self .grouper .transform (obj .values , how )
1005
+ result , names = self .grouper .transform (obj .values , how ,
1006
+ ** kwargs )
1006
1007
except NotImplementedError :
1007
1008
continue
1008
1009
except AssertionError as e :
1009
1010
raise GroupByError (str (e ))
1010
- output [name ] = self ._try_cast (result , obj )
1011
+ if self ._transform_should_cast (how ):
1012
+ output [name ] = self ._try_cast (result , obj )
1013
+ else :
1014
+ output [name ] = result
1011
1015
1012
1016
if len (output ) == 0 :
1013
1017
raise DataError ('No numeric types to aggregate' )
@@ -1768,6 +1772,37 @@ def cumcount(self, ascending=True):
1768
1772
cumcounts = self ._cumcount_array (ascending = ascending )
1769
1773
return Series (cumcounts , index )
1770
1774
1775
+ @Substitution (name = 'groupby' )
1776
+ @Appender (_doc_template )
1777
+ def rank (self , method = 'average' , ascending = True , na_option = 'keep' ,
1778
+ pct = False , axis = 0 ):
1779
+ """Provides the rank of values within each group
1780
+
1781
+ Parameters
1782
+ ----------
1783
+ method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
1784
+ * average: average rank of group
1785
+ * min: lowest rank in group
1786
+ * max: highest rank in group
1787
+ * first: ranks assigned in order they appear in the array
1788
+ * dense: like 'min', but rank always increases by 1 between groups
1789
+ na_option : {'keep', 'top', 'bottom'}, default 'keep'
1790
+ * keep: leave NA values where they are
1791
+ * top: smallest rank if ascending
1792
+ * bottom: smallest rank if descending
1793
+ ascending : boolean, default True
1794
+ False for ranks by high (1) to low (N)
1795
+ pct : boolean, default False
1796
+ Compute percentage rank of data within each group
1797
+
1798
+ Returns
1799
+ -------
1800
+ DataFrame with ranking of values within each group
1801
+ """
1802
+ return self ._cython_transform ('rank' , numeric_only = False ,
1803
+ ties_method = method , ascending = ascending ,
1804
+ na_option = na_option , pct = pct , axis = axis )
1805
+
1771
1806
@Substitution (name = 'groupby' )
1772
1807
@Appender (_doc_template )
1773
1808
def cumprod (self , axis = 0 , * args , ** kwargs ):
@@ -2183,6 +2218,16 @@ def get_group_levels(self):
2183
2218
'cumsum' : 'group_cumsum' ,
2184
2219
'cummin' : 'group_cummin' ,
2185
2220
'cummax' : 'group_cummax' ,
2221
+ 'rank' : {
2222
+ 'name' : 'group_rank' ,
2223
+ 'f' : lambda func , a , b , c , d , ** kwargs : func (
2224
+ a , b , c , d ,
2225
+ kwargs .get ('ties_method' , 'average' ),
2226
+ kwargs .get ('ascending' , True ),
2227
+ kwargs .get ('pct' , False ),
2228
+ kwargs .get ('na_option' , 'keep' )
2229
+ )
2230
+ }
2186
2231
}
2187
2232
}
2188
2233
@@ -2242,7 +2287,8 @@ def wrapper(*args, **kwargs):
2242
2287
(how , dtype_str ))
2243
2288
return func
2244
2289
2245
- def _cython_operation (self , kind , values , how , axis , min_count = - 1 ):
2290
+ def _cython_operation (self , kind , values , how , axis , min_count = - 1 ,
2291
+ ** kwargs ):
2246
2292
assert kind in ['transform' , 'aggregate' ]
2247
2293
2248
2294
# can we do this operation with our cython functions
@@ -2314,10 +2360,13 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1):
2314
2360
else :
2315
2361
raise
2316
2362
2317
- if is_numeric :
2318
- out_dtype = '%s%d' % ( values . dtype . kind , values . dtype . itemsize )
2363
+ if how == 'rank' :
2364
+ out_dtype = 'float'
2319
2365
else :
2320
- out_dtype = 'object'
2366
+ if is_numeric :
2367
+ out_dtype = '%s%d' % (values .dtype .kind , values .dtype .itemsize )
2368
+ else :
2369
+ out_dtype = 'object'
2321
2370
2322
2371
labels , _ , _ = self .group_info
2323
2372
@@ -2334,7 +2383,8 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1):
2334
2383
2335
2384
# TODO: min_count
2336
2385
result = self ._transform (
2337
- result , values , labels , func , is_numeric , is_datetimelike )
2386
+ result , values , labels , func , is_numeric , is_datetimelike ,
2387
+ ** kwargs )
2338
2388
2339
2389
if is_integer_dtype (result ) and not is_datetimelike :
2340
2390
mask = result == iNaT
@@ -2373,8 +2423,8 @@ def aggregate(self, values, how, axis=0, min_count=-1):
2373
2423
return self ._cython_operation ('aggregate' , values , how , axis ,
2374
2424
min_count = min_count )
2375
2425
2376
- def transform (self , values , how , axis = 0 ):
2377
- return self ._cython_operation ('transform' , values , how , axis )
2426
+ def transform (self , values , how , axis = 0 , ** kwargs ):
2427
+ return self ._cython_operation ('transform' , values , how , axis , ** kwargs )
2378
2428
2379
2429
def _aggregate (self , result , counts , values , comp_ids , agg_func ,
2380
2430
is_numeric , is_datetimelike , min_count = - 1 ):
@@ -2394,7 +2444,7 @@ def _aggregate(self, result, counts, values, comp_ids, agg_func,
2394
2444
return result
2395
2445
2396
2446
def _transform (self , result , values , comp_ids , transform_func ,
2397
- is_numeric , is_datetimelike ):
2447
+ is_numeric , is_datetimelike , ** kwargs ):
2398
2448
2399
2449
comp_ids , _ , ngroups = self .group_info
2400
2450
if values .ndim > 3 :
@@ -2406,9 +2456,9 @@ def _transform(self, result, values, comp_ids, transform_func,
2406
2456
2407
2457
chunk = chunk .squeeze ()
2408
2458
transform_func (result [:, :, i ], values ,
2409
- comp_ids , is_datetimelike )
2459
+ comp_ids , is_datetimelike , ** kwargs )
2410
2460
else :
2411
- transform_func (result , values , comp_ids , is_datetimelike )
2461
+ transform_func (result , values , comp_ids , is_datetimelike , ** kwargs )
2412
2462
2413
2463
return result
2414
2464
0 commit comments