@@ -875,21 +875,28 @@ def apply(self, func, *args, **kwargs):
875
875
876
876
func = self ._is_builtin_func (func )
877
877
878
- # this is needed so we don't try and wrap strings. If we could
879
- # resolve functions to their callable functions prior, this
880
- # wouldn't be needed
881
- if args or kwargs :
882
- if callable (func ):
883
-
884
- @wraps (func )
885
- def f (g ):
886
- with np .errstate (all = 'ignore' ):
887
- return func (g , * args , ** kwargs )
878
+ # Try to go down the Cython path first
879
+ try :
880
+ f = self .grouper ._cython_functions ['apply' ][func ]
881
+ return self .grouper ._cython_apply (f , self ._selected_obj , self .axis ,
882
+ ** kwargs )
883
+ except KeyError :
884
+ # this is needed so we don't try and wrap strings. If we could
885
+ # resolve functions to their callable functions prior, this
886
+ # wouldn't be needed
887
+ if args or kwargs :
888
+ if callable (func ):
889
+
890
+ @wraps (func )
891
+ def f (g ):
892
+ with np .errstate (all = 'ignore' ):
893
+ return func (g , * args , ** kwargs )
894
+ else :
895
+ raise ValueError ('func must be a callable if args or '
896
+ 'kwargs are supplied and func is not '
897
+ 'implemented in Cython' )
888
898
else :
889
- raise ValueError ('func must be a callable if args or '
890
- 'kwargs are supplied' )
891
- else :
892
- f = func
899
+ f = func
893
900
894
901
# ignore SettingWithCopy here in case the user mutates
895
902
with option_context ('mode.chained_assignment' , None ):
@@ -1472,7 +1479,7 @@ def pad(self, limit=None):
1472
1479
Series.fillna
1473
1480
DataFrame.fillna
1474
1481
"""
1475
- return self .apply (lambda x : x . ffill ( limit = limit ) )
1482
+ return self .apply (' ffill' , limit = limit )
1476
1483
ffill = pad
1477
1484
1478
1485
@Substitution (name = 'groupby' )
@@ -1492,7 +1499,7 @@ def backfill(self, limit=None):
1492
1499
Series.fillna
1493
1500
DataFrame.fillna
1494
1501
"""
1495
- return self .apply (lambda x : x . bfill ( limit = limit ) )
1502
+ return self .apply (' bfill' , limit = limit )
1496
1503
bfill = backfill
1497
1504
1498
1505
@Substitution (name = 'groupby' )
@@ -2032,6 +2039,32 @@ def _get_group_keys(self):
2032
2039
self .levels ,
2033
2040
self .labels )
2034
2041
2042
def _cython_apply(self, f, data, axis, **kwargs):
    """
    Apply a Cython group-wise kernel to every column of ``data``.

    Parameters
    ----------
    f : dict
        Kernel specification. ``f['name']`` is the base name handed to
        ``self._get_func``. The optional ``f['f']`` is a callable invoked
        as ``f['f'](kernel, result, values, labels, **kwargs)`` to adapt
        the call to the kernel's signature; without it the kernel is
        called directly as ``kernel(result, values, labels, **kwargs)``.
    data : DataFrame
        Object whose columns are processed one at a time.
    axis : int
        Not used by this method; kept so the signature stays unchanged.
    **kwargs
        Forwarded to the kernel (through ``f['f']`` when it is given).

    Returns
    -------
    DataFrame
        One column per column of ``data``, indexed by ``data.index``.

    Raises
    ------
    NotImplementedError
        If ``self._get_func`` finds no kernel for a column's dtype.
    """
    # Read the specification once and keep it under dedicated names.
    # Rebinding ``f`` inside the loop would leave a callable in ``f`` after
    # the first processed column, and ``f['name']`` would then fail on the
    # next one.
    kernel_name = f['name']
    adapter = f.get('f')

    output = collections.OrderedDict()
    for col in data.columns:
        if col in self.names:
            # columns listed in ``self.names`` are passed through untouched
            output[col] = data[col].values
            continue

        # duplicative of _get_cython_function; needs refactor
        dtype_str = data[col].dtype.name
        # the kernel is handed a 2-D, single-column view of the values
        values = data[col].values[:, None]

        kernel = self._get_func(kernel_name, dtype_str)
        if kernel is None:
            raise NotImplementedError(
                "function %r is not implemented for dtype %r"
                % (kernel_name, dtype_str))

        labels, _, _ = self.group_info

        # output buffer with the same shape as ``values``, pre-filled with NaN
        result = _maybe_fill(np.empty_like(values, dtype=dtype_str),
                             fill_value=np.nan)
        if adapter is None:
            kernel(result, values, labels, **kwargs)
        else:
            adapter(kernel, result, values, labels, **kwargs)

        # drop the dummy second axis again
        output[col] = result[:, 0]

    return DataFrame(output, index=data.index)
2067
+
2035
2068
def apply (self , f , data , axis = 0 ):
2036
2069
mutated = self .mutated
2037
2070
splitter = self ._get_splitter (data , axis = axis )
@@ -2228,6 +2261,22 @@ def get_group_levels(self):
2228
2261
kwargs .get ('na_option' , 'keep' )
2229
2262
)
2230
2263
}
2264
+ },
2265
+ 'apply' : {
2266
+ 'ffill' : {
2267
+ 'name' : 'group_fillna' ,
2268
+ 'f' : lambda func , a , b , c , ** kwargs : func (
2269
+ a , b , c ,
2270
+ 'ffill' , kwargs ['limit' ] if kwargs ['limit' ] else - 1
2271
+ )
2272
+ },
2273
+ 'bfill' : {
2274
+ 'name' : 'group_fillna' ,
2275
+ 'f' : lambda func , a , b , c , ** kwargs : func (
2276
+ a , b , c ,
2277
+ 'bfill' , kwargs ['limit' ] if kwargs ['limit' ] else - 1
2278
+ )
2279
+ }
2231
2280
}
2232
2281
}
2233
2282
@@ -2246,27 +2295,28 @@ def _is_builtin_func(self, arg):
2246
2295
"""
2247
2296
return SelectionMixin ._builtin_table .get (arg , arg )
2248
2297
2249
- def _get_cython_function (self , kind , how , values , is_numeric ):
2250
-
2251
- dtype_str = values .dtype .name
2298
def _get_func(self, fname, dtype_str=None, is_numeric=False):
    """
    Look up a function on ``libgroupby`` by base name.

    Parameters
    ----------
    fname : str
        Base name of the function.
    dtype_str : str, optional
        Dtype name used to build the dtype-specific candidate
        ``"<fname>_<dtype_str>"``.
    is_numeric : bool, default False
        When True, an attribute named exactly ``fname`` is returned if it
        exists.

    Returns
    -------
    callable or None
        The first match among ``fname`` (numeric only),
        ``"<fname>_<dtype_str>"`` and ``"<fname>_object"``; None when
        none of them exists.
    """
    # see if there is a fused-type version of function
    # only valid for numeric
    f = getattr(libgroupby, fname, None)
    if f is not None and is_numeric:
        return f

    # otherwise find dtype-specific version, falling back to object
    for dt in [dtype_str, 'object']:
        if dt is None:
            # no dtype supplied: only the object fallback can match
            continue
        # the candidate name must be built from the loop variable; using
        # ``dtype_str`` here would probe the same name twice and never
        # reach the object fallback
        f = getattr(libgroupby, "%s_%s" % (fname, dt), None)
        if f is not None:
            return f

    return None
2259
2310
2260
- # otherwise find dtype-specific version, falling back to object
2261
- for dt in [dtype_str , 'object' ]:
2262
- f = getattr (libgroupby , "%s_%s" % (fname , dtype_str ), None )
2263
- if f is not None :
2264
- return f
2311
+ def _get_cython_function (self , kind , how , values , is_numeric ):
2312
+
2313
+ dtype_str = values .dtype .name
2265
2314
2266
2315
ftype = self ._cython_functions [kind ][how ]
2267
2316
2268
2317
if isinstance (ftype , dict ):
2269
- func = afunc = get_func (ftype ['name' ])
2318
+ func = afunc = self ._get_func (ftype ['name' ], dtype_str = dtype_str ,
2319
+ is_numeric = is_numeric )
2270
2320
2271
2321
# a sub-function
2272
2322
f = ftype .get ('f' )
@@ -2279,7 +2329,8 @@ def wrapper(*args, **kwargs):
2279
2329
func = wrapper
2280
2330
2281
2331
else :
2282
- func = get_func (ftype )
2332
+ func = self ._get_func (ftype , dtype_str = dtype_str ,
2333
+ is_numeric = is_numeric )
2283
2334
2284
2335
if func is None :
2285
2336
raise NotImplementedError ("function is not implemented for this"
0 commit comments