@@ -877,21 +877,28 @@ def apply(self, func, *args, **kwargs):
877
877
878
878
func = self ._is_builtin_func (func )
879
879
880
- # this is needed so we don't try and wrap strings. If we could
881
- # resolve functions to their callable functions prior, this
882
- # wouldn't be needed
883
- if args or kwargs :
884
- if callable (func ):
885
-
886
- @wraps (func )
887
- def f (g ):
888
- with np .errstate (all = 'ignore' ):
889
- return func (g , * args , ** kwargs )
880
+ # Try to go down the Cython path first
881
+ try :
882
+ f = self .grouper ._cython_functions ['apply' ][func ]
883
+ return self .grouper ._cython_apply (f , self ._selected_obj , self .axis ,
884
+ ** kwargs )
885
+ except KeyError :
886
+ # this is needed so we don't try and wrap strings. If we could
887
+ # resolve functions to their callable functions prior, this
888
+ # wouldn't be needed
889
+ if args or kwargs :
890
+ if callable (func ):
891
+
892
+ @wraps (func )
893
+ def f (g ):
894
+ with np .errstate (all = 'ignore' ):
895
+ return func (g , * args , ** kwargs )
896
+ else :
897
+ raise ValueError ('func must be a callable if args or '
898
+ 'kwargs are supplied and func is not '
899
+ 'implemented in Cython' )
890
900
else :
891
- raise ValueError ('func must be a callable if args or '
892
- 'kwargs are supplied' )
893
- else :
894
- f = func
901
+ f = func
895
902
896
903
# ignore SettingWithCopy here in case the user mutates
897
904
with option_context ('mode.chained_assignment' , None ):
@@ -1474,7 +1481,7 @@ def pad(self, limit=None):
1474
1481
Series.fillna
1475
1482
DataFrame.fillna
1476
1483
"""
1477
- return self .apply (lambda x : x . ffill ( limit = limit ) )
1484
+ return self .apply (' ffill' , limit = limit )
1478
1485
ffill = pad
1479
1486
1480
1487
@Substitution (name = 'groupby' )
@@ -1494,7 +1501,7 @@ def backfill(self, limit=None):
1494
1501
Series.fillna
1495
1502
DataFrame.fillna
1496
1503
"""
1497
- return self .apply (lambda x : x . bfill ( limit = limit ) )
1504
+ return self .apply (' bfill' , limit = limit )
1498
1505
bfill = backfill
1499
1506
1500
1507
@Substitution (name = 'groupby' )
@@ -2034,6 +2041,32 @@ def _get_group_keys(self):
2034
2041
self .levels ,
2035
2042
self .labels )
2036
2043
2044
def _cython_apply(self, f, data, axis, **kwargs):
    """
    Run a Cython group-wise kernel over every non-grouping column of
    ``data`` and assemble the per-column results into a DataFrame.

    Parameters
    ----------
    f : dict
        Function description. ``f['name']`` is the kernel name resolved
        through ``self._get_func``; ``f['f']`` is an adapter invoked as
        ``adapter(kernel, result, values, labels, **kwargs)``.
    data : DataFrame
        Object whose columns are processed one at a time.
    axis : int
        Accepted for signature parity with the caller; not used here.
    **kwargs
        Forwarded unchanged to the adapter.

    Returns
    -------
    DataFrame
        Indexed like ``data``. Columns listed in ``self.names`` are copied
        through untouched; every other column holds the kernel output.

    Raises
    ------
    NotImplementedError
        If ``self._get_func`` finds no kernel for a column's dtype.
    """
    # Resolve the loop invariants once.  The adapter must NOT be stored
    # back into ``f``: doing so made ``f['name']`` fail on the second
    # non-grouping column.
    kernel_name = f['name']
    adapter = f.get('f')
    labels, _, _ = self.group_info

    output = collections.OrderedDict()
    for col in data.columns:
        column = data[col]

        if col in self.names:
            # grouping columns are passed through as-is
            output[col] = column.values
            continue

        # duplicative of _get_cython_function; needs refactor
        dtype_str = column.dtype.name
        kernel = self._get_func(kernel_name, dtype_str)
        if kernel is None:
            raise NotImplementedError(
                "function %r is not implemented for dtype %s"
                % (kernel_name, dtype_str))

        # the kernel is handed a 2-D, single-column view of the values
        values = column.values[:, None]
        result = _maybe_fill(np.empty_like(values, dtype=dtype_str),
                             fill_value=np.nan)

        # the kernel is expected to fill ``result`` in place; its return
        # value is ignored
        adapter(kernel, result, values, labels, **kwargs)
        output[col] = result[:, 0]

    return DataFrame(output, index=data.index)
2069
+
2037
2070
def apply (self , f , data , axis = 0 ):
2038
2071
mutated = self .mutated
2039
2072
splitter = self ._get_splitter (data , axis = axis )
@@ -2230,6 +2263,22 @@ def get_group_levels(self):
2230
2263
kwargs .get ('na_option' , 'keep' )
2231
2264
)
2232
2265
}
2266
+ },
2267
+ 'apply' : {
2268
+ 'ffill' : {
2269
+ 'name' : 'group_fillna' ,
2270
+ 'f' : lambda func , a , b , c , ** kwargs : func (
2271
+ a , b , c ,
2272
+ 'ffill' , kwargs ['limit' ] if kwargs ['limit' ] else - 1
2273
+ )
2274
+ },
2275
+ 'bfill' : {
2276
+ 'name' : 'group_fillna' ,
2277
+ 'f' : lambda func , a , b , c , ** kwargs : func (
2278
+ a , b , c ,
2279
+ 'bfill' , kwargs ['limit' ] if kwargs ['limit' ] else - 1
2280
+ )
2281
+ }
2233
2282
}
2234
2283
}
2235
2284
@@ -2248,27 +2297,28 @@ def _is_builtin_func(self, arg):
2248
2297
"""
2249
2298
return SelectionMixin ._builtin_table .get (arg , arg )
2250
2299
2251
- def _get_cython_function (self , kind , how , values , is_numeric ):
2252
-
2253
- dtype_str = values .dtype .name
2300
def _get_func(self, fname, dtype_str=None, is_numeric=False):
    """
    Look up the ``libgroupby`` routine implementing ``fname``.

    Parameters
    ----------
    fname : str
        Base name of the routine.
    dtype_str : str, optional
        Dtype name used to build the dtype-specific routine name
        ``"<fname>_<dtype_str>"``.
    is_numeric : bool, default False
        Whether the un-suffixed (fused-type) routine may be returned.

    Returns
    -------
    callable or None
        The un-suffixed routine when it exists and ``is_numeric`` is true;
        otherwise the ``dtype_str``-specific routine, then the ``object``
        one; ``None`` when none of them exists.
    """
    # see if there is a fused-type version of function
    # only valid for numeric
    f = getattr(libgroupby, fname, None)
    if f is not None and is_numeric:
        return f

    # otherwise find dtype-specific version, falling back to object.
    # The candidate name must be built from ``dt`` (not ``dtype_str``),
    # otherwise the object fallback is never actually tried.
    for dt in (dtype_str, 'object'):
        if dt is None:
            # no dtype supplied: only the object variant is a candidate
            continue
        f = getattr(libgroupby, "%s_%s" % (fname, dt), None)
        if f is not None:
            return f

    return None
2261
2312
2262
- # otherwise find dtype-specific version, falling back to object
2263
- for dt in [dtype_str , 'object' ]:
2264
- f = getattr (libgroupby , "%s_%s" % (fname , dtype_str ), None )
2265
- if f is not None :
2266
- return f
2313
+ def _get_cython_function (self , kind , how , values , is_numeric ):
2314
+
2315
+ dtype_str = values .dtype .name
2267
2316
2268
2317
ftype = self ._cython_functions [kind ][how ]
2269
2318
2270
2319
if isinstance (ftype , dict ):
2271
- func = afunc = get_func (ftype ['name' ])
2320
+ func = afunc = self ._get_func (ftype ['name' ], dtype_str = dtype_str ,
2321
+ is_numeric = is_numeric )
2272
2322
2273
2323
# a sub-function
2274
2324
f = ftype .get ('f' )
@@ -2281,7 +2331,8 @@ def wrapper(*args, **kwargs):
2281
2331
func = wrapper
2282
2332
2283
2333
else :
2284
- func = get_func (ftype )
2334
+ func = self ._get_func (ftype , dtype_str = dtype_str ,
2335
+ is_numeric = is_numeric )
2285
2336
2286
2337
if func is None :
2287
2338
raise NotImplementedError ("function is not implemented for this"
0 commit comments