38
38
_ensure_float )
39
39
from pandas .core .dtypes .cast import maybe_downcast_to_dtype
40
40
from pandas .core .dtypes .generic import ABCSeries
41
- from pandas .core .dtypes .missing import isna , notna , _maybe_fill
41
+ from pandas .core .dtypes .missing import isna , isnull , notna , _maybe_fill
42
42
43
43
from pandas .core .base import (PandasObject , SelectionMixin , GroupByError ,
44
44
DataError , SpecificationError )
@@ -877,28 +877,21 @@ def apply(self, func, *args, **kwargs):
877
877
878
878
func = self ._is_builtin_func (func )
879
879
880
- # Try to go down the Cython path first
881
- try :
882
- f = self .grouper ._cython_functions ['apply' ][func ]
883
- return self .grouper ._cython_apply (f , self ._selected_obj , self .axis ,
884
- ** kwargs )
885
- except KeyError :
886
- # this is needed so we don't try and wrap strings. If we could
887
- # resolve functions to their callable functions prior, this
888
- # wouldn't be needed
889
- if args or kwargs :
890
- if callable (func ):
891
-
892
- @wraps (func )
893
- def f (g ):
894
- with np .errstate (all = 'ignore' ):
895
- return func (g , * args , ** kwargs )
896
- else :
897
- raise ValueError ('func must be a callable if args or '
898
- 'kwargs are supplied and func is not '
899
- 'implemented in Cython' )
880
+ # this is needed so we don't try and wrap strings. If we could
881
+ # resolve functions to their callable functions prior, this
882
+ # wouldn't be needed
883
+ if args or kwargs :
884
+ if callable (func ):
885
+
886
+ @wraps (func )
887
+ def f (g ):
888
+ with np .errstate (all = 'ignore' ):
889
+ return func (g , * args , ** kwargs )
900
890
else :
901
- f = func
891
+ raise ValueError ('func must be a callable if args or '
892
+ 'kwargs are supplied' )
893
+ else :
894
+ f = func
902
895
903
896
# ignore SettingWithCopy here in case the user mutates
904
897
with option_context ('mode.chained_assignment' , None ):
@@ -1464,6 +1457,25 @@ def expanding(self, *args, **kwargs):
1464
1457
from pandas .core .window import ExpandingGroupby
1465
1458
return ExpandingGroupby (self , * args , ** kwargs )
1466
1459
1460
def _fill(self, how, limit=None):
    """
    Fill missing values within each group via a Cython-built indexer.

    Parameters
    ----------
    how : str
        Fill direction handed to ``libgroupby.group_fillna_indexer``;
        the callers in this file pass ``'ffill'`` or ``'bfill'``.
    limit : int, optional
        Forwarded to the Cython routine. ``None`` is translated to ``-1``
        because the routine requires an integer.

    Returns
    -------
    Whatever ``self._wrap_transformed_output`` builds from a dict that maps
    each slice name to its values re-taken through the fill indexer.
    """
    labels, _, _ = self.grouper.group_info

    # Need int value for Cython
    if limit is None:
        limit = -1

    output = {}
    # isinstance (rather than an exact type match) so that subclasses of
    # DataFrameGroupBy also carry their grouping columns through.
    if isinstance(self, DataFrameGroupBy):
        # NOTE(review): assumes every grouper name is a column of
        # self.obj -- confirm for index-level or external grouping keys.
        for nm in self.grouper.names:
            output[nm] = self.obj[nm].values

    for name, obj in self._iterate_slices():
        values = obj.values
        # group_fillna_indexer writes into ``indexer`` in place.
        indexer = np.zeros_like(labels)
        # ``isna`` is the name already imported by this module; the boolean
        # mask is reinterpreted as uint8 for the Cython call.
        mask = isna(values).view(np.uint8)
        libgroupby.group_fillna_indexer(indexer, mask, labels, how, limit)
        output[name] = algorithms.take_nd(values, indexer)

    return self._wrap_transformed_output(output)
1478
+
1467
1479
@Substitution (name = 'groupby' )
1468
1480
def pad (self , limit = None ):
1469
1481
"""
@@ -1481,7 +1493,7 @@ def pad(self, limit=None):
1481
1493
Series.fillna
1482
1494
DataFrame.fillna
1483
1495
"""
1484
- return self .apply ('ffill' , limit = limit )
1496
+ return self ._fill ('ffill' , limit = limit )
1485
1497
ffill = pad
1486
1498
1487
1499
@Substitution (name = 'groupby' )
@@ -1501,7 +1513,7 @@ def backfill(self, limit=None):
1501
1513
Series.fillna
1502
1514
DataFrame.fillna
1503
1515
"""
1504
- return self .apply ('bfill' , limit = limit )
1516
+ return self ._fill ('bfill' , limit = limit )
1505
1517
bfill = backfill
1506
1518
1507
1519
@Substitution (name = 'groupby' )
@@ -2041,38 +2053,6 @@ def _get_group_keys(self):
2041
2053
self .levels ,
2042
2054
self .labels )
2043
2055
2044
- def _cython_apply (self , ftype , data , axis , ** kwargs ):
2045
- def _generate_output (ser ):
2046
- # duplicative of _get_cython_function; needs refactor
2047
- dtype_str = ser .dtype .name
2048
- values = ser .values [:, None ]
2049
- func = afunc = self ._get_func (ftype ['name' ], dtype_str )
2050
- f = ftype .get ('f' )
2051
-
2052
- def wrapper (* args , ** kwargs ):
2053
- return f (afunc , * args , ** kwargs )
2054
-
2055
- func = wrapper
2056
- labels , _ , _ = self .group_info
2057
-
2058
- result = _maybe_fill (np .empty_like (values , dtype = dtype_str ),
2059
- fill_value = np .nan )
2060
- func (result , values , labels , ** kwargs )
2061
-
2062
- return result [:, 0 ]
2063
-
2064
- # Using introspection to determine result; not ideal needs refactor
2065
- if type (data ) is Series :
2066
- return Series (_generate_output (data ), name = data .name )
2067
- else :
2068
- output = collections .OrderedDict ()
2069
- for col in data .columns :
2070
- if col in self .names :
2071
- output [col ] = data [col ].values
2072
- else :
2073
- output [col ] = _generate_output (data [col ])
2074
- return DataFrame (output , index = data .index )
2075
-
2076
2056
def apply (self , f , data , axis = 0 ):
2077
2057
mutated = self .mutated
2078
2058
splitter = self ._get_splitter (data , axis = axis )
@@ -2269,22 +2249,6 @@ def get_group_levels(self):
2269
2249
kwargs .get ('na_option' , 'keep' )
2270
2250
)
2271
2251
}
2272
- },
2273
- 'apply' : {
2274
- 'ffill' : {
2275
- 'name' : 'group_fillna' ,
2276
- 'f' : lambda func , a , b , c , ** kwargs : func (
2277
- a , b , c ,
2278
- 'ffill' , kwargs ['limit' ] if kwargs ['limit' ] else - 1
2279
- )
2280
- },
2281
- 'bfill' : {
2282
- 'name' : 'group_fillna' ,
2283
- 'f' : lambda func , a , b , c , ** kwargs : func (
2284
- a , b , c ,
2285
- 'bfill' , kwargs ['limit' ] if kwargs ['limit' ] else - 1
2286
- )
2287
- }
2288
2252
}
2289
2253
}
2290
2254
@@ -2303,28 +2267,27 @@ def _is_builtin_func(self, arg):
2303
2267
"""
2304
2268
return SelectionMixin ._builtin_table .get (arg , arg )
2305
2269
2306
- def _get_func (self , fname , dtype_str = None , is_numeric = False ):
2307
- # see if there is a fused-type version of function
2308
- # only valid for numeric
2309
- f = getattr (libgroupby , fname , None )
2310
- if f is not None and is_numeric :
2311
- return f
2312
-
2313
- # otherwise find dtype-specific version, falling back to object
2314
- for dt in [dtype_str , 'object' ]:
2315
- f = getattr (libgroupby , "%s_%s" % (fname , dtype_str ), None )
2316
- if f is not None :
2317
- return f
2318
-
2319
2270
def _get_cython_function (self , kind , how , values , is_numeric ):
2320
2271
2321
2272
dtype_str = values .dtype .name
2322
2273
2274
def get_func(fname):
    """
    Resolve ``fname`` to a routine on ``libgroupby``.

    ``is_numeric`` and ``dtype_str`` are read from the enclosing scope.
    The attribute named exactly ``fname`` is returned when it exists and
    ``is_numeric`` is true. Otherwise the first existing attribute among
    ``<fname>_<dtype_str>`` and ``<fname>_object`` is returned. ``None``
    is returned when nothing matches.
    """
    # see if there is a fused-type version of function
    # only valid for numeric
    f = getattr(libgroupby, fname, None)
    if f is not None and is_numeric:
        return f

    # otherwise find dtype-specific version, falling back to object
    for dt in [dtype_str, 'object']:
        # Format with the loop variable; using dtype_str here would probe
        # the same name twice and never reach the object fallback.
        f = getattr(libgroupby, "%s_%s" % (fname, dt), None)
        if f is not None:
            return f

    return None
2286
+
2323
2287
ftype = self ._cython_functions [kind ][how ]
2324
2288
2325
2289
if isinstance (ftype , dict ):
2326
- func = afunc = self ._get_func (ftype ['name' ], dtype_str = dtype_str ,
2327
- is_numeric = is_numeric )
2290
+ func = afunc = get_func (ftype ['name' ])
2328
2291
2329
2292
# a sub-function
2330
2293
f = ftype .get ('f' )
@@ -2337,8 +2300,7 @@ def wrapper(*args, **kwargs):
2337
2300
func = wrapper
2338
2301
2339
2302
else :
2340
- func = self ._get_func (ftype , dtype_str = dtype_str ,
2341
- is_numeric = is_numeric )
2303
+ func = get_func (ftype )
2342
2304
2343
2305
if func is None :
2344
2306
raise NotImplementedError ("function is not implemented for this"
0 commit comments