38
38
_ensure_float )
39
39
from pandas .core .dtypes .cast import maybe_downcast_to_dtype
40
40
from pandas .core .dtypes .generic import ABCSeries
41
- from pandas .core .dtypes .missing import isna , notna , _maybe_fill
41
+ from pandas .core .dtypes .missing import isna , isnull , notna , _maybe_fill
42
42
43
43
from pandas .core .base import (PandasObject , SelectionMixin , GroupByError ,
44
44
DataError , SpecificationError )
@@ -875,28 +875,21 @@ def apply(self, func, *args, **kwargs):
875
875
876
876
func = self ._is_builtin_func (func )
877
877
878
- # Try to go down the Cython path first
879
- try :
880
- f = self .grouper ._cython_functions ['apply' ][func ]
881
- return self .grouper ._cython_apply (f , self ._selected_obj , self .axis ,
882
- ** kwargs )
883
- except KeyError :
884
- # this is needed so we don't try and wrap strings. If we could
885
- # resolve functions to their callable functions prior, this
886
- # wouldn't be needed
887
- if args or kwargs :
888
- if callable (func ):
889
-
890
- @wraps (func )
891
- def f (g ):
892
- with np .errstate (all = 'ignore' ):
893
- return func (g , * args , ** kwargs )
894
- else :
895
- raise ValueError ('func must be a callable if args or '
896
- 'kwargs are supplied and func is not '
897
- 'implemented in Cython' )
878
+ # this is needed so we don't try and wrap strings. If we could
879
+ # resolve functions to their callable functions prior, this
880
+ # wouldn't be needed
881
+ if args or kwargs :
882
+ if callable (func ):
883
+
884
+ @wraps (func )
885
+ def f (g ):
886
+ with np .errstate (all = 'ignore' ):
887
+ return func (g , * args , ** kwargs )
898
888
else :
899
- f = func
889
+ raise ValueError ('func must be a callable if args or '
890
+ 'kwargs are supplied' )
891
+ else :
892
+ f = func
900
893
901
894
# ignore SettingWithCopy here in case the user mutates
902
895
with option_context ('mode.chained_assignment' , None ):
@@ -1462,6 +1455,25 @@ def expanding(self, *args, **kwargs):
1462
1455
from pandas .core .window import ExpandingGroupby
1463
1456
return ExpandingGroupby (self , * args , ** kwargs )
1464
1457
1458
def _fill(self, how, limit=None):
    """
    Shared implementation behind ``pad``/``ffill`` and ``backfill``/``bfill``.

    Parameters
    ----------
    how : str
        Fill direction, forwarded unchanged to
        ``libgroupby.group_fillna_indexer`` (callers in this file pass
        ``'ffill'`` or ``'bfill'``).
    limit : int, optional
        Forwarded to ``libgroupby.group_fillna_indexer``; ``None`` is
        translated to ``-1`` first.

    Returns
    -------
    The result of ``self._wrap_transformed_output`` applied to a dict
    mapping each slice name (and, for ``DataFrameGroupBy``, each grouper
    name) to its array of values.
    """
    group_ids, _, _ = self.grouper.group_info

    # Need int value for Cython
    cy_limit = -1 if limit is None else limit

    filled = {}

    # Only the exact DataFrameGroupBy type carries the grouping columns
    # through to the output, copied as-is.
    if type(self) is DataFrameGroupBy:
        for key_name in self.grouper.names:
            filled[key_name] = self.obj[key_name].values

    for col_name, col in self._iterate_slices():
        col_values = col.values
        take_idx = np.zeros_like(group_ids)
        na_mask = isnull(col_values).view(np.uint8)
        # NOTE(review): presumably writes the source positions into
        # ``take_idx`` in place -- confirm against the Cython routine.
        libgroupby.group_fillna_indexer(take_idx, na_mask, group_ids, how,
                                        cy_limit)
        filled[col_name] = algorithms.take_nd(col_values, take_idx)

    return self._wrap_transformed_output(filled)
1476
+
1465
1477
@Substitution (name = 'groupby' )
1466
1478
def pad (self , limit = None ):
1467
1479
"""
@@ -1479,7 +1491,7 @@ def pad(self, limit=None):
1479
1491
Series.fillna
1480
1492
DataFrame.fillna
1481
1493
"""
1482
- return self .apply ('ffill' , limit = limit )
1494
+ return self ._fill ('ffill' , limit = limit )
1483
1495
ffill = pad
1484
1496
1485
1497
@Substitution (name = 'groupby' )
@@ -1499,7 +1511,7 @@ def backfill(self, limit=None):
1499
1511
Series.fillna
1500
1512
DataFrame.fillna
1501
1513
"""
1502
- return self .apply ('bfill' , limit = limit )
1514
+ return self ._fill ('bfill' , limit = limit )
1503
1515
bfill = backfill
1504
1516
1505
1517
@Substitution (name = 'groupby' )
@@ -2039,38 +2051,6 @@ def _get_group_keys(self):
2039
2051
self .levels ,
2040
2052
self .labels )
2041
2053
2042
- def _cython_apply (self , ftype , data , axis , ** kwargs ):
2043
- def _generate_output (ser ):
2044
- # duplicative of _get_cython_function; needs refactor
2045
- dtype_str = ser .dtype .name
2046
- values = ser .values [:, None ]
2047
- func = afunc = self ._get_func (ftype ['name' ], dtype_str )
2048
- f = ftype .get ('f' )
2049
-
2050
- def wrapper (* args , ** kwargs ):
2051
- return f (afunc , * args , ** kwargs )
2052
-
2053
- func = wrapper
2054
- labels , _ , _ = self .group_info
2055
-
2056
- result = _maybe_fill (np .empty_like (values , dtype = dtype_str ),
2057
- fill_value = np .nan )
2058
- func (result , values , labels , ** kwargs )
2059
-
2060
- return result [:, 0 ]
2061
-
2062
- # Using introspection to determine result; not ideal needs refactor
2063
- if type (data ) is Series :
2064
- return Series (_generate_output (data ), name = data .name )
2065
- else :
2066
- output = collections .OrderedDict ()
2067
- for col in data .columns :
2068
- if col in self .names :
2069
- output [col ] = data [col ].values
2070
- else :
2071
- output [col ] = _generate_output (data [col ])
2072
- return DataFrame (output , index = data .index )
2073
-
2074
2054
def apply (self , f , data , axis = 0 ):
2075
2055
mutated = self .mutated
2076
2056
splitter = self ._get_splitter (data , axis = axis )
@@ -2267,22 +2247,6 @@ def get_group_levels(self):
2267
2247
kwargs .get ('na_option' , 'keep' )
2268
2248
)
2269
2249
}
2270
- },
2271
- 'apply' : {
2272
- 'ffill' : {
2273
- 'name' : 'group_fillna' ,
2274
- 'f' : lambda func , a , b , c , ** kwargs : func (
2275
- a , b , c ,
2276
- 'ffill' , kwargs ['limit' ] if kwargs ['limit' ] else - 1
2277
- )
2278
- },
2279
- 'bfill' : {
2280
- 'name' : 'group_fillna' ,
2281
- 'f' : lambda func , a , b , c , ** kwargs : func (
2282
- a , b , c ,
2283
- 'bfill' , kwargs ['limit' ] if kwargs ['limit' ] else - 1
2284
- )
2285
- }
2286
2250
}
2287
2251
}
2288
2252
@@ -2301,28 +2265,27 @@ def _is_builtin_func(self, arg):
2301
2265
"""
2302
2266
return SelectionMixin ._builtin_table .get (arg , arg )
2303
2267
2304
- def _get_func (self , fname , dtype_str = None , is_numeric = False ):
2305
- # see if there is a fused-type version of function
2306
- # only valid for numeric
2307
- f = getattr (libgroupby , fname , None )
2308
- if f is not None and is_numeric :
2309
- return f
2310
-
2311
- # otherwise find dtype-specific version, falling back to object
2312
- for dt in [dtype_str , 'object' ]:
2313
- f = getattr (libgroupby , "%s_%s" % (fname , dtype_str ), None )
2314
- if f is not None :
2315
- return f
2316
-
2317
2268
def _get_cython_function (self , kind , how , values , is_numeric ):
2318
2269
2319
2270
dtype_str = values .dtype .name
2320
2271
2272
def get_func(fname):
    """
    Look up the ``libgroupby`` routine registered under ``fname``.

    Reads ``is_numeric`` and ``dtype_str`` from the enclosing scope.

    Parameters
    ----------
    fname : str
        Base name of the routine to look up on ``libgroupby``.

    Returns
    -------
    callable or None
        The attribute named ``fname`` when it exists and ``is_numeric`` is
        truthy; otherwise the first existing attribute among
        ``"<fname>_<dtype_str>"`` and ``"<fname>_object"``; ``None`` when
        none of them exist.
    """
    # see if there is a fused-type version of function
    # only valid for numeric
    f = getattr(libgroupby, fname, None)
    if f is not None and is_numeric:
        return f

    # otherwise find dtype-specific version, falling back to object
    for dt in [dtype_str, 'object']:
        # Format with the loop variable: using ``dtype_str`` here probed
        # the same name twice and never reached the object fallback.
        f = getattr(libgroupby, "%s_%s" % (fname, dt), None)
        if f is not None:
            return f

    # Nothing matched; the caller checks for ``None``.
    return None
2284
+
2321
2285
ftype = self ._cython_functions [kind ][how ]
2322
2286
2323
2287
if isinstance (ftype , dict ):
2324
- func = afunc = self ._get_func (ftype ['name' ], dtype_str = dtype_str ,
2325
- is_numeric = is_numeric )
2288
+ func = afunc = get_func (ftype ['name' ])
2326
2289
2327
2290
# a sub-function
2328
2291
f = ftype .get ('f' )
@@ -2335,8 +2298,7 @@ def wrapper(*args, **kwargs):
2335
2298
func = wrapper
2336
2299
2337
2300
else :
2338
- func = self ._get_func (ftype , dtype_str = dtype_str ,
2339
- is_numeric = is_numeric )
2301
+ func = get_func (ftype )
2340
2302
2341
2303
if func is None :
2342
2304
raise NotImplementedError ("function is not implemented for this"
0 commit comments