Skip to content

Commit 680131d

Browse files
committed
Connected GroupBy method to Cython fillna
1 parent 3c4a188 commit 680131d

File tree

1 file changed

+82
-31
lines changed

1 file changed

+82
-31
lines changed

pandas/core/groupby.py

+82-31
Original file line number | Diff line number | Diff line change
@@ -875,21 +875,28 @@ def apply(self, func, *args, **kwargs):
875875

876876
func = self._is_builtin_func(func)
877877

878-
# this is needed so we don't try and wrap strings. If we could
879-
# resolve functions to their callable functions prior, this
880-
# wouldn't be needed
881-
if args or kwargs:
882-
if callable(func):
883-
884-
@wraps(func)
885-
def f(g):
886-
with np.errstate(all='ignore'):
887-
return func(g, *args, **kwargs)
878+
# Try to go down the Cython path first
879+
try:
880+
f = self.grouper._cython_functions['apply'][func]
881+
return self.grouper._cython_apply(f, self._selected_obj, self.axis,
882+
**kwargs)
883+
except KeyError:
884+
# this is needed so we don't try and wrap strings. If we could
885+
# resolve functions to their callable functions prior, this
886+
# wouldn't be needed
887+
if args or kwargs:
888+
if callable(func):
889+
890+
@wraps(func)
891+
def f(g):
892+
with np.errstate(all='ignore'):
893+
return func(g, *args, **kwargs)
894+
else:
895+
raise ValueError('func must be a callable if args or '
896+
'kwargs are supplied and func is not '
897+
'implemented in Cython')
888898
else:
889-
raise ValueError('func must be a callable if args or '
890-
'kwargs are supplied')
891-
else:
892-
f = func
899+
f = func
893900

894901
# ignore SettingWithCopy here in case the user mutates
895902
with option_context('mode.chained_assignment', None):
@@ -1472,7 +1479,7 @@ def pad(self, limit=None):
14721479
Series.fillna
14731480
DataFrame.fillna
14741481
"""
1475-
return self.apply(lambda x: x.ffill(limit=limit))
1482+
return self.apply('ffill', limit=limit)
14761483
ffill = pad
14771484

14781485
@Substitution(name='groupby')
@@ -1492,7 +1499,7 @@ def backfill(self, limit=None):
14921499
Series.fillna
14931500
DataFrame.fillna
14941501
"""
1495-
return self.apply(lambda x: x.bfill(limit=limit))
1502+
return self.apply('bfill', limit=limit)
14961503
bfill = backfill
14971504

14981505
@Substitution(name='groupby')
@@ -2032,6 +2039,32 @@ def _get_group_keys(self):
20322039
self.levels,
20332040
self.labels)
20342041

2042+
def _cython_apply(self, f, data, axis, **kwargs):
    """
    Run a Cython group-wise routine over every column of ``data``.

    Columns whose label appears in ``self.names`` are copied through
    untouched; every other column is handed to the Cython routine
    described by ``f`` and the filled result is collected.

    Parameters
    ----------
    f : dict
        Function spec. ``f['name']`` is the base name passed to
        ``self._get_func``; the optional ``f['f']`` is an adapter called
        as ``f['f'](cython_func, out, values, labels, **kwargs)``.
    data : DataFrame
        Object providing ``columns``, ``index`` and per-column access.
    axis : int
        Accepted for signature parity with the caller; not used here.
    **kwargs
        Forwarded unchanged to the adapter (or to the Cython routine
        when the spec has no adapter).

    Returns
    -------
    DataFrame
        One column per input column, indexed like ``data``.

    Raises
    ------
    NotImplementedError
        If ``self._get_func`` finds no routine for a column's dtype.
    """
    # Read the spec once, up front. The previous implementation rebound
    # ``f`` to the adapter inside the loop, so ``f['name']`` raised a
    # TypeError as soon as a second non-key column was processed.
    fname = f['name']
    adapter = f.get('f')

    # Group labels do not depend on the column; compute them once.
    labels, _, _ = self.group_info

    output = collections.OrderedDict()
    for col in data.columns:
        column = data[col]

        if col in self.names:
            output[col] = column.values
            continue

        # duplicative of _get_cython_function; needs refactor
        dtype_str = column.dtype.name
        cython_func = self._get_func(fname, dtype_str)
        if cython_func is None:
            # Fail with a clear message instead of "'NoneType' object is
            # not callable" further down.
            raise NotImplementedError(
                "function is not implemented for this dtype: "
                "[how->%s,dtype->%s]" % (fname, dtype_str))

        # The routine is called with 2-D arrays: add a trailing axis on
        # the way in and strip it again on the way out.
        values = column.values[:, None]
        result = _maybe_fill(np.empty_like(values, dtype=dtype_str),
                             fill_value=np.nan)

        if adapter is None:
            cython_func(result, values, labels, **kwargs)
        else:
            adapter(cython_func, result, values, labels, **kwargs)

        output[col] = result[:, 0]

    return DataFrame(output, index=data.index)
2067+
20352068
def apply(self, f, data, axis=0):
20362069
mutated = self.mutated
20372070
splitter = self._get_splitter(data, axis=axis)
@@ -2228,6 +2261,22 @@ def get_group_levels(self):
22282261
kwargs.get('na_option', 'keep')
22292262
)
22302263
}
2264+
},
2265+
'apply': {
2266+
'ffill': {
2267+
'name': 'group_fillna',
2268+
'f': lambda func, a, b, c, **kwargs: func(
2269+
a, b, c,
2270+
'ffill', kwargs['limit'] if kwargs['limit'] else -1
2271+
)
2272+
},
2273+
'bfill': {
2274+
'name': 'group_fillna',
2275+
'f': lambda func, a, b, c, **kwargs: func(
2276+
a, b, c,
2277+
'bfill', kwargs['limit'] if kwargs['limit'] else -1
2278+
)
2279+
}
22312280
}
22322281
}
22332282

@@ -2246,27 +2295,28 @@ def _is_builtin_func(self, arg):
22462295
"""
22472296
return SelectionMixin._builtin_table.get(arg, arg)
22482297

2249-
def _get_cython_function(self, kind, how, values, is_numeric):
2250-
2251-
dtype_str = values.dtype.name
2298+
def _get_func(self, fname, dtype_str=None, is_numeric=False):
2299+
# see if there is a fused-type version of function
2300+
# only valid for numeric
2301+
f = getattr(libgroupby, fname, None)
2302+
if f is not None and is_numeric:
2303+
return f
22522304

2253-
def get_func(fname):
2254-
# see if there is a fused-type version of function
2255-
# only valid for numeric
2256-
f = getattr(libgroupby, fname, None)
2257-
if f is not None and is_numeric:
2305+
# otherwise find dtype-specific version, falling back to object
2306+
for dt in [dtype_str, 'object']:
2307+
f = getattr(libgroupby, "%s_%s" % (fname, dtype_str), None)
2308+
if f is not None:
22582309
return f
22592310

2260-
# otherwise find dtype-specific version, falling back to object
2261-
for dt in [dtype_str, 'object']:
2262-
f = getattr(libgroupby, "%s_%s" % (fname, dtype_str), None)
2263-
if f is not None:
2264-
return f
2311+
def _get_cython_function(self, kind, how, values, is_numeric):
2312+
2313+
dtype_str = values.dtype.name
22652314

22662315
ftype = self._cython_functions[kind][how]
22672316

22682317
if isinstance(ftype, dict):
2269-
func = afunc = get_func(ftype['name'])
2318+
func = afunc = self._get_func(ftype['name'], dtype_str=dtype_str,
2319+
is_numeric=is_numeric)
22702320

22712321
# a sub-function
22722322
f = ftype.get('f')
@@ -2279,7 +2329,8 @@ def wrapper(*args, **kwargs):
22792329
func = wrapper
22802330

22812331
else:
2282-
func = get_func(ftype)
2332+
func = self._get_func(ftype, dtype_str=dtype_str,
2333+
is_numeric=is_numeric)
22832334

22842335
if func is None:
22852336
raise NotImplementedError("function is not implemented for this"

0 commit comments

Comments
 (0)