Skip to content

Commit 2c62dc4

Browse files
committed
Connected GroupBy method to Cython fillna
1 parent 1f652a9 commit 2c62dc4

File tree

1 file changed

+82
-31
lines changed

1 file changed

+82
-31
lines changed

pandas/core/groupby.py

+82-31
Original file line number | Diff line number | Diff line change
@@ -877,21 +877,28 @@ def apply(self, func, *args, **kwargs):
877877

878878
func = self._is_builtin_func(func)
879879

880-
# this is needed so we don't try and wrap strings. If we could
881-
# resolve functions to their callable functions prior, this
882-
# wouldn't be needed
883-
if args or kwargs:
884-
if callable(func):
885-
886-
@wraps(func)
887-
def f(g):
888-
with np.errstate(all='ignore'):
889-
return func(g, *args, **kwargs)
880+
# Try to go down the Cython path first
881+
try:
882+
f = self.grouper._cython_functions['apply'][func]
883+
return self.grouper._cython_apply(f, self._selected_obj, self.axis,
884+
**kwargs)
885+
except KeyError:
886+
# this is needed so we don't try and wrap strings. If we could
887+
# resolve functions to their callable functions prior, this
888+
# wouldn't be needed
889+
if args or kwargs:
890+
if callable(func):
891+
892+
@wraps(func)
893+
def f(g):
894+
with np.errstate(all='ignore'):
895+
return func(g, *args, **kwargs)
896+
else:
897+
raise ValueError('func must be a callable if args or '
898+
'kwargs are supplied and func is not '
899+
'implemented in Cython')
890900
else:
891-
raise ValueError('func must be a callable if args or '
892-
'kwargs are supplied')
893-
else:
894-
f = func
901+
f = func
895902

896903
# ignore SettingWithCopy here in case the user mutates
897904
with option_context('mode.chained_assignment', None):
@@ -1474,7 +1481,7 @@ def pad(self, limit=None):
14741481
Series.fillna
14751482
DataFrame.fillna
14761483
"""
1477-
return self.apply(lambda x: x.ffill(limit=limit))
1484+
return self.apply('ffill', limit=limit)
14781485
ffill = pad
14791486

14801487
@Substitution(name='groupby')
@@ -1494,7 +1501,7 @@ def backfill(self, limit=None):
14941501
Series.fillna
14951502
DataFrame.fillna
14961503
"""
1497-
return self.apply(lambda x: x.bfill(limit=limit))
1504+
return self.apply('bfill', limit=limit)
14981505
bfill = backfill
14991506

15001507
@Substitution(name='groupby')
@@ -2034,6 +2041,32 @@ def _get_group_keys(self):
20342041
self.levels,
20352042
self.labels)
20362043

2044+
def _cython_apply(self, f, data, axis, **kwargs):
    """
    Apply a Cython group-wise routine column by column.

    Parameters
    ----------
    f : dict
        Function spec. ``f['name']`` is the routine name handed to
        ``self._get_func``; the optional ``f['f']`` entry is a callable
        invoked as ``f['f'](routine, result, values, labels, **kwargs)``.
    data : DataFrame-like
        Object exposing ``columns``, ``index`` and column access via
        ``data[col]``.
    axis : int
        Accepted for signature compatibility; not used by this method.
    **kwargs
        Forwarded to the routine (or to the ``f['f']`` sub-function).

    Returns
    -------
    DataFrame
        One column per column of ``data``, indexed by ``data.index``.

    Raises
    ------
    NotImplementedError
        If ``self._get_func`` finds no routine for a column's dtype.
    """
    # Resolve the spec once, under distinct names.  Rebinding ``f`` inside
    # the loop would turn it into the sub-function after the first
    # processed column, so ``f['name']`` would fail on the next one.
    routine_name = f['name']
    sub_func = f.get('f')

    output = collections.OrderedDict()
    for col in data.columns:
        if col in self.names:
            # columns named in self.names are passed through unchanged
            output[col] = data[col].values
            continue

        # duplicative of _get_cython_function; needs refactor
        dtype_str = data[col].dtype.name
        # add a trailing axis of length 1 before handing to the routine
        values = data[col].values[:, None]

        afunc = self._get_func(routine_name, dtype_str)
        if afunc is None:
            raise NotImplementedError(
                "function %r is not implemented for dtype %r"
                % (routine_name, dtype_str))

        labels, _, _ = self.group_info

        # the routine writes into ``result`` in place
        result = _maybe_fill(np.empty_like(values, dtype=dtype_str),
                             fill_value=np.nan)
        if sub_func is None:
            afunc(result, values, labels, **kwargs)
        else:
            sub_func(afunc, result, values, labels, **kwargs)
        output[col] = result[:, 0]

    return DataFrame(output, index=data.index)
2069+
20372070
def apply(self, f, data, axis=0):
20382071
mutated = self.mutated
20392072
splitter = self._get_splitter(data, axis=axis)
@@ -2230,6 +2263,22 @@ def get_group_levels(self):
22302263
kwargs.get('na_option', 'keep')
22312264
)
22322265
}
2266+
},
2267+
'apply': {
2268+
'ffill': {
2269+
'name': 'group_fillna',
2270+
'f': lambda func, a, b, c, **kwargs: func(
2271+
a, b, c,
2272+
'ffill', kwargs['limit'] if kwargs['limit'] else -1
2273+
)
2274+
},
2275+
'bfill': {
2276+
'name': 'group_fillna',
2277+
'f': lambda func, a, b, c, **kwargs: func(
2278+
a, b, c,
2279+
'bfill', kwargs['limit'] if kwargs['limit'] else -1
2280+
)
2281+
}
22332282
}
22342283
}
22352284

@@ -2248,27 +2297,28 @@ def _is_builtin_func(self, arg):
22482297
"""
22492298
return SelectionMixin._builtin_table.get(arg, arg)
22502299

2251-
def _get_cython_function(self, kind, how, values, is_numeric):
2252-
2253-
dtype_str = values.dtype.name
2300+
def _get_func(self, fname, dtype_str=None, is_numeric=False):
    """
    Look up a routine on ``libgroupby`` by name.

    Parameters
    ----------
    fname : str
        Base name of the routine.
    dtype_str : str, optional
        dtype name used to build the dtype-specific candidate
        ``"<fname>_<dtype_str>"``.
    is_numeric : bool, default False
        If True and ``libgroupby`` has an attribute named exactly
        ``fname``, that attribute is returned directly.

    Returns
    -------
    callable or None
        The first matching attribute, or None when nothing matches.
    """
    # see if there is a fused-type version of function
    # only valid for numeric
    f = getattr(libgroupby, fname, None)
    if f is not None and is_numeric:
        return f

    # otherwise find dtype-specific version, falling back to object.
    # The candidate name must be built from the loop variable; formatting
    # with ``dtype_str`` would retry the same name and never reach the
    # object fallback.
    for dt in [dtype_str, 'object']:
        f = getattr(libgroupby, "%s_%s" % (fname, dt), None)
        if f is not None:
            return f

    return None
22612312

2262-
# otherwise find dtype-specific version, falling back to object
2263-
for dt in [dtype_str, 'object']:
2264-
f = getattr(libgroupby, "%s_%s" % (fname, dtype_str), None)
2265-
if f is not None:
2266-
return f
2313+
def _get_cython_function(self, kind, how, values, is_numeric):
2314+
2315+
dtype_str = values.dtype.name
22672316

22682317
ftype = self._cython_functions[kind][how]
22692318

22702319
if isinstance(ftype, dict):
2271-
func = afunc = get_func(ftype['name'])
2320+
func = afunc = self._get_func(ftype['name'], dtype_str=dtype_str,
2321+
is_numeric=is_numeric)
22722322

22732323
# a sub-function
22742324
f = ftype.get('f')
@@ -2281,7 +2331,8 @@ def wrapper(*args, **kwargs):
22812331
func = wrapper
22822332

22832333
else:
2284-
func = get_func(ftype)
2334+
func = self._get_func(ftype, dtype_str=dtype_str,
2335+
is_numeric=is_numeric)
22852336

22862337
if func is None:
22872338
raise NotImplementedError("function is not implemented for this"

0 commit comments

Comments
 (0)