Skip to content

Commit 5e007f8

Browse files
committed
Fixed issue when filling Series after GroupBy
1 parent 5074198 commit 5e007f8

File tree

2 files changed: 38 additions (+38) and 25 deletions (-25).

2 files changed: 38 additions (+38) and 25 deletions (-25).

pandas/core/groupby.py

+27 -21 (27 additions, 21 deletions)
Original file line number | Diff line number | Diff line change
@@ -2039,31 +2039,37 @@ def _get_group_keys(self):
20392039
self.levels,
20402040
self.labels)
20412041

2042-
def _cython_apply(self, f, data, axis, **kwargs):
2043-
output = collections.OrderedDict()
2044-
for col in data.columns:
2045-
if col in self.names:
2046-
output[col] = data[col].values
2047-
else:
2048-
# duplicative of _get_cython_function; needs refactor
2049-
dtype_str = data[col].dtype.name
2050-
values = data[col].values[:, None]
2051-
func = afunc = self._get_func(f['name'], dtype_str)
2052-
f = f.get('f')
2042+
def _cython_apply(self, ftype, data, axis, **kwargs):
2043+
def _generate_output(ser):
2044+
# duplicative of _get_cython_function; needs refactor
2045+
dtype_str = ser.dtype.name
2046+
values = ser.values[:, None]
2047+
func = afunc = self._get_func(ftype['name'], dtype_str)
2048+
f = ftype.get('f')
20532049

2054-
def wrapper(*args, **kwargs):
2055-
return f(afunc, *args, **kwargs)
2050+
def wrapper(*args, **kwargs):
2051+
return f(afunc, *args, **kwargs)
20562052

2057-
func = wrapper
2058-
labels, _, _ = self.group_info
2053+
func = wrapper
2054+
labels, _, _ = self.group_info
2055+
2056+
result = _maybe_fill(np.empty_like(values, dtype=dtype_str),
2057+
fill_value=np.nan)
2058+
func(result, values, labels, **kwargs)
20592059

2060-
result = _maybe_fill(np.empty_like(values, dtype=dtype_str),
2061-
fill_value=np.nan)
2062-
func(result, values, labels, **kwargs)
2063-
output[col] = result[:, 0]
2060+
return result[:, 0]
20642061

2065-
# Ugh
2066-
return DataFrame(output, index=data.index)
2062+
# Using introspection to determine result; not ideal needs refactor
2063+
if type(data) is Series:
2064+
return Series(_generate_output(data), name=data.name)
2065+
else:
2066+
output = collections.OrderedDict()
2067+
for col in data.columns:
2068+
if col in self.names:
2069+
output[col] = data[col].values
2070+
else:
2071+
output[col] = _generate_output(data[col])
2072+
return DataFrame(output, index=data.index)
20672073

20682074
def apply(self, f, data, axis=0):
20692075
mutated = self.mutated

pandas/tests/groupby/test_groupby.py

+11 -4 (11 additions, 4 deletions)
Original file line number | Diff line number | Diff line change
@@ -2061,6 +2061,7 @@ def test_rank_object_raises(self, ties_method, ascending, na_option,
20612061
ascending=ascending,
20622062
na_option=na_option, pct=pct)
20632063

2064+
@pytest.mark.parametrize("as_series", [True, False])
20642065
@pytest.mark.parametrize("fill_method,limit,exp_vals", [
20652066
("ffill", None,
20662067
[np.nan, np.nan, 'foo', 'foo', 'foo', 'bar', 'bar', 'bar']),
@@ -2071,14 +2072,20 @@ def test_rank_object_raises(self, ties_method, ascending, na_option,
20712072
("bfill", 1,
20722073
[np.nan, 'foo', 'foo', np.nan, 'bar', 'bar', np.nan, np.nan])
20732074
])
2074-
def test_group_fill_methods(self, fill_method, limit, exp_vals):
2075+
def test_group_fill_methods(self, as_series, fill_method, limit, exp_vals):
20752076
vals = [np.nan, np.nan, 'foo', np.nan, np.nan, 'bar', np.nan, np.nan]
20762077
keys = ['a'] * len(vals) + ['b'] * len(vals)
20772078
df = DataFrame({'key': keys, 'val': vals * 2})
2078-
result = getattr(df.groupby('key'), fill_method)(limit=limit)
20792079

2080-
exp = DataFrame({'key': keys, 'val': exp_vals * 2})
2081-
assert_frame_equal(result, exp)
2080+
if as_series:
2081+
result = getattr(
2082+
df.groupby('key')['val'], fill_method)(limit=limit)
2083+
exp = Series(exp_vals * 2, name='val')
2084+
assert_series_equal(result, exp)
2085+
else:
2086+
result = getattr(df.groupby('key'), fill_method)(limit=limit)
2087+
exp = DataFrame({'key': keys, 'val': exp_vals * 2})
2088+
assert_frame_equal(result, exp)
20822089

20832090
def test_dont_clobber_name_column(self):
20842091
df = DataFrame({'key': ['a', 'a', 'a', 'b', 'b', 'b'],

0 commit comments

Comments
 (0)