Skip to content

Commit 0252697

Browse files
Adam authored and abull committed
BUG: groupby ffill adds labels as extra column (pandas-dev#21521)
1 parent d74901b commit 0252697

File tree

4 files changed

+21 -2 lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,7 @@ Groupby/Resample/Rolling
389389
- Bug in :meth:`pandas.core.window.Rolling.min` and :meth:`pandas.core.window.Rolling.max` that caused a memory leak (:issue:`25893`)
390390
- Bug in :meth:`pandas.core.groupby.GroupBy.idxmax` and :meth:`pandas.core.groupby.GroupBy.idxmin` with datetime column would return incorrect dtype (:issue:`25444`, :issue:`15306`)
391391
- Bug in :meth:`pandas.core.groupby.GroupBy.cumsum`, :meth:`pandas.core.groupby.GroupBy.cumprod`, :meth:`pandas.core.groupby.GroupBy.cummin` and :meth:`pandas.core.groupby.GroupBy.cummax` with categorical column having absent categories, would return incorrect result or segfault (:issue:`16771`)
392+
- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.ffill` and :meth:`pandas.core.groupby.DataFrameGroupBy.bfill` where group labels that are not columns of the frame were concatenated onto the returned value (:issue:`21521`)
392393

393394
Reshaping
394395
^^^^^^^^^

pandas/core/groupby/generic.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -1472,8 +1472,14 @@ def _apply_to_column_groupbys(self, func):
14721472
def _fill(self, direction, limit=None):
14731473
"""Overridden method to join grouped columns in output"""
14741474
res = super(DataFrameGroupBy, self)._fill(direction, limit=limit)
1475+
1476+
# Columns with group labels get removed by GroupBy._fill before calling
1477+
# into cython; if they're supposed to be part of the output, we have to
1478+
# add them back here. We add all group labels which came from the
1479+
# input DataFrame, and are in the list of selected columns.
14751480
output = OrderedDict(
1476-
(grp.name, grp.grouper) for grp in self.grouper.groupings)
1481+
(grp.name, grp.grouper) for grp in self.grouper.groupings
1482+
if grp.in_axis and grp.name in self._selected_obj)
14771483

14781484
from pandas import concat
14791485
return concat((self._wrap_transformed_output(output), res), axis=1)

pandas/core/groupby/groupby.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2127,7 +2127,8 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
21272127
limit=limit, freq=freq,
21282128
axis=axis))
21292129
filled = getattr(self, fill_method)(limit=limit)
2130-
filled = filled.drop(self.grouper.names, axis=1)
2130+
filled = filled.drop(filled.columns.intersection(self.grouper.names),
2131+
axis=1)
21312132
fill_grp = filled.groupby(self.grouper.labels)
21322133
shifted = fill_grp.shift(periods=periods, freq=freq)
21332134
return (filled / shifted) - 1

pandas/tests/groupby/test_transform.py

+11
Original file line numberDiff line numberDiff line change
@@ -880,3 +880,14 @@ def test_transform_absent_categories(func):
880880
result = getattr(df.y.groupby(df.x), func)()
881881
expected = df.y
882882
assert_series_equal(result, expected)
883+
884+
885+
@pytest.mark.parametrize('func', ['ffill', 'bfill'])
886+
@pytest.mark.parametrize('key, val', [('level', 0), ('by', Series([0]))])
887+
def test_ffill_not_in_axis(func, key, val):
888+
# GH 21521
889+
df = pd.DataFrame([[0]])
890+
result = getattr(df.groupby(**{key: val}), func)()
891+
expected = df
892+
893+
assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)