Skip to content

Commit 3b24fb6

Browse files
adbull authored and WillAyd committed
API: groupby ffill adds labels as extra column (#21521) (#26162)
1 parent e5d15b2 commit 3b24fb6

File tree

4 files changed

+45
-13
lines changed

4 files changed

+45
-13
lines changed

doc/source/whatsnew/v0.25.0.rst

+31
Original file line number | Diff line number | Diff line change
@@ -154,6 +154,37 @@ Providing any ``SparseSeries`` or ``SparseDataFrame`` to :func:`concat` will
154154
cause a ``SparseSeries`` or ``SparseDataFrame`` to be returned, as before.
155155

156156

157+
``DataFrame`` groupby ffill/bfill no longer return group labels
158+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
159+
160+
The methods ``ffill``, ``bfill``, ``pad`` and ``backfill`` of
161+
:class:`DataFrameGroupBy <pandas.core.groupby.DataFrameGroupBy>`
162+
previously included the group labels in the return value, which was
163+
inconsistent with other groupby transforms. Now only the filled values
164+
are returned. (:issue:`21521`)
165+
166+
.. ipython:: python
167+
168+
df = pd.DataFrame({"a": ["x", "y"], "b": [1, 2]})
169+
df
170+
171+
*Previous Behaviour*:
172+
173+
.. code-block:: python
174+
175+
In [3]: df.groupby("a").ffill()
176+
Out[3]:
177+
a b
178+
0 x 1
179+
1 y 2
180+
181+
*New Behaviour*:
182+
183+
.. ipython:: python
184+
185+
df.groupby("a").ffill()
186+
187+
157188
.. _whatsnew_0250.api_breaking.deps:
158189

159190
Increased minimum versions for dependencies

pandas/core/groupby/generic.py

-9
Original file line number | Diff line number | Diff line change
@@ -1480,15 +1480,6 @@ def _apply_to_column_groupbys(self, func):
14801480
in self._iterate_column_groupbys()),
14811481
keys=self._selected_obj.columns, axis=1)
14821482

1483-
def _fill(self, direction, limit=None):
1484-
"""Overridden method to join grouped columns in output"""
1485-
res = super()._fill(direction, limit=limit)
1486-
output = OrderedDict(
1487-
(grp.name, grp.grouper) for grp in self.grouper.groupings)
1488-
1489-
from pandas import concat
1490-
return concat((self._wrap_transformed_output(output), res), axis=1)
1491-
14921483
def count(self):
14931484
"""
14941485
Compute count of group, excluding missing values.

pandas/core/groupby/groupby.py

-1
Original file line number | Diff line number | Diff line change
@@ -2235,7 +2235,6 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
22352235
limit=limit, freq=freq,
22362236
axis=axis))
22372237
filled = getattr(self, fill_method)(limit=limit)
2238-
filled = filled.drop(self.grouper.names, axis=1)
22392238
fill_grp = filled.groupby(self.grouper.labels)
22402239
shifted = fill_grp.shift(periods=periods, freq=freq)
22412240
return (filled / shifted) - 1

pandas/tests/groupby/test_transform.py

+14 -3
Original file line number | Diff line number | Diff line change
@@ -749,7 +749,7 @@ def interweave(list_obj):
749749
assert_series_equal(result, exp)
750750
else:
751751
result = getattr(df.groupby('key'), fill_method)(limit=limit)
752-
exp = DataFrame({'key': keys, 'val': _exp_vals})
752+
exp = DataFrame({'val': _exp_vals})
753753
assert_frame_equal(result, exp)
754754

755755

@@ -763,7 +763,7 @@ def test_pad_stable_sorting(fill_method):
763763
y = y[::-1]
764764

765765
df = pd.DataFrame({'x': x, 'y': y})
766-
expected = df.copy()
766+
expected = df.drop('x', 1)
767767

768768
result = getattr(df.groupby('x'), fill_method)()
769769

@@ -789,7 +789,7 @@ def test_pct_change(test_series, freq, periods, fill_method, limit):
789789
df = DataFrame({'key': key_v, 'vals': vals * 2})
790790

791791
df_g = getattr(df.groupby('key'), fill_method)(limit=limit)
792-
grp = df_g.groupby('key')
792+
grp = df_g.groupby(df.key)
793793

794794
expected = grp['vals'].obj / grp['vals'].shift(periods) - 1
795795

@@ -880,3 +880,14 @@ def test_transform_absent_categories(func):
880880
result = getattr(df.y.groupby(df.x), func)()
881881
expected = df.y
882882
assert_series_equal(result, expected)
883+
884+
885+
@pytest.mark.parametrize('func', ['ffill', 'bfill', 'shift'])
886+
@pytest.mark.parametrize('key, val', [('level', 0), ('by', Series([0]))])
887+
def test_ffill_not_in_axis(func, key, val):
888+
# GH 21521
889+
df = pd.DataFrame([[np.nan]])
890+
result = getattr(df.groupby(**{key: val}), func)()
891+
expected = df
892+
893+
assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)