Skip to content

Commit 389be17

Browse files
authored
BUG: GroupBy.ffill()/bfill() do not return NaN values for NaN groups (#36790)
1 parent 81e47fd commit 389be17

File tree

4 files changed

+41
-2
lines changed

4 files changed

+41
-2
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,7 @@ Groupby/resample/rolling
445445
- Bug in :meth:`Rolling.sum()` returned wrong values when dtypes were mixed between float and integer and axis was equal to one (:issue:`20649`, :issue:`35596`)
446446
- Bug where :meth:`Rolling.count` returned ``np.nan`` when using :class:`pandas.api.indexers.FixedForwardWindowIndexer` as the window, with ``min_periods=0`` and only missing values in the window (:issue:`35579`)
447447
- Bug where :class:`pandas.core.window.Rolling` produces incorrect window sizes when using a ``PeriodIndex`` (:issue:`34225`)
448+
- Bug in :meth:`DataFrameGroupBy.ffill` and :meth:`DataFrameGroupBy.bfill` where a ``NaN`` group would return filled values instead of ``NaN`` when ``dropna=True`` (:issue:`34725`)
448449
- Bug in :meth:`RollingGroupby.count` where a ``ValueError`` was raised when specifying the ``closed`` parameter (:issue:`35869`)
449450
- Bug in :meth:`DataFrame.groupby.rolling` returning wrong values with partial centered window (:issue:`36040`).
450451

pandas/_libs/groupby.pyx

+5-2
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ def group_shift_indexer(int64_t[:] out, const int64_t[:] labels,
344344
@cython.boundscheck(False)
345345
def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
346346
ndarray[uint8_t] mask, object direction,
347-
int64_t limit):
347+
int64_t limit, bint dropna):
348348
"""
349349
Indexes how to fill values forwards or backwards within a group.
350350
@@ -358,6 +358,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
358358
direction : {'ffill', 'bfill'}
359359
Direction for fill to be applied (forwards or backwards, respectively)
360360
limit : Consecutive values to fill before stopping, or -1 for no limit
361+
dropna : Flag to indicate if NaN groups (rows whose label is -1) should return all NaN values
361362
362363
Notes
363364
-----
@@ -381,7 +382,9 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
381382
with nogil:
382383
for i in range(N):
383384
idx = sorted_labels[i]
384-
if mask[idx] == 1: # is missing
385+
if dropna and labels[idx] == -1: # nan-group gets nan-values
386+
curr_fill_idx = -1
387+
elif mask[idx] == 1: # is missing
385388
# Stop filling once we've hit the limit
386389
if filled_vals >= limit and limit != -1:
387390
curr_fill_idx = -1

pandas/core/groupby/groupby.py

+1
Original file line numberDiff line numberDiff line change
@@ -1866,6 +1866,7 @@ def _fill(self, direction, limit=None):
18661866
result_is_index=True,
18671867
direction=direction,
18681868
limit=limit,
1869+
dropna=self.dropna,
18691870
)
18701871

18711872
@Substitution(name="groupby")

pandas/tests/groupby/test_missing.py

+34
Original file line numberDiff line numberDiff line change
@@ -82,3 +82,37 @@ def test_fill_consistency():
8282
expected = df.groupby(level=0, axis=0).fillna(method="ffill")
8383
result = df.T.groupby(level=0, axis=1).fillna(method="ffill").T
8484
tm.assert_frame_equal(result, expected)
85+
86+
87+
@pytest.mark.parametrize("method", ["ffill", "bfill"])
88+
@pytest.mark.parametrize("dropna", [True, False])
89+
@pytest.mark.parametrize("has_nan_group", [True, False])
90+
def test_ffill_handles_nan_groups(dropna, method, has_nan_group):
91+
# GH 34725
92+
93+
df_without_nan_rows = pd.DataFrame([(1, 0.1), (2, 0.2)])
94+
95+
ridx = [-1, 0, -1, -1, 1, -1]
96+
df = df_without_nan_rows.reindex(ridx).reset_index(drop=True)
97+
98+
group_b = np.nan if has_nan_group else "b"
99+
df["group_col"] = pd.Series(["a"] * 3 + [group_b] * 3)
100+
101+
grouped = df.groupby(by="group_col", dropna=dropna)
102+
result = getattr(grouped, method)(limit=None)
103+
104+
expected_rows = {
105+
("ffill", True, True): [-1, 0, 0, -1, -1, -1],
106+
("ffill", True, False): [-1, 0, 0, -1, 1, 1],
107+
("ffill", False, True): [-1, 0, 0, -1, 1, 1],
108+
("ffill", False, False): [-1, 0, 0, -1, 1, 1],
109+
("bfill", True, True): [0, 0, -1, -1, -1, -1],
110+
("bfill", True, False): [0, 0, -1, 1, 1, -1],
111+
("bfill", False, True): [0, 0, -1, 1, 1, -1],
112+
("bfill", False, False): [0, 0, -1, 1, 1, -1],
113+
}
114+
115+
ridx = expected_rows.get((method, dropna, has_nan_group))
116+
expected = df_without_nan_rows.reindex(ridx).reset_index(drop=True)
117+
118+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)