pandas-dev · jreback · Oct 10, 2020 · Aug 8, 2020 · Aug 23, 2020 · Aug 23, 2020
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -413,6 +413,7 @@ Groupby/resample/rolling
 - Bug in :meth:`Rolling.sum()` returned wrong values when dtypes where mixed between float and integer and axis was equal to one (:issue:`20649`, :issue:`35596`)
 - Bug in :meth:`Rolling.count` returned ``np.nan`` with :class:`pandas.api.indexers.FixedForwardWindowIndexer` as window, ``min_periods=0`` and only missing values in window (:issue:`35579`)
 - Bug where :class:`pandas.core.window.Rolling` produces incorrect window sizes when using a ``PeriodIndex`` (:issue:`34225`)
+- Bug in :meth:`DataFrameGroupBy.ffill` where a ``NaN`` group would return foward-filled values instead of ``NaN`` when ``dropna=True`` (:issue:`34725`)
 
 Reshaping
 ^^^^^^^^^

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -344,7 +344,7 @@ def group_shift_indexer(int64_t[:] out, const int64_t[:] labels,
 @cython.boundscheck(False)
 def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
                          ndarray[uint8_t] mask, object direction,
-                         int64_t limit):
+                         int64_t limit, bint dropna):
     """
     Indexes how to fill values forwards or backwards within a group.
 
@@ -358,6 +358,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
     direction : {'ffill', 'bfill'}
         Direction for fill to be applied (forwards or backwards, respectively)
     limit : Consecutive values to fill before stopping, or -1 for no limit
+    dropna : Flag to indicate if NaN groups should return all NaN values
 
     Notes
     -----
@@ -389,6 +390,8 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
             else:  # reset items when not missing
                 filled_vals = 0
                 curr_fill_idx = idx
+            if dropna and labels[idx] == -1:
+                curr_fill_idx = -1
 
             out[idx] = curr_fill_idx
 

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1855,6 +1855,7 @@ def _fill(self, direction, limit=None):
             result_is_index=True,
             direction=direction,
             limit=limit,
+            dropna=self.dropna,
         )
 
     @Substitution(name="groupby")

diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
@@ -977,6 +977,40 @@ def test_ffill_bfill_non_unique_multilevel(func, expected_status):
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.parametrize("method", ["ffill", "bfill"])
+@pytest.mark.parametrize("dropna", [True, False])
+@pytest.mark.parametrize("has_nan_group", [True, False])
+def test_ffill_handles_nan_groups(dropna, method, has_nan_group):
+    # GH 34725
+
+    df_without_nan_rows = pd.DataFrame([(1, 0.1), (2, 0.2)])
+
+    ridx = [-1, 0, -1, -1, 1, -1]
+    df = df_without_nan_rows.reindex(ridx).reset_index(drop=True)
+
+    group_b = np.nan if has_nan_group else "b"
+    df["group_col"] = pd.Series(["a"] * 3 + [group_b] * 3)
+
+    grouped = df.groupby(by="group_col", dropna=dropna)
+    result = getattr(grouped, method)(limit=None)
+
+    expected_rows = {
+        ("ffill", True, True): [-1, 0, 0, -1, -1, -1],
+        ("ffill", True, False): [-1, 0, 0, -1, 1, 1],
+        ("ffill", False, True): [-1, 0, 0, -1, 1, 1],
+        ("ffill", False, False): [-1, 0, 0, -1, 1, 1],
+        ("bfill", True, True): [0, 0, -1, -1, -1, -1],
+        ("bfill", True, False): [0, 0, -1, 1, 1, -1],
+        ("bfill", False, True): [0, 0, -1, 1, 1, -1],
+        ("bfill", False, False): [0, 0, -1, 1, 1, -1],
+    }
+
+    ridx = expected_rows.get((method, dropna, has_nan_group))
+    expected = df_without_nan_rows.reindex(ridx).reset_index(drop=True)
+
+    tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize("func", [np.any, np.all])
 def test_any_all_np_func(func):
     # GH 20653