Skip to content

BUG: GroupBy.ffill()/bfill() do not return NaN values for NaN groups #36790

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Oct 10, 2020
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ Bug fixes
- Bug in :meth:`Series.astype` showing too much precision when casting from ``np.float32`` to string dtype (:issue:`36451`)
- Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` when using ``NaN`` and a row length above 1,000,000 (:issue:`22205`)
- Bug in :func:`cut` raising a ``ValueError`` when passed a :class:`Series` of labels with ``ordered=False`` (:issue:`36603`)
- Bug in :meth:`DataFrameGroupBy.ffill` where a ``NaN`` group would return forward-filled values instead of ``NaN`` when ``dropna=True`` (:issue:`34725`)

.. ---------------------------------------------------------------------------

Expand Down
16 changes: 15 additions & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1847,16 +1847,30 @@ def _fill(self, direction, limit=None):
if limit is None:
limit = -1

return self._get_cythonized_result(
def _nan_group_gets_nan_values(values, *args):
if not self.dropna:
return values
in_nan_group = DataFrame(self.grouper.codes).eq(-1).any()
if in_nan_group.any():
filler = {np.datetime64: np.datetime64("NaT")}.get(
values.dtype.type, np.nan
)
values[in_nan_group] = filler
return values

res = self._get_cythonized_result(
"group_fillna_indexer",
numeric_only=False,
needs_mask=True,
cython_dtype=np.dtype(np.int64),
result_is_index=True,
direction=direction,
limit=limit,
post_processing=_nan_group_gets_nan_values,
)

return res

@Substitution(name="groupby")
def pad(self, limit=None):
"""
Expand Down
70 changes: 70 additions & 0 deletions pandas/tests/groupby/transform/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,76 @@ def test_ffill_bfill_non_unique_multilevel(func, expected_status):
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("dropna", [True, False])
@pytest.mark.parametrize("limit", [None, 1])
@pytest.mark.parametrize("method", ["ffill", "bfill", "pad", "backfill"])
@pytest.mark.parametrize("by", ["grp1", ["grp1"], ["grp1", "grp2"]])
@pytest.mark.parametrize("has_nan", [[], ["grp1"], ["grp1", "grp2"]])
def test_pad_handles_nan_groups(dropna, limit, method, by, has_nan):
    # GH 34725: with dropna=True, rows that belong to a NaN group must stay
    # NaN after a groupby ffill/bfill instead of being filled from neighbors.

    # Create two rows with many different dtypes. The first row will be in
    # the 'good' group, which never has a NaN in the grouping column(s). The
    # second row will be in the 'bad' group, which sometimes has a NaN in
    # the grouping column(s).
    rows = pd.DataFrame(
        {
            "int": pd.array([1, 2], dtype="Int64"),
            "float": [0.1, 0.2],
            "bool": pd.array([True, False], dtype="bool"),
            "date": [pd.Timestamp(2010, 1, 1), pd.Timestamp(2020, 2, 2)],
            "period": pd.array(
                [pd.Period("2010-01"), pd.Period("2020-2")], dtype="period[M]"
            ),
            "obj": ["hello", "world"],
            "cat": pd.Categorical(["a", "b"], categories=["a", "b", "c"]),
        }
    )

    # Put those rows into a 10-row dataframe at positions 2 and 7. This
    # allows us to ffill and bfill the rows and confirm that the method is
    # behaving as expected.
    ridx = pd.Series([None] * 10)
    ridx[2] = 0
    ridx[7] = 1
    df = rows.reindex(ridx).reset_index(drop=True)

    # Add the grouping column(s): rows 0-4 -> 'good', rows 5-9 -> 'bad'.
    grps = pd.Series(["good"] * 5 + ["bad"] * 5)
    if isinstance(by, list):
        grps = pd.concat([grps] * len(by), axis=1)
    df[by] = grps

    # Our 'has_nan' arg sometimes lists more columns than we are actually
    # grouping by (our 'by' arg), i.e. has_nan=['grp1', 'grp2'] when
    # by=['grp1']. We can just reduce 'has_nan' to its intersection with 'by'.
    by = by if isinstance(by, list) else [by]
    has_nan = list(set(has_nan).intersection(set(by)))

    # For the columns that are in 'has_nan', replace 'bad' with NaN.
    df[has_nan] = df[has_nan].replace("bad", np.nan)

    grouped = df.groupby(by=by, dropna=dropna)
    result = getattr(grouped, method)(limit=limit)

    # If dropna=True and 'bad' has been replaced by NaN, then the second
    # 5 rows will all be NaN, which is what we want: the NaN group contains
    # only NaN values.
    if dropna and (len(has_nan) > 0):
        ridx[7] = None

    # To get our expected/benchmark output, we ffill/bfill the rows directly
    # (not via a groupby), so we don't want limit=None for this part. With 5
    # rows per group and the value rows in positions 2 & 7, we ffill/bfill
    # with limit=2. If we used limit=None, rows 2 & 7 would ffill/bfill into
    # the other group.
    lim = 2 if limit is None else limit
    ridx = getattr(ridx, method)(limit=lim)
    expected = rows.reindex(ridx).reset_index(drop=True)

    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("func", [np.any, np.all])
def test_any_all_np_func(func):
# GH 20653
Expand Down