diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 4876678baaa6e..f400de5f1ad02 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -52,6 +52,7 @@ Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Bug in :func:`DataFrame.agg` where applying multiple aggregation functions to a :class:`DataFrame` with duplicated column names would cause a stack overflow (:issue:`21063`) +- Bug in :func:`pandas.core.groupby.GroupBy.ffill` and :func:`pandas.core.groupby.GroupBy.bfill` where the fill could be applied in the wrong order within a group because the underlying implementation used a non-stable sort (:issue:`21207`) Strings ^^^^^^^ diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 43afd1e0f5969..a6dbaff17e543 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -297,7 +297,8 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, # Make sure all arrays are the same size assert N == len(labels) == len(mask) - sorted_labels = np.argsort(labels).astype(np.int64, copy=False) + sorted_labels = np.argsort(labels, kind='mergesort').astype( + np.int64, copy=False) if direction == 'bfill': sorted_labels = sorted_labels[::-1] diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 626057c1ea760..7fccf1f57a886 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -721,6 +721,23 @@ def interweave(list_obj): assert_frame_equal(result, exp) +@pytest.mark.parametrize("fill_method", ['ffill', 'bfill']) +def test_pad_stable_sorting(fill_method): + # GH 21207 + x = [0] * 20 + y = [np.nan] * 10 + [1] * 10 + + if fill_method == 'bfill': + y = y[::-1] + + df = pd.DataFrame({'x': x, 'y': y}) + expected = df.copy() + + result = getattr(df.groupby('x'), fill_method)() + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("test_series", [True, False]) 
@pytest.mark.parametrize("periods,fill_method,limit", [ (1, 'ffill', None), (1, 'ffill', 1),