From be005f0dd7fdf1818f6aeddc3a83cdf723505bbc Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 25 May 2018 14:53:28 -0700 Subject: [PATCH 1/3] Added failing test case --- pandas/tests/groupby/test_transform.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 626057c1ea760..a5969b93f28fb 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -720,6 +720,22 @@ def interweave(list_obj): exp = DataFrame({'key': keys, 'val': _exp_vals}) assert_frame_equal(result, exp) +@pytest.mark.parametrize("fill_method", ['ffill', 'bfill']) +def test_pad_stable_sorting(fill_method): + # GH 21207 + x = [0] * 20 + y = [np.nan] * 10 + [1] * 10 + + if fill_method == 'bfill': + y = y[::-1] + + df = pd.DataFrame({'x': x, 'y': y}) + expected = df.copy() + + result = getattr(df.groupby('x'), fill_method)() + + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("test_series", [True, False]) @pytest.mark.parametrize("periods,fill_method,limit", [ From 5f8fdf4ed3306826916cd9be521d5a26e7d645dc Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 25 May 2018 14:55:50 -0700 Subject: [PATCH 2/3] Changed argort kind argument in fillna_indexer --- pandas/_libs/groupby.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 43afd1e0f5969..a6dbaff17e543 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -297,7 +297,8 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, # Make sure all arrays are the same size assert N == len(labels) == len(mask) - sorted_labels = np.argsort(labels).astype(np.int64, copy=False) + sorted_labels = np.argsort(labels, kind='mergesort').astype( + np.int64, copy=False) if direction == 'bfill': sorted_labels = sorted_labels[::-1] From 617ae08fc50934e7b67d46003898807198712f3d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 25 May 2018 16:18:24 -0700 Subject: [PATCH 3/3] LINT fixup and whatsnew --- doc/source/whatsnew/v0.23.1.txt | 1 + pandas/tests/groupby/test_transform.py | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 4876678baaa6e..f400de5f1ad02 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -52,6 +52,7 @@ Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Bug in :func:`DataFrame.agg` where applying multiple aggregation functions to a :class:`DataFrame` with duplicated column names would cause a stack overflow (:issue:`21063`) +- Bug in :func:`pandas.core.groupby.GroupBy.ffill` and :func:`pandas.core.groupby.GroupBy.bfill` where the fill within a grouping would not always be applied as intended due to the implementations' use of a non-stable sort (:issue:`21207`) Strings ^^^^^^^ diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index a5969b93f28fb..7fccf1f57a886 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -720,6 +720,7 @@ def interweave(list_obj): exp = DataFrame({'key': keys, 'val': _exp_vals}) assert_frame_equal(result, exp) + @pytest.mark.parametrize("fill_method", ['ffill', 'bfill']) def test_pad_stable_sorting(fill_method): # GH 21207