Skip to content

Commit 952c364

Browse files
WillAyd authored and jorisvandenbossche committed
Stable Sorting Algorithm for Fillna Indexer (#21212)
(cherry picked from commit d30cc74)
1 parent 18ae84c commit 952c364

File tree

3 files changed

+20
-1
lines changed

3 files changed

+20
-1
lines changed

doc/source/whatsnew/v0.23.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ Groupby/Resample/Rolling
5050
^^^^^^^^^^^^^^^^^^^^^^^^
5151

5252
- Bug in :func:`DataFrame.agg` where applying multiple aggregation functions to a :class:`DataFrame` with duplicated column names would cause a stack overflow (:issue:`21063`)
53+
- Bug in :func:`pandas.core.groupby.GroupBy.ffill` and :func:`pandas.core.groupby.GroupBy.bfill` where the fill within a grouping would not always be applied as intended due to the implementations' use of a non-stable sort (:issue:`21207`)
5354

5455
Strings
5556
^^^^^^^

pandas/_libs/groupby.pyx

+2 -1
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,8 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
297297
# Make sure all arrays are the same size
298298
assert N == len(labels) == len(mask)
299299

300-
sorted_labels = np.argsort(labels).astype(np.int64, copy=False)
300+
sorted_labels = np.argsort(labels, kind='mergesort').astype(
301+
np.int64, copy=False)
301302
if direction == 'bfill':
302303
sorted_labels = sorted_labels[::-1]
303304

pandas/tests/groupby/test_transform.py

+17
Original file line numberDiff line numberDiff line change
@@ -721,6 +721,23 @@ def interweave(list_obj):
721721
assert_frame_equal(result, exp)
722722

723723

724+
@pytest.mark.parametrize("fill_method", ['ffill', 'bfill'])
725+
def test_pad_stable_sorting(fill_method):
726+
# GH 21207
727+
x = [0] * 20
728+
y = [np.nan] * 10 + [1] * 10
729+
730+
if fill_method == 'bfill':
731+
y = y[::-1]
732+
733+
df = pd.DataFrame({'x': x, 'y': y})
734+
expected = df.copy()
735+
736+
result = getattr(df.groupby('x'), fill_method)()
737+
738+
tm.assert_frame_equal(result, expected)
739+
740+
724741
@pytest.mark.parametrize("test_series", [True, False])
725742
@pytest.mark.parametrize("periods,fill_method,limit", [
726743
(1, 'ffill', None), (1, 'ffill', 1),

0 commit comments

Comments
 (0)