From 8dbd94852db38847d91c49890215ea6dbbda94f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Lucas=20Silva=20Mayer?= Date: Wed, 4 Oct 2023 16:57:30 -0300 Subject: [PATCH 1/3] add new automated test with ffill on a multi-index dataframe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This test checks whether the ffill method works correctly on a dataframe with a duplicated index. In it, we define a dataframe with index [0, 1, 2, 0, 1, 2] and column "a" with values [1, 2, NaN, 3, 4, 5]. We group this dataframe by its index (level = 0) and shift each group by one, so we have: 0: [1, 3] -> [NaN, 1] 1: [2, 4] -> [NaN, 2] 2: [NaN, 5] -> [NaN, NaN] Then, since the index order remains the same, if we apply the ffill method, it should give us a dataframe with column "a" equal to [NaN, NaN, NaN, 1, 2, 2]. Co-authored-by: José Lucas Silva Mayer Co-authored-by: Willian Wang --- pandas/tests/groupby/test_groupby.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4ca8b0e317bd2..4b450b2cfc401 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3245,3 +3245,14 @@ def test_get_group_axis_1(): } ) tm.assert_frame_equal(result, expected) + + +def test_groupby_ffill_with_duplicated_index(): + # GH#43412 + df = DataFrame({"a": [1, 2, np.nan, 3, 4, 5]}, index=[0, 1, 2, 0, 1, 2]) + + result = df.groupby(level=0).shift().ffill() + expected = DataFrame( + {"a": [np.nan, np.nan, np.nan, 1, 2, 2]}, index=[0, 1, 2, 0, 1, 2] + ) + tm.assert_frame_equal(result, expected) From 5bf71730dbb91af6cabb8614f082ea9130882da7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Lucas=20Silva=20Mayer?= Date: Thu, 5 Oct 2023 22:50:10 -0300 Subject: [PATCH 2/3] remove shift and add cast dataframe dtype to float MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: José Lucas Silva Mayer --- 
pandas/tests/groupby/test_groupby.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4b450b2cfc401..0288aa60eb2e0 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3249,10 +3249,12 @@ def test_get_group_axis_1(): def test_groupby_ffill_with_duplicated_index(): # GH#43412 - df = DataFrame({"a": [1, 2, np.nan, 3, 4, 5]}, index=[0, 1, 2, 0, 1, 2]) + df = DataFrame( + {"a": [1, 2, 3, 4, np.nan, np.nan]}, index=[0, 1, 2, 0, 1, 2], dtype=float + ) - result = df.groupby(level=0).shift().ffill() + result = df.groupby(level=0).ffill() expected = DataFrame( - {"a": [np.nan, np.nan, np.nan, 1, 2, 2]}, index=[0, 1, 2, 0, 1, 2] + {"a": [1, 2, 3, 4, 2, 3]}, index=[0, 1, 2, 0, 1, 2], dtype=float ) tm.assert_frame_equal(result, expected) From 28aa1e5b10bfbc0d2ed6f3fe0f190855129f8d06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Lucas=20Silva=20Mayer?= Date: Sun, 8 Oct 2023 01:00:25 -0300 Subject: [PATCH 3/3] remove check of dataframe dtype on assert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: José Lucas Silva Mayer --- pandas/tests/groupby/test_groupby.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 0288aa60eb2e0..2d5e03df78d2f 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3249,12 +3249,8 @@ def test_get_group_axis_1(): def test_groupby_ffill_with_duplicated_index(): # GH#43412 - df = DataFrame( - {"a": [1, 2, 3, 4, np.nan, np.nan]}, index=[0, 1, 2, 0, 1, 2], dtype=float - ) + df = DataFrame({"a": [1, 2, 3, 4, np.nan, np.nan]}, index=[0, 1, 2, 0, 1, 2]) result = df.groupby(level=0).ffill() - expected = DataFrame( - {"a": [1, 2, 3, 4, 2, 3]}, index=[0, 1, 2, 0, 1, 2], dtype=float - ) - 
tm.assert_frame_equal(result, expected) + expected = DataFrame({"a": [1, 2, 3, 4, 2, 3]}, index=[0, 1, 2, 0, 1, 2]) + tm.assert_frame_equal(result, expected, check_dtype=False)