From ff300e889040f2d7567edefe1e7098cf3b834bff Mon Sep 17 00:00:00 2001 From: Sean Malloy Date: Fri, 2 Dec 2022 17:32:36 -0800 Subject: [PATCH 1/3] test: adding new test for groupby cumsum with named aggregate --- pandas/tests/extension/base/groupby.py | 33 ++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py index 1f46442ee13b0..3c7dff06c74a2 100644 --- a/pandas/tests/extension/base/groupby.py +++ b/pandas/tests/extension/base/groupby.py @@ -56,6 +56,39 @@ def test_groupby_agg_extension(self, data_for_grouping): result = df.groupby("A").first() self.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "expected", + [ + pd.DataFrame( + { + "td": { + 0: pd.Timedelta("0 days 01:00:00"), + 1: pd.Timedelta("0 days 01:15:00"), + 2: pd.Timedelta("0 days 01:15:00"), + } + } + ) + ], + ) + def test_groupby_agg_extension_timedelta_cumsum_with_named_aggregation( + self, expected + ): + ts = pd.DatetimeIndex( + [ + pd.Timestamp("2021/01/01 00:30"), + pd.Timestamp("2021/01/01 00:45"), + pd.Timestamp("2021/01/01 02:00"), + ] + ) + df = pd.DataFrame({"value": [1, 2, 3], "ts": ts}) + df["td"] = df["ts"] - df["ts"].shift(1, fill_value=ts[0] - pd.Timedelta("1h")) + df["amount"] = df["value"] * 10 + df.loc[:2, "grps"] = "a" + df.loc[2:, "grps"] = "b" + agg_rules = {"td": ("td", "cumsum")} + result = df.groupby("grps").agg(**agg_rules) + self.assert_frame_equal(result, expected) + def test_groupby_extension_no_sort(self, data_for_grouping): df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) result = df.groupby("B", sort=False).A.mean() From d12629013a9e68981b7715bd0f53d8851f8bdb0f Mon Sep 17 00:00:00 2001 From: Sean Malloy Date: Mon, 5 Dec 2022 18:17:41 -0800 Subject: [PATCH 2/3] tst: updating tests to be clear on intent, and to be better written --- pandas/tests/extension/base/groupby.py | 59 +++++++++++++------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py index 8aba548b1c039..1c949eddfb0ae 100644 --- a/pandas/tests/extension/base/groupby.py +++ b/pandas/tests/extension/base/groupby.py @@ -56,37 +56,38 @@ def test_groupby_agg_extension(self, data_for_grouping): result = df.groupby("A").first() self.assert_frame_equal(result, expected) - @pytest.mark.parametrize( - "expected", - [ - pd.DataFrame( - { - "td": { - 0: pd.Timedelta("0 days 01:00:00"), - 1: pd.Timedelta("0 days 01:15:00"), - 2: pd.Timedelta("0 days 01:15:00"), - } + def test_groupby_agg_extension_timedelta_cumsum_with_named_aggregation(self): + # GH#41720 + expected = pd.DataFrame( + { + "td": { + 0: pd.Timedelta("0 days 01:00:00"), + 1: pd.Timedelta("0 days 01:15:00"), + 2: pd.Timedelta("0 days 01:15:00"), } - ) - ], - ) - def test_groupby_agg_extension_timedelta_cumsum_with_named_aggregation( - self, expected - ): - ts = pd.DatetimeIndex( - [ - pd.Timestamp("2021/01/01 00:30"), - pd.Timestamp("2021/01/01 00:45"), - pd.Timestamp("2021/01/01 02:00"), - ] + } + ) + df = pd.DataFrame( + { + "value": pd.Series(["1", "2", "3"], dtype="int64"), + "ts": pd.Series( + [ + "2021-01-01 00:30:00", + "2021-01-01 00:45:00", + "2021-01-01 02:00:00", + ], + dtype="datetime64[ns]", + ), + "td": pd.Series( + ["0 days 01:00:00", "0 days 00:15:00", "0 days 01:15:00"], + dtype="timedelta64[ns]", + ), + "amount": pd.Series(["10", "20", "30"], dtype="int64"), + "grps": pd.Series(["a", "a", "b"], dtype="object"), + } ) - df = pd.DataFrame({"value": [1, 2, 3], "ts": ts}) - df["td"] = df["ts"] - df["ts"].shift(1, fill_value=ts[0] - pd.Timedelta("1h")) - df["amount"] = df["value"] * 10 - df.loc[:2, "grps"] = "a" - df.loc[2:, "grps"] = "b" - agg_rules = {"td": ("td", "cumsum")} - result = df.groupby("grps").agg(**agg_rules) + gb = df.groupby("grps") + result = gb.agg(td=("td", "cumsum")) self.assert_frame_equal(result, expected) def test_groupby_extension_no_sort(self, data_for_grouping): From 51581c74ed8c59b707666bad3e8095bd256a0e53 Mon Sep 17 00:00:00 2001 From: Sean Malloy Date: Tue, 6 Dec 2022 16:00:50 -0800 Subject: [PATCH 3/3] tst: simplifying test code for groupby test --- pandas/tests/extension/base/groupby.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py index 1c949eddfb0ae..3a9dbe9dfb384 100644 --- a/pandas/tests/extension/base/groupby.py +++ b/pandas/tests/extension/base/groupby.py @@ -69,21 +69,11 @@ def test_groupby_agg_extension_timedelta_cumsum_with_named_aggregation(self): ) df = pd.DataFrame( { - "value": pd.Series(["1", "2", "3"], dtype="int64"), - "ts": pd.Series( - [ - "2021-01-01 00:30:00", - "2021-01-01 00:45:00", - "2021-01-01 02:00:00", - ], - dtype="datetime64[ns]", - ), "td": pd.Series( ["0 days 01:00:00", "0 days 00:15:00", "0 days 01:15:00"], dtype="timedelta64[ns]", ), - "amount": pd.Series(["10", "20", "30"], dtype="int64"), - "grps": pd.Series(["a", "a", "b"], dtype="object"), + "grps": ["a", "a", "b"], } ) gb = df.groupby("grps")