From 2310d0d523fe37544b55dbf1c61b5f9863c701d6 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 10 Sep 2021 21:23:45 -0700 Subject: [PATCH] BUG: GroupBy.apply with timegrouper and NaT --- pandas/core/groupby/generic.py | 4 -- pandas/core/groupby/groupby.py | 2 +- pandas/tests/groupby/test_timegrouper.py | 57 ++++++++++++++++++++---- 3 files changed, 49 insertions(+), 14 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 469e0e3901721..e031ed158c131 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1168,11 +1168,7 @@ def _wrap_applied_output_series( applied_index = self._selected_obj._get_axis(self.axis) singular_series = len(values) == 1 and applied_index.nlevels == 1 - # assign the name to this series if singular_series: - keys = self.grouper.group_keys_seq - values[0].name = keys[0] - # GH2893 # we have series in the values array, we want to # produce a series: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 256276c3c91e3..a06eceb217cdc 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1037,7 +1037,7 @@ def reset_identity(values): if self.as_index: # possible MI return case - group_keys = self.grouper.group_keys_seq + group_keys = self.grouper.result_index group_levels = self.grouper.levels group_names = self.grouper.names diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index f40fb8cba3435..0debe39eb294d 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -24,16 +24,11 @@ @pytest.fixture -def groupby_with_truncated_bingrouper(): +def frame_for_truncated_bingrouper(): """ - GroupBy object such that gb.grouper is a BinGrouper and - len(gb.grouper.result_index) < len(gb.grouper.group_keys_seq) - - Aggregations on this groupby should have - - dti = date_range("2013-09-01", "2013-10-01", freq="5D", name="Date") - - As either the index or an index level. + DataFrame used by groupby_with_truncated_bingrouper, made into + a separate fixture for easier re-use in + test_groupby_apply_timegrouper_with_nat_apply_squeeze """ df = DataFrame( { @@ -48,6 +43,22 @@ def groupby_with_truncated_bingrouper(): ], } ) + return df + + +@pytest.fixture +def groupby_with_truncated_bingrouper(frame_for_truncated_bingrouper): + """ + GroupBy object such that gb.grouper is a BinGrouper and + len(gb.grouper.result_index) < len(gb.grouper.group_keys_seq) + + Aggregations on this groupby should have + + dti = date_range("2013-09-01", "2013-10-01", freq="5D", name="Date") + + As either the index or an index level. + """ + df = frame_for_truncated_bingrouper tdg = Grouper(key="Date", freq="5D") gb = df.groupby(tdg) @@ -847,3 +858,31 @@ def test_groupby_apply_timegrouper_with_nat_scalar_returns( ) tm.assert_series_equal(res, expected) + + def test_groupby_apply_timegrouper_with_nat_apply_squeeze( + self, frame_for_truncated_bingrouper + ): + df = frame_for_truncated_bingrouper + + # We need to create a GroupBy object with only one non-NaT group, + # so use a huge freq so that all non-NaT dates will be grouped together + tdg = Grouper(key="Date", freq="100Y") + + with tm.assert_produces_warning(FutureWarning, match="`squeeze` parameter"): + gb = df.groupby(tdg, squeeze=True) + + # check that we will go through the singular_series path + # in _wrap_applied_output_series + assert gb.ngroups == 1 + assert gb._selected_obj._get_axis(gb.axis).nlevels == 1 + + # function that returns a Series + res = gb.apply(lambda x: x["Quantity"] * 2) + + key = Timestamp("2013-12-31") + ordering = df["Date"].sort_values().dropna().index + mi = MultiIndex.from_product([[key], ordering], names=["Date", None]) + + ex_values = df["Quantity"].take(ordering).values * 2 + expected = Series(ex_values, index=mi, name="Quantity") + tm.assert_series_equal(res, expected)