Skip to content

BUG: GroupBy.apply with timegrouper and NaT #43509

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 1 commit merged on Sep 11, 2021
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1168,11 +1168,7 @@ def _wrap_applied_output_series(
applied_index = self._selected_obj._get_axis(self.axis)
singular_series = len(values) == 1 and applied_index.nlevels == 1

# assign the name to this series
if singular_series:
keys = self.grouper.group_keys_seq
values[0].name = keys[0]

# GH2893
# we have series in the values array, we want to
# produce a series:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1037,7 +1037,7 @@ def reset_identity(values):
if self.as_index:

# possible MI return case
group_keys = self.grouper.group_keys_seq
group_keys = self.grouper.result_index
group_levels = self.grouper.levels
group_names = self.grouper.names

Expand Down
57 changes: 48 additions & 9 deletions pandas/tests/groupby/test_timegrouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,11 @@


@pytest.fixture
def groupby_with_truncated_bingrouper():
def frame_for_truncated_bingrouper():
"""
GroupBy object such that gb.grouper is a BinGrouper and
len(gb.grouper.result_index) < len(gb.grouper.group_keys_seq)

Aggregations on this groupby should have

dti = date_range("2013-09-01", "2013-10-01", freq="5D", name="Date")

As either the index or an index level.
DataFrame used by groupby_with_truncated_bingrouper, made into
a separate fixture for easier re-use in
test_groupby_apply_timegrouper_with_nat_apply_squeeze
"""
df = DataFrame(
{
Expand All @@ -48,6 +43,22 @@ def groupby_with_truncated_bingrouper():
],
}
)
return df


@pytest.fixture
def groupby_with_truncated_bingrouper(frame_for_truncated_bingrouper):
"""
GroupBy object such that gb.grouper is a BinGrouper and
len(gb.grouper.result_index) < len(gb.grouper.group_keys_seq)

Aggregations on this groupby should have

dti = date_range("2013-09-01", "2013-10-01", freq="5D", name="Date")

As either the index or an index level.
"""
df = frame_for_truncated_bingrouper

tdg = Grouper(key="Date", freq="5D")
gb = df.groupby(tdg)
Expand Down Expand Up @@ -847,3 +858,31 @@ def test_groupby_apply_timegrouper_with_nat_scalar_returns(
)

tm.assert_series_equal(res, expected)

def test_groupby_apply_timegrouper_with_nat_apply_squeeze(
self, frame_for_truncated_bingrouper
):
df = frame_for_truncated_bingrouper

# We need to create a GroupBy object with only one non-NaT group,
# so use a huge freq so that all non-NaT dates will be grouped together
tdg = Grouper(key="Date", freq="100Y")

with tm.assert_produces_warning(FutureWarning, match="`squeeze` parameter"):
gb = df.groupby(tdg, squeeze=True)

# check that we will go through the singular_series path
# in _wrap_applied_output_series
assert gb.ngroups == 1
assert gb._selected_obj._get_axis(gb.axis).nlevels == 1

# function that returns a Series
res = gb.apply(lambda x: x["Quantity"] * 2)

key = Timestamp("2013-12-31")
ordering = df["Date"].sort_values().dropna().index
mi = MultiIndex.from_product([[key], ordering], names=["Date", None])

ex_values = df["Quantity"].take(ordering).values * 2
expected = Series(ex_values, index=mi, name="Quantity")
tm.assert_series_equal(res, expected)