Skip to content

Commit 7b4dff4

Browse files
authored
BUG: Groupby.agg with timegrouper, nat, and numba (#43515)
1 parent 6549a74 commit 7b4dff4

File tree

4 files changed: +25 -15 lines changed

doc/source/whatsnew/v1.4.0.rst

+1 -1
Original file line number | Diff line number | Diff line change
@@ -423,7 +423,7 @@ Groupby/resample/rolling
423423
- Bug in :meth:`Series.rolling` when the :class:`Series` ``dtype`` was ``Int64`` (:issue:`43016`)
424424
- Bug in :meth:`DataFrame.rolling.corr` when the :class:`DataFrame` columns was a :class:`MultiIndex` (:issue:`21157`)
425425
- Bug in :meth:`DataFrame.groupby.rolling` when specifying ``on`` and calling ``__getitem__`` would subsequently return incorrect results (:issue:`43355`)
426-
- Bug in :meth:`GroupBy.apply` with time-based :class:`Grouper` objects incorrectly raising ``ValueError`` in corner cases where the grouping vector contains a ``NaT`` (:issue:`43500`)
426+
- Bug in :meth:`GroupBy.apply` with time-based :class:`Grouper` objects incorrectly raising ``ValueError`` in corner cases where the grouping vector contains a ``NaT`` (:issue:`43500`, :issue:`43515`)
427427

428428
Reshaping
429429
^^^^^^^^^

pandas/core/groupby/generic.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -241,7 +241,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
241241
result = self._aggregate_with_numba(
242242
data.to_frame(), func, *args, engine_kwargs=engine_kwargs, **kwargs
243243
)
244-
index = self._group_keys_index
244+
index = self.grouper.result_index
245245
return self.obj._constructor(result.ravel(), index=index, name=data.name)
246246

247247
relabeling = func is None
@@ -896,7 +896,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
896896
result = self._aggregate_with_numba(
897897
data, func, *args, engine_kwargs=engine_kwargs, **kwargs
898898
)
899-
index = self._group_keys_index
899+
index = self.grouper.result_index
900900
return self.obj._constructor(result, index=index, columns=data.columns)
901901

902902
relabeling, func, columns, order = reconstruct_func(func, **kwargs)

pandas/core/groupby/groupby.py

-12
Original file line number | Diff line number | Diff line change
@@ -1226,18 +1226,6 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool:
12261226
# expected "bool")
12271227
return numeric_only # type: ignore[return-value]
12281228

1229-
@cache_readonly
1230-
def _group_keys_index(self) -> Index:
1231-
# The index to use for the result of Groupby Aggregations.
1232-
# This _may_ be redundant with self.grouper.result_index, but that
1233-
# has not been conclusively proven yet.
1234-
keys = self.grouper.group_keys_seq
1235-
if self.grouper.nkeys > 1:
1236-
index = MultiIndex.from_tuples(keys, names=self.grouper.names)
1237-
else:
1238-
index = Index._with_infer(keys, name=self.grouper.names[0])
1239-
return index
1240-
12411229
# -----------------------------------------------------------------
12421230
# numba
12431231

pandas/tests/groupby/test_timegrouper.py

+22
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,8 @@
77
import pytest
88
import pytz
99

10+
import pandas.util._test_decorators as td
11+
1012
import pandas as pd
1113
from pandas import (
1214
DataFrame,
@@ -886,3 +888,23 @@ def test_groupby_apply_timegrouper_with_nat_apply_squeeze(
886888
ex_values = df["Quantity"].take(ordering).values * 2
887889
expected = Series(ex_values, index=mi, name="Quantity")
888890
tm.assert_series_equal(res, expected)
891+
892+
@td.skip_if_no("numba")
893+
def test_groupby_agg_numba_timegrouper_with_nat(
894+
self, groupby_with_truncated_bingrouper
895+
):
896+
# See discussion in GH#43487
897+
gb = groupby_with_truncated_bingrouper
898+
899+
result = gb["Quantity"].aggregate(
900+
lambda values, index: np.nanmean(values), engine="numba"
901+
)
902+
903+
expected = gb["Quantity"].aggregate(np.nanmean)
904+
tm.assert_series_equal(result, expected)
905+
906+
result_df = gb[["Quantity"]].aggregate(
907+
lambda values, index: np.nanmean(values), engine="numba"
908+
)
909+
expected_df = gb[["Quantity"]].aggregate(np.nanmean)
910+
tm.assert_frame_equal(result_df, expected_df)

0 commit comments

Comments
 (0)