From b13d679a1e26e6f2d3fe8faf140415319d10a0d0 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 11 Sep 2021 10:26:38 -0700 Subject: [PATCH 1/3] BUG: Groupby.agg with timegrouper, nat, and numba --- pandas/core/groupby/generic.py | 4 ++-- pandas/core/groupby/groupby.py | 12 ------------ pandas/tests/groupby/test_timegrouper.py | 19 +++++++++++++++++++ 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 28c44ff64c3e1..b61e8a50ed55c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -231,7 +231,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) result = self._aggregate_with_numba( data.to_frame(), func, *args, engine_kwargs=engine_kwargs, **kwargs ) - index = self._group_keys_index + index = self.grouper.result_index return self.obj._constructor(result.ravel(), index=index, name=data.name) relabeling = func is None @@ -910,7 +910,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) result = self._aggregate_with_numba( data, func, *args, engine_kwargs=engine_kwargs, **kwargs ) - index = self._group_keys_index + index = self.grouper.result_index return self.obj._constructor(result, index=index, columns=data.columns) relabeling, func, columns, order = reconstruct_func(func, **kwargs) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a969b9d0ee160..ee8d7ef459a28 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1226,18 +1226,6 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool: # expected "bool") return numeric_only # type: ignore[return-value] - @cache_readonly - def _group_keys_index(self) -> Index: - # The index to use for the result of Groupby Aggregations. - # This _may_ be redundant with self.grouper.result_index, but that - # has not been conclusively proven yet. - keys = self.grouper.group_keys_seq - if self.grouper.nkeys > 1: - index = MultiIndex.from_tuples(keys, names=self.grouper.names) - else: - index = Index._with_infer(keys, name=self.grouper.names[0]) - return index - # ----------------------------------------------------------------- # numba diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 0debe39eb294d..a467a50d08822 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -886,3 +886,22 @@ def test_groupby_apply_timegrouper_with_nat_apply_squeeze( ex_values = df["Quantity"].take(ordering).values * 2 expected = Series(ex_values, index=mi, name="Quantity") tm.assert_series_equal(res, expected) + + def test_groupby_agg_numba_timegrouper_with_nat( + self, groupby_with_truncated_bingrouper + ): + # See discussion in GH#43487 + gb = groupby_with_truncated_bingrouper + + result = gb["Quantity"].aggregate( + lambda values, index: np.nanmean(values), engine="numba" + ) + + expected = gb["Quantity"].aggregate(np.nanmean) + tm.assert_series_equal(result, expected) + + result_df = gb[["Quantity"]].aggregate( + lambda values, index: np.nanmean(values), engine="numba" + ) + expected_df = gb[["Quantity"]].aggregate(np.nanmean) + tm.assert_frame_equal(result_df, expected_df) From 2aadb82b909ad0fd3ba36954909d6000f1239b78 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 11 Sep 2021 11:54:45 -0700 Subject: [PATCH 2/3] skip if no numba --- pandas/tests/groupby/test_timegrouper.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index a467a50d08822..e9451770054c0 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -7,6 +7,8 @@ import pytest import pytz +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( DataFrame, @@ -887,6 +889,7 @@ def test_groupby_apply_timegrouper_with_nat_apply_squeeze( expected = Series(ex_values, index=mi, name="Quantity") tm.assert_series_equal(res, expected) + @td.skip_if_no("numba") def test_groupby_agg_numba_timegrouper_with_nat( self, groupby_with_truncated_bingrouper ): From 7782fead80628bfdbd74e46548bc97de24cc675e Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 12 Sep 2021 08:04:36 -0700 Subject: [PATCH 3/3] whatsnew --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 328499a4ae98e..1f8f637260c08 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -421,7 +421,7 @@ Groupby/resample/rolling - Bug in :meth:`Series.rolling` when the :class:`Series` ``dtype`` was ``Int64`` (:issue:`43016`) - Bug in :meth:`DataFrame.rolling.corr` when the :class:`DataFrame` columns was a :class:`MultiIndex` (:issue:`21157`) - Bug in :meth:`DataFrame.groupby.rolling` when specifying ``on`` and calling ``__getitem__`` would subsequently return incorrect results (:issue:`43355`) -- Bug in :meth:`GroupBy.apply` with time-based :class:`Grouper` objects incorrectly raising ``ValueError`` in corner cases where the grouping vector contains a ``NaT`` (:issue:`43500`) +- Bug in :meth:`GroupBy.apply` with time-based :class:`Grouper` objects incorrectly raising ``ValueError`` in corner cases where the grouping vector contains a ``NaT`` (:issue:`43500`, :issue:`43515`) Reshaping ^^^^^^^^^