BUG: Groupby.agg with timegrouper, nat, and numba (#43515)

jbrockmendel · web-flow · commit 7b4dff49b2d4 · 2021-09-12T12:25:46.000-04:00
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -423,7 +423,7 @@ Groupby/resample/rolling
 - Bug in :meth:`Series.rolling` when the :class:`Series` ``dtype`` was ``Int64`` (:issue:`43016`)
 - Bug in :meth:`DataFrame.rolling.corr` when the :class:`DataFrame` columns was a :class:`MultiIndex` (:issue:`21157`)
 - Bug in :meth:`DataFrame.groupby.rolling` when specifying ``on`` and calling ``__getitem__`` would subsequently return incorrect results (:issue:`43355`)
-- Bug in :meth:`GroupBy.apply` with time-based :class:`Grouper` objects incorrectly raising ``ValueError`` in corner cases where the grouping vector contains a ``NaT`` (:issue:`43500`)
+- Bug in :meth:`GroupBy.apply` with time-based :class:`Grouper` objects incorrectly raising ``ValueError`` in corner cases where the grouping vector contains a ``NaT`` (:issue:`43500`, :issue:`43515`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -241,7 +241,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
             result = self._aggregate_with_numba(
                 data.to_frame(), func, *args, engine_kwargs=engine_kwargs, **kwargs
             )
-            index = self._group_keys_index
+            index = self.grouper.result_index
             return self.obj._constructor(result.ravel(), index=index, name=data.name)
 
         relabeling = func is None
@@ -896,7 +896,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
             result = self._aggregate_with_numba(
                 data, func, *args, engine_kwargs=engine_kwargs, **kwargs
             )
-            index = self._group_keys_index
+            index = self.grouper.result_index
             return self.obj._constructor(result, index=index, columns=data.columns)
 
         relabeling, func, columns, order = reconstruct_func(func, **kwargs)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1226,18 +1226,6 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool:
         # expected "bool")
         return numeric_only  # type: ignore[return-value]
 
-    @cache_readonly
-    def _group_keys_index(self) -> Index:
-        # The index to use for the result of Groupby Aggregations.
-        # This _may_ be redundant with self.grouper.result_index, but that
-        #  has not been conclusively proven yet.
-        keys = self.grouper.group_keys_seq
-        if self.grouper.nkeys > 1:
-            index = MultiIndex.from_tuples(keys, names=self.grouper.names)
-        else:
-            index = Index._with_infer(keys, name=self.grouper.names[0])
-        return index
-
     # -----------------------------------------------------------------
     # numba
 
diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py
@@ -7,6 +7,8 @@
 import pytest
 import pytz
 
+import pandas.util._test_decorators as td
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -886,3 +888,23 @@ def test_groupby_apply_timegrouper_with_nat_apply_squeeze(
         ex_values = df["Quantity"].take(ordering).values * 2
         expected = Series(ex_values, index=mi, name="Quantity")
         tm.assert_series_equal(res, expected)
+
+    @td.skip_if_no("numba")
+    def test_groupby_agg_numba_timegrouper_with_nat(
+        self, groupby_with_truncated_bingrouper
+    ):
+        # GH#43515; see discussion in GH#43487
+        gb = groupby_with_truncated_bingrouper
+
+        result = gb["Quantity"].aggregate(
+            lambda values, index: np.nanmean(values), engine="numba"
+        )
+
+        expected = gb["Quantity"].aggregate(np.nanmean)
+        tm.assert_series_equal(result, expected)
+
+        result_df = gb[["Quantity"]].aggregate(
+            lambda values, index: np.nanmean(values), engine="numba"
+        )
+        expected_df = gb[["Quantity"]].aggregate(np.nanmean)
+        tm.assert_frame_equal(result_df, expected_df)