pandas-dev · phofl · Sep 7, 2020 · Sep 7, 2020 · Sep 7, 2020 · Sep 7, 2020
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -314,6 +314,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupby.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`)
 - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`)
 - Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
+- Bug when combining :meth:`DataFrame.groupby` with :meth:`DataFrame.resample` where selecting a single column (a ``Series``) or using ``agg`` miscalculated the aggregation (:issue:`27343`, :issue:`33548`, :issue:`35275`)
 -
 
 Reshaping

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -311,7 +311,12 @@ def _get_grouper(self, obj, validate: bool = True):
         )
         return self.binner, self.grouper, self.obj
 
-    def _set_grouper(self, obj: FrameOrSeries, sort: bool = False):
+    def _set_grouper(
+        self,
+        obj: FrameOrSeries,
+        sort: bool = False,
+        group_indices: Optional[Dict] = None,
+    ):
         """
         given an object and the specifications, setup the internal grouper
         for this particular specification
@@ -327,9 +332,10 @@ def _set_grouper(self, obj: FrameOrSeries, sort: bool = False):
         if self.key is not None and self.level is not None:
             raise ValueError("The Grouper cannot specify both a key and a level!")
 
-        # Keep self.grouper value before overriding
+        # Keep self.grouper and self.indexer value before overriding
         if self._grouper is None:
             self._grouper = self.grouper
+            self._indexer = self.indexer
 
         # the key must be a valid info item
         if self.key is not None:
@@ -338,7 +344,14 @@ def _set_grouper(self, obj: FrameOrSeries, sort: bool = False):
             if getattr(self.grouper, "name", None) == key and isinstance(
                 obj, ABCSeries
             ):
-                ax = self._grouper.take(obj.index)
+                if group_indices is None:
 class Grouper: 
 class Grouping: 
 def indices(self): 
 x = self._shallow_copy(x, groupby=self.groupby) 
 class Grouper: 
 class Grouping: 
 def indices(self): 
 x = self._shallow_copy(x, groupby=self.groupby) 
+                    ax = self._grouper.take(obj.index)
+                else:
+                    indices = group_indices.get(obj.name)
+                    if self._indexer is not None:
+                        ax = self._grouper.take(self._indexer.argsort()).take(indices)
+                    else:
+                        ax = self._grouper.take(indices)
             else:
                 if key not in obj._info_axis:
                     raise KeyError(f"The grouper name {key} is not found")

diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -91,7 +91,11 @@ def __init__(self, obj, groupby=None, axis=0, kind=None, **kwargs):
         self.grouper = None
 
         if self.groupby is not None:
-            self.groupby._set_grouper(self._convert_obj(obj), sort=True)
+            self.groupby._set_grouper(
+                self._convert_obj(obj),
+                sort=True,
+                group_indices=kwargs.get("group_indices"),
+            )
 
     def __str__(self) -> str:
         """
@@ -980,7 +984,9 @@ def _apply(self, f, grouper=None, *args, **kwargs):
         """
 
         def func(x):
-            x = self._shallow_copy(x, groupby=self.groupby)
+            x = self._shallow_copy(
+                x, groupby=self.groupby, group_indices=self._groupby.indices
+            )
 
             if isinstance(f, str):
                 return getattr(x, f)(**kwargs)

diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
@@ -124,10 +124,12 @@ def test_getitem_multiple():
     tm.assert_series_equal(result, expected)
 
 
-def test_groupby_resample_on_api_with_getitem():
+@pytest.mark.parametrize("index_values", [[0, 1, 2, 3, 4], ["a", "b", "c", "d", "e"]])
+def test_groupby_resample_on_api_with_getitem(index_values):
     # GH 17813
     df = pd.DataFrame(
-        {"id": list("aabbb"), "date": pd.date_range("1-1-2016", periods=5), "data": 1}
+        {"id": list("aabbb"), "date": pd.date_range("1-1-2016", periods=5), "data": 1},
+        index=pd.Index(index_values),
     )
     exp = df.set_index("date").groupby("id").resample("2D")["data"].sum()
     result = df.groupby("id").resample("2D", on="date")["data"].sum()
@@ -347,3 +349,67 @@ def test_median_duplicate_columns():
     result = df.resample("5s").median()
     expected.columns = result.columns
     tm.assert_frame_equal(result, expected)
+
+
+def test_resample_different_result_with_agg():
+    # GH: 35275 and 33548
+    data = pd.DataFrame(
+        {
+            "cat": ["cat1", "cat1", "cat2", "cat1", "cat2", "cat1", "cat2", "cat1"],
+            "num": [5, 20, 22, 3, 4, 30, 10, 50],
+            "date": [
+                "2019-2-1",
+                "2018-02-03",
+                "2020-3-11",
+                "2019-2-2",
+                "2019-2-2",
+                "2018-12-4",
+                "2020-3-11",
+                "2020-12-12",
+            ],
+        }
+    )
+    data["date"] = pd.to_datetime(data["date"])
+
+    resampled = data.groupby("cat").resample("Y", on="date")
+
+    index = pd.MultiIndex.from_tuples(
+        [
+            ("cat1", "2018-12-31"),
+            ("cat1", "2019-12-31"),
+            ("cat1", "2020-12-31"),
+            ("cat2", "2019-12-31"),
+            ("cat2", "2020-12-31"),
+        ],
+        names=["cat", "date"],
+    )
+    index = index.set_levels([index.levels[0], pd.to_datetime(index.levels[1])])
+    expected = DataFrame([25, 4, 50, 4, 16], columns=pd.Index(["num"]), index=index)
+    result = resampled.agg({"num": "mean"})
+    tm.assert_frame_equal(result, expected)
+    result = resampled["num"].mean()
+    tm.assert_series_equal(result, expected["num"])
+    result = resampled.mean()
+    tm.assert_frame_equal(result, expected)
+
+
+def test_resample_agg_different_results_on_keyword():
+    # GH: 27343
+    df = pd.DataFrame.from_records(
+        {
+            "ref": ["a", "a", "a", "b", "b"],
+            "time": [
+                "2014-12-31",
+                "2015-12-31",
+                "2016-12-31",
+                "2012-12-31",
+                "2014-12-31",
+            ],
+            "value": 5 * [1],
+        }
+    )
+    df["time"] = pd.to_datetime(df["time"])
+
+    expected = df.set_index("time").groupby("ref").resample(rule="M")["value"].sum()
+    result = df.groupby("ref").resample(rule="M", on="time")["value"].sum()
+    tm.assert_series_equal(result, expected)