BUG: Resampler attempts to aggregate the on column (#47107)

rhshadrach · web-flow · commit b5915239bdf6 · 2022-05-27T08:53:31.000-04:00
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -855,7 +855,7 @@ Groupby/resample/rolling
 - Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included. Now it is (:issue:`46754`)
 - Bug in :meth:`DataFrame.rolling` gives ValueError when center=True, axis=1 and win_type is specified (:issue:`46135`)
 - Bug in :meth:`.DataFrameGroupBy.describe` and :meth:`.SeriesGroupBy.describe` produces inconsistent results for empty datasets (:issue:`41575`)
--
+- Bug in :meth:`DataFrame.resample` reduction methods when used with ``on`` would attempt to aggregate the provided column (:issue:`47079`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -165,6 +165,10 @@ def __init__(
         self.groupby._set_grouper(self._convert_obj(obj), sort=True)
         self.binner, self.grouper = self._get_binner()
         self._selection = selection
+        if self.groupby.key is not None:
+            self.exclusions = frozenset([self.groupby.key])
+        else:
+            self.exclusions = frozenset()
 
     @final
     def _shallow_copy(self, obj, **kwargs):
@@ -426,7 +430,11 @@ def _groupby_and_aggregate(self, how, *args, **kwargs):
         """
         grouper = self.grouper
 
-        obj = self._selected_obj
+        if self._selected_obj.ndim == 1:
+            obj = self._selected_obj
+        else:
+            # Excludes `on` column when provided
+            obj = self._obj_with_exclusions
         grouped = get_groupby(
             obj, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys
         )
@@ -1180,7 +1188,11 @@ def _downsample(self, how, **kwargs):
         """
         how = com.get_cython_func(how) or how
         ax = self.ax
-        obj = self._selected_obj
+        if self._selected_obj.ndim == 1:
+            obj = self._selected_obj
+        else:
+            # Excludes `on` column when provided
+            obj = self._obj_with_exclusions
 
         if not len(ax):
             # reset to the new freq
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
@@ -272,14 +272,30 @@ def test_combined_up_downsampling_of_irregular():
     tm.assert_series_equal(result, expected)
 
 
-def test_transform():
-
+def test_transform_series():
     r = test_series.resample("20min")
     expected = test_series.groupby(pd.Grouper(freq="20min")).transform("mean")
     result = r.transform("mean")
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.parametrize("on", [None, "date"])
+def test_transform_frame(on):
+    # GH#47079
+    index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
+    index.name = "date"
+    df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
+    expected = df.groupby(pd.Grouper(freq="20min")).transform("mean")
+    if on == "date":
+        # Move date to being a column; result will then have a RangeIndex
+        expected = expected.reset_index(drop=True)
+        df = df.reset_index()
+
+    r = df.resample("20min", on=on)
+    result = r.transform("mean")
+    tm.assert_frame_equal(result, expected)
+
+
 def test_fillna():
 
     # need to upsample here
@@ -390,7 +406,8 @@ def test_agg():
     expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
     expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
     for t in cases:
-        warn = FutureWarning if t in cases[1:3] else None
+        # In case 2, "date" is an index and a column, so agg still tries to agg
+        warn = FutureWarning if t == cases[2] else None
         with tm.assert_produces_warning(
             warn,
             match=r"\['date'\] did not aggregate successfully",
@@ -660,12 +677,11 @@ def test_selection_api_validation():
 
     exp = df_exp.resample("2D").sum()
     exp.index.name = "date"
-    msg = "The default value of numeric_only"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        result = df.resample("2D", on="date").sum()
+    result = df.resample("2D", on="date").sum()
     tm.assert_frame_equal(exp, result)
 
     exp.index.name = "d"
+    msg = "The default value of numeric_only"
     with tm.assert_produces_warning(FutureWarning, match=msg):
         result = df.resample("2D", level="d").sum()
     tm.assert_frame_equal(exp, result)
diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
@@ -464,7 +464,6 @@ def test_resample_groupby_agg_object_dtype_all_nan(consolidate):
     expected = DataFrame(
         {
             "key": ["A"] * 3 + ["B"] * 3,
-            "date": pd.to_datetime(["2020-01-01", "2020-01-06", "2020-01-13"] * 2),
             "col1": [0, 5, 12] * 2,
             "col_object": ["val"] * 3 + [np.nan] * 3,
         },

Original file line number	Diff line number	Diff line change
`@@ -464,7 +464,6 @@ def test_resample_groupby_agg_object_dtype_all_nan(consolidate):`
`464`	`464`	`expected = DataFrame(`
`465`	`465`	`{`
`466`	`466`	`"key": ["A"] * 3 + ["B"] * 3,`
`467`		`- "date": pd.to_datetime(["2020-01-01", "2020-01-06", "2020-01-13"] * 2),`
`468`	`467`	`"col1": [0, 5, 12] * 2,`
`469`	`468`	`"col_object": ["val"] * 3 + [np.nan] * 3,`
`470`	`469`	`},`