Skip to content

Commit b591523

Browse files
authored
BUG: Resampler attempts to aggregate the on column (#47107)
1 parent 339623b commit b591523

File tree

4 files changed

+37
-10
lines changed

4 files changed

+37
-10
lines changed

doc/source/whatsnew/v1.5.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -855,7 +855,7 @@ Groupby/resample/rolling
855855
- Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included. Now it is (:issue:`46754`)
856856
- Bug in :meth:`DataFrame.rolling` raising ``ValueError`` when ``center=True``, ``axis=1`` and ``win_type`` is specified (:issue:`46135`)
857857
- Bug in :meth:`.DataFrameGroupBy.describe` and :meth:`.SeriesGroupBy.describe` produces inconsistent results for empty datasets (:issue:`41575`)
858-
-
858+
- Bug in :meth:`DataFrame.resample` where reduction methods used with ``on`` would attempt to aggregate the provided column (:issue:`47079`)
859859

860860
Reshaping
861861
^^^^^^^^^

pandas/core/resample.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,10 @@ def __init__(
165165
self.groupby._set_grouper(self._convert_obj(obj), sort=True)
166166
self.binner, self.grouper = self._get_binner()
167167
self._selection = selection
168+
if self.groupby.key is not None:
169+
self.exclusions = frozenset([self.groupby.key])
170+
else:
171+
self.exclusions = frozenset()
168172

169173
@final
170174
def _shallow_copy(self, obj, **kwargs):
@@ -426,7 +430,11 @@ def _groupby_and_aggregate(self, how, *args, **kwargs):
426430
"""
427431
grouper = self.grouper
428432

429-
obj = self._selected_obj
433+
if self._selected_obj.ndim == 1:
434+
obj = self._selected_obj
435+
else:
436+
# Excludes `on` column when provided
437+
obj = self._obj_with_exclusions
430438
grouped = get_groupby(
431439
obj, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys
432440
)
@@ -1180,7 +1188,11 @@ def _downsample(self, how, **kwargs):
11801188
"""
11811189
how = com.get_cython_func(how) or how
11821190
ax = self.ax
1183-
obj = self._selected_obj
1191+
if self._selected_obj.ndim == 1:
1192+
obj = self._selected_obj
1193+
else:
1194+
# Excludes `on` column when provided
1195+
obj = self._obj_with_exclusions
11841196

11851197
if not len(ax):
11861198
# reset to the new freq

pandas/tests/resample/test_resample_api.py

+22-6
Original file line numberDiff line numberDiff line change
@@ -272,14 +272,30 @@ def test_combined_up_downsampling_of_irregular():
272272
tm.assert_series_equal(result, expected)
273273

274274

275-
def test_transform():
276-
275+
def test_transform_series():
277276
r = test_series.resample("20min")
278277
expected = test_series.groupby(pd.Grouper(freq="20min")).transform("mean")
279278
result = r.transform("mean")
280279
tm.assert_series_equal(result, expected)
281280

282281

282+
@pytest.mark.parametrize("on", [None, "date"])
283+
def test_transform_frame(on):
284+
# GH#47079
285+
index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
286+
index.name = "date"
287+
df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
288+
expected = df.groupby(pd.Grouper(freq="20min")).transform("mean")
289+
if on == "date":
290+
# Move date to being a column; result will then have a RangeIndex
291+
expected = expected.reset_index(drop=True)
292+
df = df.reset_index()
293+
294+
r = df.resample("20min", on=on)
295+
result = r.transform("mean")
296+
tm.assert_frame_equal(result, expected)
297+
298+
283299
def test_fillna():
284300

285301
# need to upsample here
@@ -390,7 +406,8 @@ def test_agg():
390406
expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
391407
expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
392408
for t in cases:
393-
warn = FutureWarning if t in cases[1:3] else None
409+
# In case 2, "date" is both an index and a column, so agg still tries to aggregate it
410+
warn = FutureWarning if t == cases[2] else None
394411
with tm.assert_produces_warning(
395412
warn,
396413
match=r"\['date'\] did not aggregate successfully",
@@ -660,12 +677,11 @@ def test_selection_api_validation():
660677

661678
exp = df_exp.resample("2D").sum()
662679
exp.index.name = "date"
663-
msg = "The default value of numeric_only"
664-
with tm.assert_produces_warning(FutureWarning, match=msg):
665-
result = df.resample("2D", on="date").sum()
680+
result = df.resample("2D", on="date").sum()
666681
tm.assert_frame_equal(exp, result)
667682

668683
exp.index.name = "d"
684+
msg = "The default value of numeric_only"
669685
with tm.assert_produces_warning(FutureWarning, match=msg):
670686
result = df.resample("2D", level="d").sum()
671687
tm.assert_frame_equal(exp, result)

pandas/tests/resample/test_resampler_grouper.py

-1
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,6 @@ def test_resample_groupby_agg_object_dtype_all_nan(consolidate):
464464
expected = DataFrame(
465465
{
466466
"key": ["A"] * 3 + ["B"] * 3,
467-
"date": pd.to_datetime(["2020-01-01", "2020-01-06", "2020-01-13"] * 2),
468467
"col1": [0, 5, 12] * 2,
469468
"col_object": ["val"] * 3 + [np.nan] * 3,
470469
},

0 commit comments

Comments
 (0)