Skip to content

BUG: Resampler attempts to aggregate the on column #47107

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -855,7 +855,7 @@ Groupby/resample/rolling
- Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included. Now it is (:issue:`46754`)
- Bug in :meth:`DataFrame.rolling` raising ``ValueError`` when ``center=True``, ``axis=1`` and ``win_type`` is specified (:issue:`46135`)
- Bug in :meth:`.DataFrameGroupBy.describe` and :meth:`.SeriesGroupBy.describe` producing inconsistent results for empty datasets (:issue:`41575`)
-
- Bug in :meth:`DataFrame.resample` where reduction methods used with ``on`` would attempt to aggregate the provided column (:issue:`47079`)

Reshaping
^^^^^^^^^
Expand Down
16 changes: 14 additions & 2 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,10 @@ def __init__(
self.groupby._set_grouper(self._convert_obj(obj), sort=True)
self.binner, self.grouper = self._get_binner()
self._selection = selection
if self.groupby.key is not None:
self.exclusions = frozenset([self.groupby.key])
else:
self.exclusions = frozenset()

@final
def _shallow_copy(self, obj, **kwargs):
Expand Down Expand Up @@ -426,7 +430,11 @@ def _groupby_and_aggregate(self, how, *args, **kwargs):
"""
grouper = self.grouper

obj = self._selected_obj
if self._selected_obj.ndim == 1:
obj = self._selected_obj
else:
# Excludes `on` column when provided
obj = self._obj_with_exclusions
grouped = get_groupby(
obj, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys
)
Expand Down Expand Up @@ -1180,7 +1188,11 @@ def _downsample(self, how, **kwargs):
"""
how = com.get_cython_func(how) or how
ax = self.ax
obj = self._selected_obj
if self._selected_obj.ndim == 1:
obj = self._selected_obj
else:
# Excludes `on` column when provided
obj = self._obj_with_exclusions

if not len(ax):
# reset to the new freq
Expand Down
28 changes: 22 additions & 6 deletions pandas/tests/resample/test_resample_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,14 +272,30 @@ def test_combined_up_downsampling_of_irregular():
tm.assert_series_equal(result, expected)


def test_transform():

def test_transform_series():
r = test_series.resample("20min")
expected = test_series.groupby(pd.Grouper(freq="20min")).transform("mean")
result = r.transform("mean")
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("on", [None, "date"])
def test_transform_frame(on):
# GH#47079
index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
index.name = "date"
df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
expected = df.groupby(pd.Grouper(freq="20min")).transform("mean")
if on == "date":
# Move date to being a column; result will then have a RangeIndex
expected = expected.reset_index(drop=True)
df = df.reset_index()

r = df.resample("20min", on=on)
result = r.transform("mean")
tm.assert_frame_equal(result, expected)


def test_fillna():

# need to upsample here
Expand Down Expand Up @@ -390,7 +406,8 @@ def test_agg():
expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
for t in cases:
warn = FutureWarning if t in cases[1:3] else None
# In case 2, "date" is both an index and a column, so agg still tries to aggregate it
warn = FutureWarning if t == cases[2] else None
with tm.assert_produces_warning(
warn,
match=r"\['date'\] did not aggregate successfully",
Expand Down Expand Up @@ -660,12 +677,11 @@ def test_selection_api_validation():

exp = df_exp.resample("2D").sum()
exp.index.name = "date"
msg = "The default value of numeric_only"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.resample("2D", on="date").sum()
result = df.resample("2D", on="date").sum()
tm.assert_frame_equal(exp, result)

exp.index.name = "d"
msg = "The default value of numeric_only"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.resample("2D", level="d").sum()
tm.assert_frame_equal(exp, result)
Expand Down
1 change: 0 additions & 1 deletion pandas/tests/resample/test_resampler_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,6 @@ def test_resample_groupby_agg_object_dtype_all_nan(consolidate):
expected = DataFrame(
{
"key": ["A"] * 3 + ["B"] * 3,
"date": pd.to_datetime(["2020-01-01", "2020-01-06", "2020-01-13"] * 2),
"col1": [0, 5, 12] * 2,
"col_object": ["val"] * 3 + [np.nan] * 3,
},
Expand Down