From 047380ce467349db5c4bd710a85d563874fa718a Mon Sep 17 00:00:00 2001 From: richard Date: Mon, 23 May 2022 16:43:45 -0400 Subject: [PATCH 1/2] BUG: Resampler attempts to aggregate the `on` column --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/core/resample.py | 16 +++++++++-- pandas/tests/resample/test_resample_api.py | 27 ++++++++++++++----- .../tests/resample/test_resampler_grouper.py | 1 - 4 files changed, 36 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index e9e13c3ed5bbe..7a0cb57f8c419 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -855,7 +855,7 @@ Groupby/resample/rolling - Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included. Now it is (:issue:`46754`) - Bug in :meth:`DataFrame.rolling` gives ValueError when center=True, axis=1 and win_type is specified (:issue:`46135`) - Bug in :meth:`.DataFrameGroupBy.describe` and :meth:`.SeriesGroupBy.describe` produces inconsistent results for empty datasets (:issue:`41575`) -- +- Bug in :meth:`DataFrame.rolling` reduction methods when used with ``on`` would attempt to aggregate the provided column (:issue:`47079`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/resample.py b/pandas/core/resample.py index fd331fef32703..dcd9aceaf8474 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -165,6 +165,10 @@ def __init__( self.groupby._set_grouper(self._convert_obj(obj), sort=True) self.binner, self.grouper = self._get_binner() self._selection = selection + if self.groupby.key is not None: + self.exclusions = frozenset([self.groupby.key]) + else: + self.exclusions = frozenset() @final def _shallow_copy(self, obj, **kwargs): @@ -426,7 +430,11 @@ def _groupby_and_aggregate(self, how, *args, **kwargs): """ grouper = self.grouper - obj = self._selected_obj + if self._selected_obj.ndim == 1: + obj = self._selected_obj + else: + # Excludes `on` column when provided + obj = self._obj_with_exclusions grouped = get_groupby( obj, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys ) @@ -1180,7 +1188,11 @@ def _downsample(self, how, **kwargs): """ how = com.get_cython_func(how) or how ax = self.ax - obj = self._selected_obj + if self._selected_obj.ndim == 1: + obj = self._selected_obj + else: + # Excludes `on` column when provided + obj = self._obj_with_exclusions if not len(ax): # reset to the new freq diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 04b629d089925..d3a80f8324246 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -272,14 +272,29 @@ def test_combined_up_downsampling_of_irregular(): tm.assert_series_equal(result, expected) -def test_transform(): - +def test_transform_series(): r = test_series.resample("20min") expected = test_series.groupby(pd.Grouper(freq="20min")).transform("mean") result = r.transform("mean") tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("on", [None, "date"]) +def test_transform_frame(on): + index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") + index.name = "date" + df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index) + expected = df.groupby(pd.Grouper(freq="20min")).transform("mean") + if on == "date": + # Move date to being a column; result will then have a RangeIndex + expected = expected.reset_index(drop=True) + df = df.reset_index() + + r = df.resample("20min", on=on) + result = r.transform("mean") + tm.assert_frame_equal(result, expected) + + def test_fillna(): # need to upsample here @@ -390,7 +405,8 @@ def test_agg(): expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) for t in cases: - warn = FutureWarning if t in cases[1:3] else None + # In case 2, "date" is an index and a column, so agg still tries to agg + warn = FutureWarning if t == cases[2] else None with tm.assert_produces_warning( warn, match=r"\['date'\] did not aggregate successfully", @@ -660,12 +676,11 @@ def test_selection_api_validation(): exp = df_exp.resample("2D").sum() exp.index.name = "date" - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.resample("2D", on="date").sum() + result = df.resample("2D", on="date").sum() tm.assert_frame_equal(exp, result) exp.index.name = "d" + msg = "The default value of numeric_only" with tm.assert_produces_warning(FutureWarning, match=msg): result = df.resample("2D", level="d").sum() tm.assert_frame_equal(exp, result) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 5392ec88544a1..c54d9de009940 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -464,7 +464,6 @@ def test_resample_groupby_agg_object_dtype_all_nan(consolidate): expected = DataFrame( { "key": ["A"] * 3 + ["B"] * 3, - "date": pd.to_datetime(["2020-01-01", "2020-01-06", "2020-01-13"] * 2), "col1": [0, 5, 12] * 2, "col_object": ["val"] * 3 + [np.nan] * 3, }, From ccffac7a8c4b08dbb5b33b828ea62976565952ce Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Tue, 24 May 2022 17:12:10 -0400 Subject: [PATCH 2/2] fixups --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/tests/resample/test_resample_api.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 7a0cb57f8c419..2aec414f2ad89 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -855,7 +855,7 @@ Groupby/resample/rolling - Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included. Now it is (:issue:`46754`) - Bug in :meth:`DataFrame.rolling` gives ValueError when center=True, axis=1 and win_type is specified (:issue:`46135`) - Bug in :meth:`.DataFrameGroupBy.describe` and :meth:`.SeriesGroupBy.describe` produces inconsistent results for empty datasets (:issue:`41575`) -- Bug in :meth:`DataFrame.rolling` reduction methods when used with ``on`` would attempt to aggregate the provided column (:issue:`47079`) +- Bug in :meth:`DataFrame.resample` reduction methods when used with ``on`` would attempt to aggregate the provided column (:issue:`47079`) Reshaping ^^^^^^^^^ diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index d3a80f8324246..12f83c39c0143 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -281,6 +281,7 @@ def test_transform_series(): @pytest.mark.parametrize("on", [None, "date"]) def test_transform_frame(on): + # GH#47079 index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)