From 00d53f8a3f1dd0770f5cf1fe6fc105543a98e846 Mon Sep 17 00:00:00 2001 From: Tobias Pitters Date: Tue, 21 Jul 2020 00:04:58 +0200 Subject: [PATCH 1/4] fix bug when combining groupby with resample and interpolate with datetime-index (GH 35325) --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/resample.py | 2 +- .../tests/resample/test_resampler_grouper.py | 62 +++++++++++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 43d1244c15d8a..2aea3a0a75954 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1132,6 +1132,7 @@ Groupby/resample/rolling - Bug in :meth:`core.groupby.DataFrameGroupBy.transform` when ``func='nunique'`` and columns are of type ``datetime64``, the result would also be of type ``datetime64`` instead of ``int64`` (:issue:`35109`) - Bug in :meth:`DataFrame.groupby` raising an ``AttributeError`` when selecting a column and aggregating with ``as_index=False`` (:issue:`35246`). - Bug in :meth:'DataFrameGroupBy.first' and :meth:'DataFrameGroupBy.last' that would raise an unnecessary ``ValueError`` when grouping on multiple ``Categoricals`` (:issue:`34951`) +- Bug when combining methods :meth:`DataFrame.groupby` with :meth:`DataFrame.resample` and :meth:`DataFrame.interpolate` raising an ``TypeError`` (:issue:`35325`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/resample.py b/pandas/core/resample.py index bfdfc65723433..e25d21eed5ea1 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -795,7 +795,7 @@ def interpolate( """ Interpolate values according to different methods. """ - result = self._upsample(None) + result = self._upsample("asfreq") return result.interpolate( method=method, axis=axis, diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index b36b11582c1ec..d118569c8b86a 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -346,3 +346,65 @@ def test_median_duplicate_columns(): result = df.resample("5s").median() expected.columns = result.columns tm.assert_frame_equal(result, expected) + + +def test_groupby_resample_interpolate(): + # GH 35325 + d = {"price": [10, 11, 9], "volume": [50, 60, 50]} + + df = pd.DataFrame(d) + + df["week_starting"] = pd.date_range("01/01/2018", periods=3, freq="W") + + result = ( + df.set_index("week_starting") + .groupby("volume") + .resample("1D") + .interpolate(method="linear") + ) + expected_ind = pd.MultiIndex.from_tuples( + [ + (50, "2018-01-07"), + (50, pd.Timestamp("2018-01-08")), + (50, pd.Timestamp("2018-01-09")), + (50, pd.Timestamp("2018-01-10")), + (50, pd.Timestamp("2018-01-11")), + (50, pd.Timestamp("2018-01-12")), + (50, pd.Timestamp("2018-01-13")), + (50, pd.Timestamp("2018-01-14")), + (50, pd.Timestamp("2018-01-15")), + (50, pd.Timestamp("2018-01-16")), + (50, pd.Timestamp("2018-01-17")), + (50, pd.Timestamp("2018-01-18")), + (50, pd.Timestamp("2018-01-19")), + (50, pd.Timestamp("2018-01-20")), + (50, pd.Timestamp("2018-01-21")), + (60, pd.Timestamp("2018-01-14")), + ], + names=["volume", "week_starting"], + ) + expected = pd.DataFrame( + data={ + "price": [ + 10.0, + 9.928571428571429, + 9.857142857142858, + 9.785714285714286, + 9.714285714285714, + 9.642857142857142, + 9.571428571428571, + 9.5, + 9.428571428571429, + 9.357142857142858, + 9.285714285714286, + 9.214285714285714, + 9.142857142857142, + 9.071428571428571, + 9.0, + 11.0, + ], + "volume": [50.0] * 15 + [60], + }, + index=expected_ind, + ) + tm.assert_frame_equal(result, expected) From c593a83c4f2e68723576a3b3a2a3580d008534b7 Mon Sep 17 00:00:00 2001 From: Tobias Pitters Date: Wed, 5 Aug 2020 19:44:41 +0200 Subject: [PATCH 2/4] added whatsnew entry in v1.2.0 --- doc/source/whatsnew/v1.2.0.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index b16ca0a80c5b4..eb919f0ca3ec8 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -132,9 +132,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- -- - +- Bug when combining methods :meth:`DataFrame.groupby` with :meth:`DataFrame.resample` and :meth:`DataFrame.interpolate` raising an ``TypeError`` (:issue:`35325`) Reshaping ^^^^^^^^^ From 87486b8012d0e4e4d254a8e01e39dc123d90bde9 Mon Sep 17 00:00:00 2001 From: Tobias Pitters Date: Sat, 15 Aug 2020 10:06:49 +0200 Subject: [PATCH 3/4] removed merge markers --- doc/source/whatsnew/v1.2.0.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 8b8f09247606d..b54f2e435575c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -152,14 +152,11 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -<<<<<<< HEAD - Bug when combining methods :meth:`DataFrame.groupby` with :meth:`DataFrame.resample` and :meth:`DataFrame.interpolate` raising an ``TypeError`` (:issue:`35325`) -======= - Bug in :meth:`DataFrameGroupBy.apply` that would some times throw an erroneous ``ValueError`` if the grouping axis had duplicate entries (:issue:`16646`) - - ->>>>>>> master Reshaping ^^^^^^^^^ From ef5bceb75e5c3871b1e19f9f3f0d4b0063e97991 Mon Sep 17 00:00:00 2001 From: Tobias Pitters Date: Wed, 19 Aug 2020 15:15:12 +0200 Subject: [PATCH 4/4] moved test from test_resampler_grouper to test_time_grouper --- .../tests/resample/test_resampler_grouper.py | 62 ------------------- pandas/tests/resample/test_time_grouper.py | 62 +++++++++++++++++++ 2 files changed, 62 insertions(+), 62 deletions(-) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index d118569c8b86a..b36b11582c1ec 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -346,65 +346,3 @@ def test_median_duplicate_columns(): result = df.resample("5s").median() expected.columns = result.columns tm.assert_frame_equal(result, expected) - - -def test_groupby_resample_interpolate(): - # GH 35325 - d = {"price": [10, 11, 9], "volume": [50, 60, 50]} - - df = pd.DataFrame(d) - - df["week_starting"] = pd.date_range("01/01/2018", periods=3, freq="W") - - result = ( - df.set_index("week_starting") - .groupby("volume") - .resample("1D") - .interpolate(method="linear") - ) - expected_ind = pd.MultiIndex.from_tuples( - [ - (50, "2018-01-07"), - (50, pd.Timestamp("2018-01-08")), - (50, pd.Timestamp("2018-01-09")), - (50, pd.Timestamp("2018-01-10")), - (50, pd.Timestamp("2018-01-11")), - (50, pd.Timestamp("2018-01-12")), - (50, pd.Timestamp("2018-01-13")), - (50, pd.Timestamp("2018-01-14")), - (50, pd.Timestamp("2018-01-15")), - (50, pd.Timestamp("2018-01-16")), - (50, pd.Timestamp("2018-01-17")), - (50, pd.Timestamp("2018-01-18")), - (50, pd.Timestamp("2018-01-19")), - (50, pd.Timestamp("2018-01-20")), - (50, pd.Timestamp("2018-01-21")), - (60, pd.Timestamp("2018-01-14")), - ], - names=["volume", "week_starting"], - ) - expected = pd.DataFrame( - data={ - "price": [ - 10.0, - 9.928571428571429, - 9.857142857142858, - 9.785714285714286, - 9.714285714285714, - 9.642857142857142, - 9.571428571428571, - 9.5, - 9.428571428571429, - 9.357142857142858, - 9.285714285714286, - 9.214285714285714, - 9.142857142857142, - 9.071428571428571, - 9.0, - 11.0, - ], - "volume": [50.0] * 15 + [60], - }, - index=expected_ind, - ) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 26e429c47b494..f638706207679 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -287,3 +287,65 @@ def test_upsample_sum(method, method_args, expected_values): result = methodcaller(method, **method_args)(resampled) expected = pd.Series(expected_values, index=index) tm.assert_series_equal(result, expected) + + +def test_groupby_resample_interpolate(): + # GH 35325 + d = {"price": [10, 11, 9], "volume": [50, 60, 50]} + + df = pd.DataFrame(d) + + df["week_starting"] = pd.date_range("01/01/2018", periods=3, freq="W") + + result = ( + df.set_index("week_starting") + .groupby("volume") + .resample("1D") + .interpolate(method="linear") + ) + expected_ind = pd.MultiIndex.from_tuples( + [ + (50, "2018-01-07"), + (50, pd.Timestamp("2018-01-08")), + (50, pd.Timestamp("2018-01-09")), + (50, pd.Timestamp("2018-01-10")), + (50, pd.Timestamp("2018-01-11")), + (50, pd.Timestamp("2018-01-12")), + (50, pd.Timestamp("2018-01-13")), + (50, pd.Timestamp("2018-01-14")), + (50, pd.Timestamp("2018-01-15")), + (50, pd.Timestamp("2018-01-16")), + (50, pd.Timestamp("2018-01-17")), + (50, pd.Timestamp("2018-01-18")), + (50, pd.Timestamp("2018-01-19")), + (50, pd.Timestamp("2018-01-20")), + (50, pd.Timestamp("2018-01-21")), + (60, pd.Timestamp("2018-01-14")), + ], + names=["volume", "week_starting"], + ) + expected = pd.DataFrame( + data={ + "price": [ + 10.0, + 9.928571428571429, + 9.857142857142858, + 9.785714285714286, + 9.714285714285714, + 9.642857142857142, + 9.571428571428571, + 9.5, + 9.428571428571429, + 9.357142857142858, + 9.285714285714286, + 9.214285714285714, + 9.142857142857142, + 9.071428571428571, + 9.0, + 11.0, + ], + "volume": [50.0] * 15 + [60], + }, + index=expected_ind, + ) + tm.assert_frame_equal(result, expected)