From 90498f7b042853c2a4c593098230f658ce48b7a6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 4 Apr 2021 00:03:51 -0700 Subject: [PATCH 1/4] Add as_index before 40701 is merged --- pandas/core/groupby/groupby.py | 1 + pandas/core/window/rolling.py | 5 +++++ pandas/tests/window/test_groupby.py | 27 +++++++++++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a6c3cb3ff5d0b..f57786f3d8668 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1926,6 +1926,7 @@ def rolling(self, *args, **kwargs): self._selected_obj, *args, _grouper=self.grouper, + _as_index=self.as_index, **kwargs, ) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index b90722857938e..33d3afb115d7a 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -553,11 +553,13 @@ def __init__( obj: FrameOrSeries, *args, _grouper=None, + _as_index=True, **kwargs, ): if _grouper is None: raise ValueError("Must pass a Grouper object.") self._grouper = _grouper + self._as_index = _as_index # GH 32262: It's convention to keep the grouping column in # groupby., but unexpected to users in # groupby.rolling. @@ -622,6 +624,9 @@ def _apply( ) result.index = result_index + if not self._as_index: + pass + # result = result_index.reset_index(levels=list(range(len(groupby_keys)))) return result def _apply_pairwise( diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 5c2f69a9247e9..c7d55250bb494 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -695,6 +695,33 @@ def test_by_column_not_in_values(self, columns): assert "A" not in result.columns tm.assert_frame_equal(g.obj, original_obj) + def test_as_index_false(self): + # GH 39433 + data = [ + ["A", "2018-01-01", 100], + ["A", "2018-01-02", 200], + ["B", "2018-01-01", 150], + ["B", "2018-01-02", 250], + ] + df = DataFrame(data, columns=["id", "date", "num"]) + df["date"] = to_datetime(df["date"]) + expected = df.set_index(["date"]) + + result = ( + expected.groupby([expected.id], as_index=False) + .rolling(window=2, min_periods=1) + .mean() + ) + tm.assert_frame_equal(result, expected) + + result = ( + expected.groupby([expected.id, expected.index.weekday], as_index=False) + .rolling(window=2, min_periods=1) + .mean() + ) + expected = DataFrame() + tm.assert_frame_equal(result, expected) + class TestExpanding: def setup_method(self): From 37e53f3ef848a36bfc3fb420c66e913d4130c7c8 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 5 Apr 2021 11:16:29 -0700 Subject: [PATCH 2/4] Add test for as_index behavior --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/window/rolling.py | 2 +- pandas/tests/window/test_groupby.py | 29 +++++++++++++++++++---------- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 5e95cd6e5ee10..9df59505cafac 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -753,6 +753,7 @@ Groupby/resample/rolling - Bug in :class:`core.window.ewm.ExponentialMovingWindow` when calling ``__getitem__`` would not retain ``com``, ``span``, ``alpha`` or ``halflife`` attributes (:issue:`40164`) - :class:`core.window.ewm.ExponentialMovingWindow` now raises a ``NotImplementedError`` when specifying ``times`` with ``adjust=False`` due to an incorrect calculation (:issue:`40098`) - Bug in :meth:`Series.asfreq` and :meth:`DataFrame.asfreq` dropping rows when the index is not sorted (:issue:`39805`) +- Bug in :class:`core.window.RollingGroupby` where ``as_index`` argument in ``groupby`` was ignored (:issue:`39433`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 137a74290b627..e4e605879fa18 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -622,7 +622,7 @@ def _apply( result.index = result_index if not self._as_index: - result = result_index.reset_index(levels=list(range(len(groupby_keys)))) + result = result.reset_index(level=list(range(len(groupby_keys)))) return result def _apply_pairwise( diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index d7c526d2fa145..32d0747d2874c 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -735,28 +735,37 @@ def test_groupby_level(self): def test_as_index_false(self): # GH 39433 data = [ - ["A", "2018-01-01", 100], - ["A", "2018-01-02", 200], - ["B", "2018-01-01", 150], - ["B", "2018-01-02", 250], + ["A", "2018-01-01", 100.0], + ["A", "2018-01-02", 200.0], + ["B", "2018-01-01", 150.0], + ["B", "2018-01-02", 250.0], ] df = DataFrame(data, columns=["id", "date", "num"]) df["date"] = to_datetime(df["date"]) - expected = df.set_index(["date"]) + df = df.set_index(["date"]) result = ( - expected.groupby([expected.id], as_index=False) - .rolling(window=2, min_periods=1) - .mean() + df.groupby([df.id], as_index=False).rolling(window=2, min_periods=1).mean() + ) + expected = DataFrame( + {"id": ["A", "A", "B", "B"], "num": [100.0, 150.0, 150.0, 200.0]}, + index=df.index, ) tm.assert_frame_equal(result, expected) result = ( - expected.groupby([expected.id, expected.index.weekday], as_index=False) + df.groupby([df.id, df.index.weekday], as_index=False) .rolling(window=2, min_periods=1) .mean() ) - expected = DataFrame() + expected = DataFrame( + { + "id": ["A", "A", "B", "B"], + "date": [0, 1, 0, 1], + "num": [100.0, 200.0, 150.0, 250.0], + }, + index=df.index, + ) tm.assert_frame_equal(result, expected) From 06261258bfdb78a2735d36d2231f3453b9dae26d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 5 Apr 2021 11:17:52 -0700 Subject: [PATCH 3/4] Specifically mention False --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 9df59505cafac..ff78880aa5011 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -753,7 +753,7 @@ Groupby/resample/rolling - Bug in :class:`core.window.ewm.ExponentialMovingWindow` when calling ``__getitem__`` would not retain ``com``, ``span``, ``alpha`` or ``halflife`` attributes (:issue:`40164`) - :class:`core.window.ewm.ExponentialMovingWindow` now raises a ``NotImplementedError`` when specifying ``times`` with ``adjust=False`` due to an incorrect calculation (:issue:`40098`) - Bug in :meth:`Series.asfreq` and :meth:`DataFrame.asfreq` dropping rows when the index is not sorted (:issue:`39805`) -- Bug in :class:`core.window.RollingGroupby` where ``as_index`` argument in ``groupby`` was ignored (:issue:`39433`) +- Bug in :class:`core.window.RollingGroupby` where ``as_index=False`` argument in ``groupby`` was ignored (:issue:`39433`) Reshaping ^^^^^^^^^ From 3fa81016c0eba586edf9d4cb22dff7b0b4fab354 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 8 Apr 2021 13:27:33 -0700 Subject: [PATCH 4/4] Parameterize --- pandas/tests/window/test_groupby.py | 41 +++++++++++++++++------------ 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 32d0747d2874c..51a6288598c32 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -732,7 +732,25 @@ def test_groupby_level(self): ) tm.assert_series_equal(result, expected) - def test_as_index_false(self): + @pytest.mark.parametrize( + "by, expected_data", + [ + [["id"], {"num": [100.0, 150.0, 150.0, 200.0]}], + [ + ["id", "index"], + { + "date": [ + Timestamp("2018-01-01"), + Timestamp("2018-01-02"), + Timestamp("2018-01-01"), + Timestamp("2018-01-02"), + ], + "num": [100.0, 200.0, 150.0, 250.0], + }, + ], + ], + ) + def test_as_index_false(self, by, expected_data): # GH 39433 data = [ ["A", "2018-01-01", 100.0], @@ -744,26 +762,15 @@ def test_as_index_false(self): df["date"] = to_datetime(df["date"]) df = df.set_index(["date"]) + gp_by = [getattr(df, attr) for attr in by] result = ( - df.groupby([df.id], as_index=False).rolling(window=2, min_periods=1).mean() - ) - expected = DataFrame( - {"id": ["A", "A", "B", "B"], "num": [100.0, 150.0, 150.0, 200.0]}, - index=df.index, + df.groupby(gp_by, as_index=False).rolling(window=2, min_periods=1).mean() ) - tm.assert_frame_equal(result, expected) - result = ( - df.groupby([df.id, df.index.weekday], as_index=False) - .rolling(window=2, min_periods=1) - .mean() - ) + expected = {"id": ["A", "A", "B", "B"]} + expected.update(expected_data) expected = DataFrame( - { - "id": ["A", "A", "B", "B"], - "date": [0, 1, 0, 1], - "num": [100.0, 200.0, 150.0, 250.0], - }, + expected, index=df.index, ) tm.assert_frame_equal(result, expected)