Backport PR pandas-dev#37661 on branch 1.1.x: BUG: RollingGroupby when groupby key is in the index

mroeschke · simonjayhawkins · commit f9e41df3cf47 · 2020-11-10T13:37:46.000Z
diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst
@@ -25,6 +25,7 @@ Bug fixes
 ~~~~~~~~~
 - Bug in metadata propagation for ``groupby`` iterator (:issue:`37343`)
 - Bug in indexing on a :class:`Series` with ``CategoricalDtype`` after unpickling (:issue:`37631`)
+- Bug in :class:`RollingGroupby` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -2193,20 +2193,39 @@ def _apply(
             use_numba_cache,
             **kwargs,
         )
-        # Cannot use _wrap_outputs because we calculate the result all at once
-        # Compose MultiIndex result from grouping levels then rolling level
-        # Aggregate the MultiIndex data as tuples then the level names
-        grouped_object_index = self.obj.index
-        grouped_index_name = [*grouped_object_index.names]
-        groupby_keys = [grouping.name for grouping in self._groupby.grouper._groupings]
-        result_index_names = groupby_keys + grouped_index_name
+        # Reconstruct the resulting MultiIndex from tuples
+        # 1st set of levels = group by labels
+        # 2nd set of levels = original index
+        # Ignore 2nd set of levels if a group by label includes an index level
+        result_index_names = [
+            grouping.name for grouping in self._groupby.grouper._groupings
+        ]
+        grouped_object_index = None
+
+        column_keys = [
+            key
+            for key in result_index_names
+            if key not in self.obj.index.names or key is None
+        ]
+
+        if len(column_keys) == len(result_index_names):
+            grouped_object_index = self.obj.index
+            grouped_index_name = [*grouped_object_index.names]
+            result_index_names += grouped_index_name
+        else:
+            # The result still contains the group by key columns, so drop them
+            result = result.drop(columns=column_keys, errors="ignore")
 
         result_index_data = []
         for key, values in self._groupby.grouper.indices.items():
             for value in values:
                 data = [
                     *com.maybe_make_list(key),
-                    *com.maybe_make_list(grouped_object_index[value]),
+                    *com.maybe_make_list(
+                        grouped_object_index[value]
+                        if grouped_object_index is not None
+                        else []
+                    ),
                 ]
                 result_index_data.append(tuple(data))
 
diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py
@@ -2,7 +2,7 @@
 import pytest
 
 import pandas as pd
-from pandas import DataFrame, Series
+from pandas import DataFrame, MultiIndex, Series
 import pandas._testing as tm
 from pandas.core.groupby.groupby import get_groupby
 
@@ -449,3 +449,33 @@ def test_groupby_rolling_no_sort(self):
             index=pd.MultiIndex.from_tuples([(2, 0), (1, 1)], names=["foo", None]),
         )
         tm.assert_frame_equal(result, expected)
+
+    def test_groupby_rolling_group_keys(self):
+        # GH 37641
+        arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
+        index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
+
+        s = Series([1, 2, 3], index=index)
+        result = s.groupby(["idx1", "idx2"], group_keys=False).rolling(1).mean()
+        expected = Series(
+            [1.0, 2.0, 3.0],
+            index=MultiIndex.from_tuples(
+                [("val1", "val1"), ("val1", "val1"), ("val2", "val2")],
+                names=["idx1", "idx2"],
+            ),
+        )
+        tm.assert_series_equal(result, expected)
+
+    def test_groupby_rolling_index_level_and_column_label(self):
+        arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
+        index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
+
+        df = DataFrame({"A": [1, 1, 2], "B": range(3)}, index=index)
+        result = df.groupby(["idx1", "A"]).rolling(1).mean()
+        expected = DataFrame(
+            {"B": [0.0, 1.0, 2.0]},
+            index=MultiIndex.from_tuples(
+                [("val1", 1), ("val1", 1), ("val2", 2)], names=["idx1", "A"]
+            ),
+        )
+        tm.assert_frame_equal(result, expected)

Original file line number	Diff line number	Diff line change
`@@ -25,6 +25,7 @@ Bug fixes`
`25`	`25`	`~~~~~~~~~`
`26`	`26`	- Bug in metadata propagation for ``groupby`` iterator (:issue:`37343`)
`27`	`27`	- Bug in indexing on a :class:`Series` with ``CategoricalDtype`` after unpickling (:issue:`37631`)
	`28`	+- Bug in :class:`RollingGroupby` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`)
`28`	`29`	`-`
`29`	`30`
`30`	`31`	`.. ---------------------------------------------------------------------------`