diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index e0fa68e3b9f80..a29ae1912e338 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -25,6 +25,7 @@ Bug fixes ~~~~~~~~~ - Bug in metadata propagation for ``groupby`` iterator (:issue:`37343`) - Bug in indexing on a :class:`Series` with ``CategoricalDtype`` after unpickling (:issue:`37631`) +- Bug in :class:`RollingGroupby` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index a976350a419fe..5d561c84ab462 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -762,19 +762,39 @@ def _apply( use_numba_cache, **kwargs, ) - # Compose MultiIndex result from grouping levels then rolling level - # Aggregate the MultiIndex data as tuples then the level names - grouped_object_index = self.obj.index - grouped_index_name = [*grouped_object_index.names] - groupby_keys = [grouping.name for grouping in self._groupby.grouper._groupings] - result_index_names = groupby_keys + grouped_index_name + # Reconstruct the resulting MultiIndex from tuples + # 1st set of levels = group by labels + # 2nd set of levels = original index + # Ignore 2nd set of levels if a group by label includes an index level + result_index_names = [ + grouping.name for grouping in self._groupby.grouper._groupings + ] + grouped_object_index = None + + column_keys = [ + key + for key in result_index_names + if key not in self.obj.index.names or key is None + ] + + if len(column_keys) == len(result_index_names): + grouped_object_index = self.obj.index + grouped_index_name = [*grouped_object_index.names] + result_index_names += grouped_index_name + else: + # The result still contains the group by columns, so drop them + result = result.drop(columns=column_keys, errors="ignore") 
result_index_data = [] for key, values in self._groupby.grouper.indices.items(): for value in values: data = [ *com.maybe_make_list(key), - *com.maybe_make_list(grouped_object_index[value]), + *com.maybe_make_list( + grouped_object_index[value] + if grouped_object_index is not None + else [] + ), ] result_index_data.append(tuple(data)) diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index 101d65c885c9b..65906df819054 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas import DataFrame, Series +from pandas import DataFrame, MultiIndex, Series import pandas._testing as tm from pandas.core.groupby.groupby import get_groupby @@ -601,3 +601,33 @@ def test_groupby_rolling_nans_in_index(self, rollings, key): df = df.set_index("a") with pytest.raises(ValueError, match=f"{key} must be monotonic"): df.groupby("c").rolling("60min", **rollings) + + def test_groupby_rolling_group_keys(self): + # GH 37641 + arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]] + index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2")) + + s = Series([1, 2, 3], index=index) + result = s.groupby(["idx1", "idx2"], group_keys=False).rolling(1).mean() + expected = Series( + [1.0, 2.0, 3.0], + index=MultiIndex.from_tuples( + [("val1", "val1"), ("val1", "val1"), ("val2", "val2")], + names=["idx1", "idx2"], + ), + ) + tm.assert_series_equal(result, expected) + + def test_groupby_rolling_index_level_and_column_label(self): + arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]] + index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2")) + + df = DataFrame({"A": [1, 1, 2], "B": range(3)}, index=index) + result = df.groupby(["idx1", "A"]).rolling(1).mean() + expected = DataFrame( + {"B": [0.0, 1.0, 2.0]}, + index=MultiIndex.from_tuples( + [("val1", 1), ("val1", 1), ("val2", 2)], names=["idx1", "A"] + ), + ) + tm.assert_frame_equal(result, 
expected)