Skip to content

Commit f9e41df

Browse files
mroeschke authored and simonjayhawkins committed
Backport PR pandas-dev#37661 on branch 1.1.x: BUG: RollingGroupby when groupby key is in the index
1 parent 88945ab commit f9e41df

File tree

3 files changed

+59
-9
lines changed

3 files changed

+59
-9
lines changed

doc/source/whatsnew/v1.1.5.rst

+1
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ Bug fixes
2525
~~~~~~~~~
2626
- Bug in metadata propagation for ``groupby`` iterator (:issue:`37343`)
2727
- Bug in indexing on a :class:`Series` with ``CategoricalDtype`` after unpickling (:issue:`37631`)
28+
- Bug in :class:`RollingGroupby` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`)
2829
-
2930

3031
.. ---------------------------------------------------------------------------

pandas/core/window/rolling.py

+27 -8
Original file line number | Diff line number | Diff line change
@@ -2193,20 +2193,39 @@ def _apply(
21932193
use_numba_cache,
21942194
**kwargs,
21952195
)
2196-
# Cannot use _wrap_outputs because we calculate the result all at once
2197-
# Compose MultiIndex result from grouping levels then rolling level
2198-
# Aggregate the MultiIndex data as tuples then the level names
2199-
grouped_object_index = self.obj.index
2200-
grouped_index_name = [*grouped_object_index.names]
2201-
groupby_keys = [grouping.name for grouping in self._groupby.grouper._groupings]
2202-
result_index_names = groupby_keys + grouped_index_name
2196+
# Reconstruct the resulting MultiIndex from tuples
2197+
# 1st set of levels = group by labels
2198+
# 2nd set of levels = original index
2199+
# Ignore 2nd set of levels if a group by label include an index level
2200+
result_index_names = [
2201+
grouping.name for grouping in self._groupby.grouper._groupings
2202+
]
2203+
grouped_object_index = None
2204+
2205+
column_keys = [
2206+
key
2207+
for key in result_index_names
2208+
if key not in self.obj.index.names or key is None
2209+
]
2210+
2211+
if len(column_keys) == len(result_index_names):
2212+
grouped_object_index = self.obj.index
2213+
grouped_index_name = [*grouped_object_index.names]
2214+
result_index_names += grouped_index_name
2215+
else:
2216+
# Our result will have still kept the column in the result
2217+
result = result.drop(columns=column_keys, errors="ignore")
22032218

22042219
result_index_data = []
22052220
for key, values in self._groupby.grouper.indices.items():
22062221
for value in values:
22072222
data = [
22082223
*com.maybe_make_list(key),
2209-
*com.maybe_make_list(grouped_object_index[value]),
2224+
*com.maybe_make_list(
2225+
grouped_object_index[value]
2226+
if grouped_object_index is not None
2227+
else []
2228+
),
22102229
]
22112230
result_index_data.append(tuple(data))
22122231

pandas/tests/window/test_grouper.py

+31 -1
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import pytest
33

44
import pandas as pd
5-
from pandas import DataFrame, Series
5+
from pandas import DataFrame, MultiIndex, Series
66
import pandas._testing as tm
77
from pandas.core.groupby.groupby import get_groupby
88

@@ -449,3 +449,33 @@ def test_groupby_rolling_no_sort(self):
449449
index=pd.MultiIndex.from_tuples([(2, 0), (1, 1)], names=["foo", None]),
450450
)
451451
tm.assert_frame_equal(result, expected)
452+
453+
def test_groupby_rolling_group_keys(self):
454+
# GH 37641
455+
arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
456+
index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
457+
458+
s = Series([1, 2, 3], index=index)
459+
result = s.groupby(["idx1", "idx2"], group_keys=False).rolling(1).mean()
460+
expected = Series(
461+
[1.0, 2.0, 3.0],
462+
index=MultiIndex.from_tuples(
463+
[("val1", "val1"), ("val1", "val1"), ("val2", "val2")],
464+
names=["idx1", "idx2"],
465+
),
466+
)
467+
tm.assert_series_equal(result, expected)
468+
469+
def test_groupby_rolling_index_level_and_column_label(self):
470+
arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
471+
index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
472+
473+
df = DataFrame({"A": [1, 1, 2], "B": range(3)}, index=index)
474+
result = df.groupby(["idx1", "A"]).rolling(1).mean()
475+
expected = DataFrame(
476+
{"B": [0.0, 1.0, 2.0]},
477+
index=MultiIndex.from_tuples(
478+
[("val1", 1), ("val1", 1), ("val2", 2)], names=["idx1", "A"]
479+
),
480+
)
481+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)