Skip to content

Commit dd353a1

Browse files
Backport PR #39191: Revert "BUG/REG: RollingGroupby MultiIndex levels dropped (#38737)" (#39198)
1 parent 6d599cb commit dd353a1

File tree

4 files changed

+23
-55
lines changed

4 files changed

+23
-55
lines changed

doc/source/whatsnew/v1.2.1.rst

-1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Fixed regression in :meth:`to_csv` that created corrupted zip files when there were more rows than ``chunksize`` (:issue:`38714`)
18-
- Fixed regression in ``groupby().rolling()`` where :class:`MultiIndex` levels were dropped (:issue:`38523`)
1918
- Fixed regression in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`)
2019
- Fixed regression in :meth:`DataFrame.groupby()` with :class:`Categorical` grouping column not showing unused categories for ``grouped.indices`` (:issue:`38642`)
2120
- Fixed regression in :meth:`DataFrame.any` and :meth:`DataFrame.all` not returning a result for tz-aware ``datetime64`` columns (:issue:`38723`)

pandas/core/shared_docs.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,7 @@
108108
Note this does not influence the order of observations within each
109109
group. Groupby preserves the order of rows within each group.
110110
group_keys : bool, default True
111-
When calling ``groupby().apply()``, add group keys to index to identify pieces.
111+
When calling apply, add group keys to index to identify pieces.
112112
squeeze : bool, default False
113113
Reduce the dimensionality of the return type if possible,
114114
otherwise return a consistent type.

pandas/core/window/rolling.py

+17-11
Original file line number | Diff line number | Diff line change
@@ -767,22 +767,28 @@ def _apply(
767767
numba_cache_key,
768768
**kwargs,
769769
)
770-
# Reconstruct the resulting MultiIndex
770+
# Reconstruct the resulting MultiIndex from tuples
771771
# 1st set of levels = group by labels
772-
# 2nd set of levels = original DataFrame/Series index
773-
grouped_object_index = self.obj.index
774-
grouped_index_name = [*grouped_object_index.names]
775-
groupby_keys = [grouping.name for grouping in self._groupby.grouper._groupings]
776-
result_index_names = groupby_keys + grouped_index_name
772+
# 2nd set of levels = original index
773+
# Ignore 2nd set of levels if a group by label includes an index level
774+
result_index_names = [
775+
grouping.name for grouping in self._groupby.grouper._groupings
776+
]
777+
grouped_object_index = None
777778

778-
drop_columns = [
779+
column_keys = [
779780
key
780-
for key in groupby_keys
781+
for key in result_index_names
781782
if key not in self.obj.index.names or key is None
782783
]
783-
if len(drop_columns) != len(groupby_keys):
784-
# Our result will have kept groupby columns which should be dropped
785-
result = result.drop(columns=drop_columns, errors="ignore")
784+
785+
if len(column_keys) == len(result_index_names):
786+
grouped_object_index = self.obj.index
787+
grouped_index_name = [*grouped_object_index.names]
788+
result_index_names += grouped_index_name
789+
else:
790+
# Our result will have still kept the column in the result
791+
result = result.drop(columns=column_keys, errors="ignore")
786792

787793
codes = self._groupby.grouper.codes
788794
levels = self._groupby.grouper.levels

pandas/tests/window/test_groupby.py

+5-42
Original file line number | Diff line number | Diff line change
@@ -556,31 +556,23 @@ def test_groupby_rolling_nans_in_index(self, rollings, key):
556556
with pytest.raises(ValueError, match=f"{key} must be monotonic"):
557557
df.groupby("c").rolling("60min", **rollings)
558558

559-
@pytest.mark.parametrize("group_keys", [True, False])
560-
def test_groupby_rolling_group_keys(self, group_keys):
559+
def test_groupby_rolling_group_keys(self):
561560
# GH 37641
562-
# GH 38523: GH 37641 actually was not a bug.
563-
# group_keys only applies to groupby.apply directly
564561
arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
565562
index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
566563

567564
s = Series([1, 2, 3], index=index)
568-
result = s.groupby(["idx1", "idx2"], group_keys=group_keys).rolling(1).mean()
565+
result = s.groupby(["idx1", "idx2"], group_keys=False).rolling(1).mean()
569566
expected = Series(
570567
[1.0, 2.0, 3.0],
571568
index=MultiIndex.from_tuples(
572-
[
573-
("val1", "val1", "val1", "val1"),
574-
("val1", "val1", "val1", "val1"),
575-
("val2", "val2", "val2", "val2"),
576-
],
577-
names=["idx1", "idx2", "idx1", "idx2"],
569+
[("val1", "val1"), ("val1", "val1"), ("val2", "val2")],
570+
names=["idx1", "idx2"],
578571
),
579572
)
580573
tm.assert_series_equal(result, expected)
581574

582575
def test_groupby_rolling_index_level_and_column_label(self):
583-
# The groupby keys should not appear as a resulting column
584576
arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
585577
index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
586578

@@ -589,12 +581,7 @@ def test_groupby_rolling_index_level_and_column_label(self):
589581
expected = DataFrame(
590582
{"B": [0.0, 1.0, 2.0]},
591583
index=MultiIndex.from_tuples(
592-
[
593-
("val1", 1, "val1", "val1"),
594-
("val1", 1, "val1", "val1"),
595-
("val2", 2, "val2", "val2"),
596-
],
597-
names=["idx1", "A", "idx1", "idx2"],
584+
[("val1", 1), ("val1", 1), ("val2", 2)], names=["idx1", "A"]
598585
),
599586
)
600587
tm.assert_frame_equal(result, expected)
@@ -653,30 +640,6 @@ def test_groupby_rolling_resulting_multiindex(self):
653640
)
654641
tm.assert_index_equal(result.index, expected_index)
655642

656-
def test_groupby_level(self):
657-
# GH 38523
658-
arrays = [
659-
["Falcon", "Falcon", "Parrot", "Parrot"],
660-
["Captive", "Wild", "Captive", "Wild"],
661-
]
662-
index = MultiIndex.from_arrays(arrays, names=("Animal", "Type"))
663-
df = DataFrame({"Max Speed": [390.0, 350.0, 30.0, 20.0]}, index=index)
664-
result = df.groupby(level=0)["Max Speed"].rolling(2).sum()
665-
expected = Series(
666-
[np.nan, 740.0, np.nan, 50.0],
667-
index=MultiIndex.from_tuples(
668-
[
669-
("Falcon", "Falcon", "Captive"),
670-
("Falcon", "Falcon", "Wild"),
671-
("Parrot", "Parrot", "Captive"),
672-
("Parrot", "Parrot", "Wild"),
673-
],
674-
names=["Animal", "Animal", "Type"],
675-
),
676-
name="Max Speed",
677-
)
678-
tm.assert_series_equal(result, expected)
679-
680643

681644
class TestExpanding:
682645
def setup_method(self):

0 commit comments

Comments
 (0)