Skip to content

Commit 1778605

Browse files
simonjayhawkins authored and luckyvs1 committed
Revert "BUG/REG: RollingGroupby MultiIndex levels dropped (pandas-dev#38737)" (pandas-dev#39191)
This reverts commit a37f1a4.
1 parent 2d27bff commit 1778605

File tree

4 files changed

+23
-55
lines changed

4 files changed

+23
-55
lines changed

doc/source/whatsnew/v1.2.1.rst

-1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Fixed regression in :meth:`to_csv` that created corrupted zip files when there were more rows than ``chunksize`` (:issue:`38714`)
18-
- Fixed regression in ``groupby().rolling()`` where :class:`MultiIndex` levels were dropped (:issue:`38523`)
1918
- Fixed regression in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`)
2019
- Fixed regression in :meth:`DataFrame.groupby()` with :class:`Categorical` grouping column not showing unused categories for ``grouped.indices`` (:issue:`38642`)
2120
- Fixed regression in :meth:`DataFrame.any` and :meth:`DataFrame.all` not returning a result for tz-aware ``datetime64`` columns (:issue:`38723`)

pandas/core/shared_docs.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,7 @@
108108
Note this does not influence the order of observations within each
109109
group. Groupby preserves the order of rows within each group.
110110
group_keys : bool, default True
111-
When calling ``groupby().apply()``, add group keys to index to identify pieces.
111+
When calling apply, add group keys to index to identify pieces.
112112
squeeze : bool, default False
113113
Reduce the dimensionality of the return type if possible,
114114
otherwise return a consistent type.

pandas/core/window/rolling.py

+17-11
Original file line number | Diff line number | Diff line change
@@ -794,22 +794,28 @@ def _apply(
794794
numba_cache_key,
795795
**kwargs,
796796
)
797-
# Reconstruct the resulting MultiIndex
797+
# Reconstruct the resulting MultiIndex from tuples
798798
# 1st set of levels = group by labels
799-
# 2nd set of levels = original DataFrame/Series index
800-
grouped_object_index = self.obj.index
801-
grouped_index_name = [*grouped_object_index.names]
802-
groupby_keys = [grouping.name for grouping in self._groupby.grouper._groupings]
803-
result_index_names = groupby_keys + grouped_index_name
799+
# 2nd set of levels = original index
800+
# Ignore 2nd set of levels if a group by label include an index level
801+
result_index_names = [
802+
grouping.name for grouping in self._groupby.grouper._groupings
803+
]
804+
grouped_object_index = None
804805

805-
drop_columns = [
806+
column_keys = [
806807
key
807-
for key in groupby_keys
808+
for key in result_index_names
808809
if key not in self.obj.index.names or key is None
809810
]
810-
if len(drop_columns) != len(groupby_keys):
811-
# Our result will have kept groupby columns which should be dropped
812-
result = result.drop(columns=drop_columns, errors="ignore")
811+
812+
if len(column_keys) == len(result_index_names):
813+
grouped_object_index = self.obj.index
814+
grouped_index_name = [*grouped_object_index.names]
815+
result_index_names += grouped_index_name
816+
else:
817+
# Our result will have still kept the column in the result
818+
result = result.drop(columns=column_keys, errors="ignore")
813819

814820
codes = self._groupby.grouper.codes
815821
levels = self._groupby.grouper.levels

pandas/tests/window/test_groupby.py

+5-42
Original file line number | Diff line number | Diff line change
@@ -556,31 +556,23 @@ def test_groupby_rolling_nans_in_index(self, rollings, key):
556556
with pytest.raises(ValueError, match=f"{key} must be monotonic"):
557557
df.groupby("c").rolling("60min", **rollings)
558558

559-
@pytest.mark.parametrize("group_keys", [True, False])
560-
def test_groupby_rolling_group_keys(self, group_keys):
559+
def test_groupby_rolling_group_keys(self):
561560
# GH 37641
562-
# GH 38523: GH 37641 actually was not a bug.
563-
# group_keys only applies to groupby.apply directly
564561
arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
565562
index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
566563

567564
s = Series([1, 2, 3], index=index)
568-
result = s.groupby(["idx1", "idx2"], group_keys=group_keys).rolling(1).mean()
565+
result = s.groupby(["idx1", "idx2"], group_keys=False).rolling(1).mean()
569566
expected = Series(
570567
[1.0, 2.0, 3.0],
571568
index=MultiIndex.from_tuples(
572-
[
573-
("val1", "val1", "val1", "val1"),
574-
("val1", "val1", "val1", "val1"),
575-
("val2", "val2", "val2", "val2"),
576-
],
577-
names=["idx1", "idx2", "idx1", "idx2"],
569+
[("val1", "val1"), ("val1", "val1"), ("val2", "val2")],
570+
names=["idx1", "idx2"],
578571
),
579572
)
580573
tm.assert_series_equal(result, expected)
581574

582575
def test_groupby_rolling_index_level_and_column_label(self):
583-
# The groupby keys should not appear as a resulting column
584576
arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
585577
index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
586578

@@ -589,12 +581,7 @@ def test_groupby_rolling_index_level_and_column_label(self):
589581
expected = DataFrame(
590582
{"B": [0.0, 1.0, 2.0]},
591583
index=MultiIndex.from_tuples(
592-
[
593-
("val1", 1, "val1", "val1"),
594-
("val1", 1, "val1", "val1"),
595-
("val2", 2, "val2", "val2"),
596-
],
597-
names=["idx1", "A", "idx1", "idx2"],
584+
[("val1", 1), ("val1", 1), ("val2", 2)], names=["idx1", "A"]
598585
),
599586
)
600587
tm.assert_frame_equal(result, expected)
@@ -653,30 +640,6 @@ def test_groupby_rolling_resulting_multiindex(self):
653640
)
654641
tm.assert_index_equal(result.index, expected_index)
655642

656-
def test_groupby_level(self):
657-
# GH 38523
658-
arrays = [
659-
["Falcon", "Falcon", "Parrot", "Parrot"],
660-
["Captive", "Wild", "Captive", "Wild"],
661-
]
662-
index = MultiIndex.from_arrays(arrays, names=("Animal", "Type"))
663-
df = DataFrame({"Max Speed": [390.0, 350.0, 30.0, 20.0]}, index=index)
664-
result = df.groupby(level=0)["Max Speed"].rolling(2).sum()
665-
expected = Series(
666-
[np.nan, 740.0, np.nan, 50.0],
667-
index=MultiIndex.from_tuples(
668-
[
669-
("Falcon", "Falcon", "Captive"),
670-
("Falcon", "Falcon", "Wild"),
671-
("Parrot", "Parrot", "Captive"),
672-
("Parrot", "Parrot", "Wild"),
673-
],
674-
names=["Animal", "Animal", "Type"],
675-
),
676-
name="Max Speed",
677-
)
678-
tm.assert_series_equal(result, expected)
679-
680643

681644
class TestExpanding:
682645
def setup_method(self):

0 commit comments

Comments
 (0)