Skip to content

Commit ce34c1c

Browse files
authored
BUG: RollingGroupby MultiIndex levels dropped (pandas-dev#40701)
1 parent 84b3f91 commit ce34c1c

File tree

3 files changed

+84
-18
lines changed

3 files changed

+84
-18
lines changed

doc/source/whatsnew/v1.3.0.rst

+32
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,38 @@ However, floating point artifacts may now exist in the results when rolling over
400400
s = pd.Series([7, 5, 5, 5])
401401
s.rolling(3).var()
402402
403+
.. _whatsnew_130.notable_bug_fixes.rolling_groupby_multiindex:
404+
405+
GroupBy.rolling with MultiIndex no longer drops levels in the result
406+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
407+
408+
:class:`core.window.rolling.RollingGroupby` no longer drops index levels from the result when the grouped :class:`DataFrame`
409+
has a :class:`MultiIndex`. Group-by keys that are also index levels can therefore appear twice in the resulting
410+
:class:`MultiIndex`, but this change restores the behavior that was present in version 1.1.3 (:issue:`38787`, :issue:`38523`).
411+
412+
413+
.. ipython:: python
414+
415+
index = pd.MultiIndex.from_tuples([('idx1', 'idx2')], names=['label1', 'label2'])
416+
df = pd.DataFrame({'a': [1], 'b': [2]}, index=index)
417+
df
418+
419+
*Previous behavior*:
420+
421+
.. code-block:: ipython
422+
423+
In [1]: df.groupby('label1').rolling(1).sum()
424+
Out[1]:
425+
a b
426+
label1
427+
idx1 1.0 2.0
428+
429+
*New behavior*:
430+
431+
.. ipython:: python
432+
433+
df.groupby('label1').rolling(1).sum()
434+
403435
404436
.. _whatsnew_130.api_breaking.deps:
405437

pandas/core/window/rolling.py

+10-13
Original file line numberDiff line numberDiff line change
@@ -577,26 +577,23 @@ def _apply(
577577
numba_cache_key,
578578
**kwargs,
579579
)
580-
# Reconstruct the resulting MultiIndex from tuples
580+
# Reconstruct the resulting MultiIndex
581581
# 1st set of levels = group by labels
582-
# 2nd set of levels = original index
583-
# Ignore 2nd set of levels if a group by label include an index level
584-
result_index_names = copy.copy(self._grouper.names)
585-
grouped_object_index = None
582+
# 2nd set of levels = original DataFrame/Series index
583+
grouped_object_index = self.obj.index
584+
grouped_index_name = [*grouped_object_index.names]
585+
groupby_keys = copy.copy(self._grouper.names)
586+
result_index_names = groupby_keys + grouped_index_name
586587

587-
column_keys = [
588+
drop_columns = [
588589
key
589-
for key in result_index_names
590+
for key in self._grouper.names
590591
if key not in self.obj.index.names or key is None
591592
]
592593

593-
if len(column_keys) == len(result_index_names):
594-
grouped_object_index = self.obj.index
595-
grouped_index_name = [*grouped_object_index.names]
596-
result_index_names += grouped_index_name
597-
else:
594+
if len(drop_columns) != len(groupby_keys):
598595
# The result still contains the group-by key column(s), so drop them
599-
result = result.drop(columns=column_keys, errors="ignore")
596+
result = result.drop(columns=drop_columns, errors="ignore")
600597

601598
codes = self._grouper.codes
602599
levels = copy.copy(self._grouper.levels)

pandas/tests/window/test_groupby.py

+42-5
Original file line numberDiff line numberDiff line change
@@ -588,23 +588,31 @@ def test_groupby_rolling_nans_in_index(self, rollings, key):
588588
with pytest.raises(ValueError, match=f"{key} must be monotonic"):
589589
df.groupby("c").rolling("60min", **rollings)
590590

591-
def test_groupby_rolling_group_keys(self):
591+
@pytest.mark.parametrize("group_keys", [True, False])
592+
def test_groupby_rolling_group_keys(self, group_keys):
592593
# GH 37641
594+
# GH 38523: GH 37641 actually was not a bug.
595+
# group_keys only applies to groupby.apply directly
593596
arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
594597
index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
595598

596599
s = Series([1, 2, 3], index=index)
597-
result = s.groupby(["idx1", "idx2"], group_keys=False).rolling(1).mean()
600+
result = s.groupby(["idx1", "idx2"], group_keys=group_keys).rolling(1).mean()
598601
expected = Series(
599602
[1.0, 2.0, 3.0],
600603
index=MultiIndex.from_tuples(
601-
[("val1", "val1"), ("val1", "val1"), ("val2", "val2")],
602-
names=["idx1", "idx2"],
604+
[
605+
("val1", "val1", "val1", "val1"),
606+
("val1", "val1", "val1", "val1"),
607+
("val2", "val2", "val2", "val2"),
608+
],
609+
names=["idx1", "idx2", "idx1", "idx2"],
603610
),
604611
)
605612
tm.assert_series_equal(result, expected)
606613

607614
def test_groupby_rolling_index_level_and_column_label(self):
615+
# The groupby keys should not appear as a resulting column
608616
arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
609617
index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
610618

@@ -613,7 +621,12 @@ def test_groupby_rolling_index_level_and_column_label(self):
613621
expected = DataFrame(
614622
{"B": [0.0, 1.0, 2.0]},
615623
index=MultiIndex.from_tuples(
616-
[("val1", 1), ("val1", 1), ("val2", 2)], names=["idx1", "A"]
624+
[
625+
("val1", 1, "val1", "val1"),
626+
("val1", 1, "val1", "val1"),
627+
("val2", 2, "val2", "val2"),
628+
],
629+
names=["idx1", "A", "idx1", "idx2"],
617630
),
618631
)
619632
tm.assert_frame_equal(result, expected)
@@ -695,6 +708,30 @@ def test_by_column_not_in_values(self, columns):
695708
assert "A" not in result.columns
696709
tm.assert_frame_equal(g.obj, original_obj)
697710

711+
def test_groupby_level(self):
712+
# GH 38523, 38787
713+
arrays = [
714+
["Falcon", "Falcon", "Parrot", "Parrot"],
715+
["Captive", "Wild", "Captive", "Wild"],
716+
]
717+
index = MultiIndex.from_arrays(arrays, names=("Animal", "Type"))
718+
df = DataFrame({"Max Speed": [390.0, 350.0, 30.0, 20.0]}, index=index)
719+
result = df.groupby(level=0)["Max Speed"].rolling(2).sum()
720+
expected = Series(
721+
[np.nan, 740.0, np.nan, 50.0],
722+
index=MultiIndex.from_tuples(
723+
[
724+
("Falcon", "Falcon", "Captive"),
725+
("Falcon", "Falcon", "Wild"),
726+
("Parrot", "Parrot", "Captive"),
727+
("Parrot", "Parrot", "Wild"),
728+
],
729+
names=["Animal", "Animal", "Type"],
730+
),
731+
name="Max Speed",
732+
)
733+
tm.assert_series_equal(result, expected)
734+
698735

699736
class TestExpanding:
700737
def setup_method(self):

0 commit comments

Comments
 (0)