Skip to content

Backport PR #44824 on branch 1.3.x (BUG: Fix regression in groupby.rolling.corr/cov when other is same size as each group) #44848

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.5.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Fixed regressions
- Fixed performance regression in :func:`read_csv` (:issue:`44106`)
- Fixed regression in :meth:`Series.duplicated` and :meth:`Series.drop_duplicates` when Series has :class:`Categorical` dtype with boolean categories (:issue:`44351`)
- Fixed regression in :meth:`.GroupBy.sum` with ``timedelta64[ns]`` dtype containing ``NaT`` failing to treat that value as NA (:issue:`42659`)
-
- Fixed regression in :meth:`.RollingGroupby.cov` and :meth:`.RollingGroupby.corr` when ``other`` had the same shape as each group would incorrectly return superfluous groups in the result (:issue:`42915`)

.. ---------------------------------------------------------------------------

Expand Down
12 changes: 7 additions & 5 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,8 +658,11 @@ def _apply_pairwise(
target = self._create_data(target)
result = super()._apply_pairwise(target, other, pairwise, func)
# 1) Determine the levels + codes of the groupby levels
if other is not None:
# When we have other, we must reindex (expand) the result
if other is not None and not all(
len(group) == len(other) for group in self._grouper.indices.values()
):
# GH 42915
# len(other) != len(any group), so must reindex (expand) the result
# from flex_binary_moment to a "transform"-like result
# per groupby combination
old_result_len = len(result)
Expand All @@ -681,10 +684,9 @@ def _apply_pairwise(
codes, levels = factorize(labels)
groupby_codes.append(codes)
groupby_levels.append(levels)

else:
# When we evaluate the pairwise=True result, repeat the groupby
# labels by the number of columns in the original object
# pairwise=True or len(other) == len(each group), so repeat
# the groupby labels by the number of columns in the original object
groupby_codes = self._grouper.codes
# error: Incompatible types in assignment (expression has type
# "List[Index]", variable has type "List[Union[ndarray, Index]]")
Expand Down
32 changes: 31 additions & 1 deletion pandas/tests/window/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,33 @@ def test_rolling_quantile(self, interpolation):
expected.index = expected_index
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("f, expected_val", [["corr", 1], ["cov", 0.5]])
def test_rolling_corr_cov_other_same_size_as_groups(self, f, expected_val):
# GH 42915
df = DataFrame(
{"value": range(10), "idx1": [1] * 5 + [2] * 5, "idx2": [1, 2, 3, 4, 5] * 2}
).set_index(["idx1", "idx2"])
other = DataFrame({"value": range(5), "idx2": [1, 2, 3, 4, 5]}).set_index(
"idx2"
)
result = getattr(df.groupby(level=0).rolling(2), f)(other)
expected_data = ([np.nan] + [expected_val] * 4) * 2
expected = DataFrame(
expected_data,
columns=["value"],
index=MultiIndex.from_arrays(
[
[1] * 5 + [2] * 5,
[1] * 5 + [2] * 5,
list(range(1, 6)) * 2,
],
names=["idx1", "idx1", "idx2"],
),
)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("f", ["corr", "cov"])
def test_rolling_corr_cov(self, f):
def test_rolling_corr_cov_other_diff_size_as_groups(self, f):
g = self.frame.groupby("A")
r = g.rolling(window=4)

Expand All @@ -138,6 +163,11 @@ def func(x):
expected["A"] = np.nan
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("f", ["corr", "cov"])
def test_rolling_corr_cov_pairwise(self, f):
g = self.frame.groupby("A")
r = g.rolling(window=4)

result = getattr(r.B, f)(pairwise=True)

def func(x):
Expand Down