Skip to content

BUG: Fix Rolling where duplicate datetimelike indexes are treated as consecutive rather than equal with closed='left' and closed='neither' #54917

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ Performance improvements
Bug fixes
~~~~~~~~~
- Bug in :class:`AbstractHolidayCalendar` where timezone data was not propagated when computing holiday observances (:issue:`54580`)
- Bug in :class:`pandas.core.window.Rolling` where duplicate datetimelike indexes are treated as consecutive rather than equal with ``closed='left'`` and ``closed='neither'`` (:issue:`20712`)

Categorical
^^^^^^^^^^^
Expand Down
7 changes: 6 additions & 1 deletion pandas/_libs/window/indexers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,12 @@ def calculate_variable_window_bounds(
break
# end bound is previous end
# or current index
elif (index[end[i - 1]] - end_bound) * index_growth_sign <= 0:
elif index[end[i - 1]] == end_bound:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you make a similar change for VariableOffsetWindowIndexer.get_window_bounds and include a test for it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

if right_closed:
end[i] = i + 1
else:
end[i] = end[i - 1] + 1
elif (index[end[i - 1]] - end_bound) * index_growth_sign < 0:
end[i] = i + 1
else:
end[i] = end[i - 1]
Expand Down
38 changes: 22 additions & 16 deletions pandas/tests/window/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,20 +466,23 @@ def test_groupby_rolling_subset_with_closed(self):
# GH 35549
df = DataFrame(
{
"column1": range(6),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Modified these two tests because, with the fix, the result would otherwise be a list of only NaN values.

"column2": range(6),
"group": 3 * ["A", "B"],
"date": [Timestamp("2019-01-01")] * 6,
"column1": range(8),
"column2": range(8),
"group": ["A"] * 4 + ["B"] * 4,
"date": [
Timestamp(date)
for date in ["2019-01-01", "2019-01-01", "2019-01-02", "2019-01-02"]
]
* 2,
}
)
result = (
df.groupby("group").rolling("1D", on="date", closed="left")["column1"].sum()
)
expected = Series(
[np.nan, 0.0, 2.0, np.nan, 1.0, 4.0],
index=MultiIndex.from_tuples(
[("A", Timestamp("2019-01-01"))] * 3
+ [("B", Timestamp("2019-01-01"))] * 3,
[np.nan, np.nan, 1.0, 1.0, np.nan, np.nan, 9.0, 9.0],
index=MultiIndex.from_frame(
df[["group", "date"]],
names=["group", "date"],
),
name="column1",
Expand All @@ -490,10 +493,14 @@ def test_groupby_subset_rolling_subset_with_closed(self):
# GH 35549
df = DataFrame(
{
"column1": range(6),
"column2": range(6),
"group": 3 * ["A", "B"],
"date": [Timestamp("2019-01-01")] * 6,
"column1": range(8),
"column2": range(8),
"group": ["A"] * 4 + ["B"] * 4,
"date": [
Timestamp(date)
for date in ["2019-01-01", "2019-01-01", "2019-01-02", "2019-01-02"]
]
* 2,
}
)

Expand All @@ -503,10 +510,9 @@ def test_groupby_subset_rolling_subset_with_closed(self):
.sum()
)
expected = Series(
[np.nan, 0.0, 2.0, np.nan, 1.0, 4.0],
index=MultiIndex.from_tuples(
[("A", Timestamp("2019-01-01"))] * 3
+ [("B", Timestamp("2019-01-01"))] * 3,
[np.nan, np.nan, 1.0, 1.0, np.nan, np.nan, 9.0, 9.0],
index=MultiIndex.from_frame(
df[["group", "date"]],
names=["group", "date"],
),
name="column1",
Expand Down
32 changes: 32 additions & 0 deletions pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,38 @@ def test_datetimelike_nonunique_index_centering(
tm.assert_equal(result, expected)


# Regression test for GH 20712: a "2D" time-based rolling sum over a
# DatetimeIndex that contains repeated timestamps. The values are 0..7 and the
# index days are 1, 1, 2, 2, 2, 3, 4, 4, so every expected vector below can be
# checked by hand against the window implied by ``closed``.
@pytest.mark.parametrize(
"closed,expected",
[
# Window [t - 2D, t): rows sharing the current timestamp are excluded, so
# duplicates get identical results and the first day has no data (NaN).
("left", [np.nan, np.nan, 1, 1, 1, 10, 14, 14]),
# Window (t - 2D, t): same as "left" but the row(s) exactly 2 days back
# are dropped as well.
("neither", [np.nan, np.nan, 1, 1, 1, 9, 5, 5]),
# Window (t - 2D, t]: each row's sum includes rows up to and including
# itself, so duplicates accumulate row by row.
("right", [0, 1, 3, 6, 10, 14, 11, 18]),
# Window [t - 2D, t]: like "right" but rows exactly 2 days back are kept.
("both", [0, 1, 3, 6, 10, 15, 20, 27]),
],
)
def test_datetimelike_nonunique(closed, expected, frame_or_series):
# GH 20712
# Non-unique, monotonically increasing index: day 1 twice, day 2 three times,
# day 3 once, day 4 twice.
index = DatetimeIndex(
[
"2011-01-01",
"2011-01-01",
"2011-01-02",
"2011-01-02",
"2011-01-02",
"2011-01-03",
"2011-01-04",
"2011-01-04",
]
)

# Build the input and the expected output with the same constructor, index
# and float dtype so that only the values differ.
df = frame_or_series(range(8), index=index, dtype=float)
expected = frame_or_series(expected, index=index, dtype=float)

result = df.rolling("2D", closed=closed).sum()

tm.assert_equal(result, expected)


def test_even_number_window_alignment():
# see discussion in GH 38780
s = Series(range(3), index=date_range(start="2020-01-01", freq="D", periods=3))
Expand Down