Skip to content

Commit f698dab

Browse files
mroeschkeyehoshuadimarsky
authored andcommitted
Revert "BUG: groupby().rolling(freq) with monotonic dates within groups (pandas-dev#46065)" (pandas-dev#46078)
This reverts commit ad0f1bf.
1 parent 3388bb0 commit f698dab

File tree

5 files changed

+91
-96
lines changed

5 files changed

+91
-96
lines changed

doc/source/whatsnew/v1.4.2.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ Fixed regressions
2323

2424
Bug fixes
2525
~~~~~~~~~
26-
- Bug in :meth:`Groupby.rolling` with a frequency window would raise a ``ValueError`` even if the datetimes within each group were monotonic (:issue:`46061`)
26+
-
2727
-
2828

2929
.. ---------------------------------------------------------------------------

pandas/core/window/rolling.py

+17-17
Original file line numberDiff line numberDiff line change
@@ -837,7 +837,7 @@ def _gotitem(self, key, ndim, subset=None):
837837
subset = self.obj.set_index(self._on)
838838
return super()._gotitem(key, ndim, subset=subset)
839839

840-
def _validate_datetimelike_monotonic(self):
840+
def _validate_monotonic(self):
841841
"""
842842
Validate that "on" is monotonic; already validated at a higher level.
843843
"""
@@ -1687,7 +1687,7 @@ def _validate(self):
16871687
or isinstance(self._on, (DatetimeIndex, TimedeltaIndex, PeriodIndex))
16881688
) and isinstance(self.window, (str, BaseOffset, timedelta)):
16891689

1690-
self._validate_datetimelike_monotonic()
1690+
self._validate_monotonic()
16911691

16921692
# this will raise ValueError on non-fixed freqs
16931693
try:
@@ -1712,13 +1712,18 @@ def _validate(self):
17121712
elif not is_integer(self.window) or self.window < 0:
17131713
raise ValueError("window must be an integer 0 or greater")
17141714

1715-
def _validate_datetimelike_monotonic(self):
1715+
def _validate_monotonic(self):
17161716
"""
17171717
Validate monotonic (increasing or decreasing).
17181718
"""
17191719
if not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing):
1720-
on = "index" if self.on is None else self.on
1721-
raise ValueError(f"{on} must be monotonic.")
1720+
self._raise_monotonic_error()
1721+
1722+
def _raise_monotonic_error(self):
1723+
formatted = self.on
1724+
if self.on is None:
1725+
formatted = "index"
1726+
raise ValueError(f"{formatted} must be monotonic")
17221727

17231728
@doc(
17241729
_shared_docs["aggregate"],
@@ -2626,17 +2631,12 @@ def _get_window_indexer(self) -> GroupbyIndexer:
26262631
)
26272632
return window_indexer
26282633

2629-
def _validate_datetimelike_monotonic(self):
2634+
def _validate_monotonic(self):
26302635
"""
2631-
Validate that each group in self._on is monotonic
2636+
Validate that on is monotonic;
26322637
"""
2633-
# GH 46061
2634-
on = "index" if self.on is None else self.on
2635-
if self._on.hasnans:
2636-
raise ValueError(f"{on} must not have any NaT values.")
2637-
for group_indices in self._grouper.indices.values():
2638-
group_on = self._on.take(group_indices)
2639-
if not (
2640-
group_on.is_monotonic_increasing or group_on.is_monotonic_decreasing
2641-
):
2642-
raise ValueError(f"Each group within {on} must be monotonic.")
2638+
if (
2639+
not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing)
2640+
or self._on.hasnans
2641+
):
2642+
self._raise_monotonic_error()

pandas/tests/window/test_groupby.py

+1-78
Original file line numberDiff line numberDiff line change
@@ -678,7 +678,7 @@ def test_groupby_rolling_nans_in_index(self, rollings, key):
678678
)
679679
if key == "index":
680680
df = df.set_index("a")
681-
with pytest.raises(ValueError, match=f"{key} must not have any NaT values"):
681+
with pytest.raises(ValueError, match=f"{key} must be monotonic"):
682682
df.groupby("c").rolling("60min", **rollings)
683683

684684
@pytest.mark.parametrize("group_keys", [True, False])
@@ -922,83 +922,6 @@ def test_nan_and_zero_endpoints(self):
922922
)
923923
tm.assert_series_equal(result, expected)
924924

925-
def test_groupby_rolling_non_monotonic(self):
926-
# GH 43909
927-
928-
shuffled = [3, 0, 1, 2]
929-
sec = 1_000
930-
df = DataFrame(
931-
[{"t": Timestamp(2 * x * sec), "x": x + 1, "c": 42} for x in shuffled]
932-
)
933-
with pytest.raises(ValueError, match=r".* must be monotonic"):
934-
df.groupby("c").rolling(on="t", window="3s")
935-
936-
def test_groupby_monotonic(self):
937-
938-
# GH 15130
939-
# we don't need to validate monotonicity when grouping
940-
941-
# GH 43909 we should raise an error here to match
942-
# behaviour of non-groupby rolling.
943-
944-
data = [
945-
["David", "1/1/2015", 100],
946-
["David", "1/5/2015", 500],
947-
["David", "5/30/2015", 50],
948-
["David", "7/25/2015", 50],
949-
["Ryan", "1/4/2014", 100],
950-
["Ryan", "1/19/2015", 500],
951-
["Ryan", "3/31/2016", 50],
952-
["Joe", "7/1/2015", 100],
953-
["Joe", "9/9/2015", 500],
954-
["Joe", "10/15/2015", 50],
955-
]
956-
957-
df = DataFrame(data=data, columns=["name", "date", "amount"])
958-
df["date"] = to_datetime(df["date"])
959-
df = df.sort_values("date")
960-
961-
expected = (
962-
df.set_index("date")
963-
.groupby("name")
964-
.apply(lambda x: x.rolling("180D")["amount"].sum())
965-
)
966-
result = df.groupby("name").rolling("180D", on="date")["amount"].sum()
967-
tm.assert_series_equal(result, expected)
968-
969-
def test_datelike_on_monotonic_within_each_group(self):
970-
# GH 13966 (similar to #15130, closed by #15175)
971-
972-
# superseded by 43909
973-
# GH 46061: OK if the on is monotonic relative to each each group
974-
975-
dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s")
976-
df = DataFrame(
977-
{
978-
"A": [1] * 20 + [2] * 12 + [3] * 8,
979-
"B": np.concatenate((dates, dates)),
980-
"C": np.arange(40),
981-
}
982-
)
983-
984-
expected = (
985-
df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean())
986-
)
987-
result = df.groupby("A").rolling("4s", on="B").C.mean()
988-
tm.assert_series_equal(result, expected)
989-
990-
def test_datelike_on_not_monotonic_within_each_group(self):
991-
# GH 46061
992-
df = DataFrame(
993-
{
994-
"A": [1] * 3 + [2] * 3,
995-
"B": [Timestamp(year, 1, 1) for year in [2020, 2021, 2019]] * 2,
996-
"C": range(6),
997-
}
998-
)
999-
with pytest.raises(ValueError, match="Each group within B must be monotonic."):
1000-
df.groupby("A").rolling("365D", on="B")
1001-
1002925

1003926
class TestExpanding:
1004927
def setup_method(self):

pandas/tests/window/test_rolling.py

+12
Original file line numberDiff line numberDiff line change
@@ -1420,6 +1420,18 @@ def test_groupby_rolling_nan_included():
14201420
tm.assert_frame_equal(result, expected)
14211421

14221422

1423+
def test_groupby_rolling_non_monotonic():
1424+
# GH 43909
1425+
1426+
shuffled = [3, 0, 1, 2]
1427+
sec = 1_000
1428+
df = DataFrame(
1429+
[{"t": Timestamp(2 * x * sec), "x": x + 1, "c": 42} for x in shuffled]
1430+
)
1431+
with pytest.raises(ValueError, match=r".* must be monotonic"):
1432+
df.groupby("c").rolling(on="t", window="3s")
1433+
1434+
14231435
@pytest.mark.parametrize("method", ["skew", "kurt"])
14241436
def test_rolling_skew_kurt_numerical_stability(method):
14251437
# GH#6929

pandas/tests/window/test_timeseries_window.py

+60
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
Series,
99
Timestamp,
1010
date_range,
11+
to_datetime,
1112
)
1213
import pandas._testing as tm
1314

@@ -647,6 +648,65 @@ def agg_by_day(x):
647648

648649
tm.assert_frame_equal(result, expected)
649650

651+
def test_groupby_monotonic(self):
652+
653+
# GH 15130
654+
# we don't need to validate monotonicity when grouping
655+
656+
# GH 43909 we should raise an error here to match
657+
# behaviour of non-groupby rolling.
658+
659+
data = [
660+
["David", "1/1/2015", 100],
661+
["David", "1/5/2015", 500],
662+
["David", "5/30/2015", 50],
663+
["David", "7/25/2015", 50],
664+
["Ryan", "1/4/2014", 100],
665+
["Ryan", "1/19/2015", 500],
666+
["Ryan", "3/31/2016", 50],
667+
["Joe", "7/1/2015", 100],
668+
["Joe", "9/9/2015", 500],
669+
["Joe", "10/15/2015", 50],
670+
]
671+
672+
df = DataFrame(data=data, columns=["name", "date", "amount"])
673+
df["date"] = to_datetime(df["date"])
674+
df = df.sort_values("date")
675+
676+
expected = (
677+
df.set_index("date")
678+
.groupby("name")
679+
.apply(lambda x: x.rolling("180D")["amount"].sum())
680+
)
681+
result = df.groupby("name").rolling("180D", on="date")["amount"].sum()
682+
tm.assert_series_equal(result, expected)
683+
684+
def test_non_monotonic_raises(self):
685+
# GH 13966 (similar to #15130, closed by #15175)
686+
687+
# superseded by 43909
688+
689+
dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s")
690+
df = DataFrame(
691+
{
692+
"A": [1] * 20 + [2] * 12 + [3] * 8,
693+
"B": np.concatenate((dates, dates)),
694+
"C": np.arange(40),
695+
}
696+
)
697+
698+
expected = (
699+
df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean())
700+
)
701+
with pytest.raises(ValueError, match=r".* must be monotonic"):
702+
df.groupby("A").rolling(
703+
"4s", on="B"
704+
).C.mean() # should raise for non-monotonic t series
705+
706+
df2 = df.sort_values("B")
707+
result = df2.groupby("A").rolling("4s", on="B").C.mean()
708+
tm.assert_series_equal(result, expected)
709+
650710
def test_rolling_cov_offset(self):
651711
# GH16058
652712

0 commit comments

Comments
 (0)