Skip to content

Commit d2aa44f

Browse files
authored
BUG: groupby().rolling(freq) with monotonic dates within groups #46065 (#46567)
1 parent 382aefc commit d2aa44f

File tree

5 files changed

+96
-72
lines changed

5 files changed

+96
-72
lines changed

doc/source/whatsnew/v1.4.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ Bug fixes
3232
- Fix some cases for subclasses that define their ``_constructor`` properties as general callables (:issue:`46018`)
3333
- Fixed "longtable" formatting in :meth:`.Styler.to_latex` when ``column_format`` is given in extended format (:issue:`46037`)
3434
- Fixed incorrect rendering in :meth:`.Styler.format` with ``hyperlinks="html"`` when the url contains a colon or other special characters (:issue:`46389`)
35+
- Fixed :meth:`Groupby.rolling` with a frequency window that would raise a ``ValueError`` even if the datetimes within each group were monotonic (:issue:`46061`)
3536

3637
.. ---------------------------------------------------------------------------
3738

pandas/core/window/rolling.py

+18
Original file line numberDiff line numberDiff line change
@@ -2680,3 +2680,21 @@ def _get_window_indexer(self) -> GroupbyIndexer:
26802680
indexer_kwargs=indexer_kwargs,
26812681
)
26822682
return window_indexer
2683+
2684+
def _validate_datetimelike_monotonic(self):
2685+
"""
2686+
Validate that each group in self._on is monotonic
2687+
"""
2688+
# GH 46061
2689+
if self._on.hasnans:
2690+
self._raise_monotonic_error("values must not have NaT")
2691+
for group_indices in self._grouper.indices.values():
2692+
group_on = self._on.take(group_indices)
2693+
if not (
2694+
group_on.is_monotonic_increasing or group_on.is_monotonic_decreasing
2695+
):
2696+
on = "index" if self.on is None else self.on
2697+
raise ValueError(
2698+
f"Each group within {on} must be monotonic. "
2699+
f"Sort the values in {on} first."
2700+
)

pandas/tests/window/test_groupby.py

+77
Original file line numberDiff line numberDiff line change
@@ -927,6 +927,83 @@ def test_nan_and_zero_endpoints(self):
927927
)
928928
tm.assert_series_equal(result, expected)
929929

930+
def test_groupby_rolling_non_monotonic(self):
931+
# GH 43909
932+
933+
shuffled = [3, 0, 1, 2]
934+
sec = 1_000
935+
df = DataFrame(
936+
[{"t": Timestamp(2 * x * sec), "x": x + 1, "c": 42} for x in shuffled]
937+
)
938+
with pytest.raises(ValueError, match=r".* must be monotonic"):
939+
df.groupby("c").rolling(on="t", window="3s")
940+
941+
def test_groupby_monotonic(self):
942+
943+
# GH 15130
944+
# we don't need to validate monotonicity when grouping
945+
946+
# GH 43909 we should raise an error here to match
947+
# behaviour of non-groupby rolling.
948+
949+
data = [
950+
["David", "1/1/2015", 100],
951+
["David", "1/5/2015", 500],
952+
["David", "5/30/2015", 50],
953+
["David", "7/25/2015", 50],
954+
["Ryan", "1/4/2014", 100],
955+
["Ryan", "1/19/2015", 500],
956+
["Ryan", "3/31/2016", 50],
957+
["Joe", "7/1/2015", 100],
958+
["Joe", "9/9/2015", 500],
959+
["Joe", "10/15/2015", 50],
960+
]
961+
962+
df = DataFrame(data=data, columns=["name", "date", "amount"])
963+
df["date"] = to_datetime(df["date"])
964+
df = df.sort_values("date")
965+
966+
expected = (
967+
df.set_index("date")
968+
.groupby("name")
969+
.apply(lambda x: x.rolling("180D")["amount"].sum())
970+
)
971+
result = df.groupby("name").rolling("180D", on="date")["amount"].sum()
972+
tm.assert_series_equal(result, expected)
973+
974+
def test_datelike_on_monotonic_within_each_group(self):
975+
# GH 13966 (similar to #15130, closed by #15175)
976+
977+
# superseded by 43909
978+
# GH 46061: OK if the on is monotonic relative to each group
979+
980+
dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s")
981+
df = DataFrame(
982+
{
983+
"A": [1] * 20 + [2] * 12 + [3] * 8,
984+
"B": np.concatenate((dates, dates)),
985+
"C": np.arange(40),
986+
}
987+
)
988+
989+
expected = (
990+
df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean())
991+
)
992+
result = df.groupby("A").rolling("4s", on="B").C.mean()
993+
tm.assert_series_equal(result, expected)
994+
995+
def test_datelike_on_not_monotonic_within_each_group(self):
996+
# GH 46061
997+
df = DataFrame(
998+
{
999+
"A": [1] * 3 + [2] * 3,
1000+
"B": [Timestamp(year, 1, 1) for year in [2020, 2021, 2019]] * 2,
1001+
"C": range(6),
1002+
}
1003+
)
1004+
with pytest.raises(ValueError, match="Each group within B must be monotonic."):
1005+
df.groupby("A").rolling("365D", on="B")
1006+
9301007

9311008
class TestExpanding:
9321009
def setup_method(self):

pandas/tests/window/test_rolling.py

-12
Original file line numberDiff line numberDiff line change
@@ -1456,18 +1456,6 @@ def test_groupby_rolling_nan_included():
14561456
tm.assert_frame_equal(result, expected)
14571457

14581458

1459-
def test_groupby_rolling_non_monotonic():
1460-
# GH 43909
1461-
1462-
shuffled = [3, 0, 1, 2]
1463-
sec = 1_000
1464-
df = DataFrame(
1465-
[{"t": Timestamp(2 * x * sec), "x": x + 1, "c": 42} for x in shuffled]
1466-
)
1467-
with pytest.raises(ValueError, match=r".* must be monotonic"):
1468-
df.groupby("c").rolling(on="t", window="3s")
1469-
1470-
14711459
@pytest.mark.parametrize("method", ["skew", "kurt"])
14721460
def test_rolling_skew_kurt_numerical_stability(method):
14731461
# GH#6929

pandas/tests/window/test_timeseries_window.py

-60
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
Series,
1010
Timestamp,
1111
date_range,
12-
to_datetime,
1312
)
1413
import pandas._testing as tm
1514

@@ -649,65 +648,6 @@ def agg_by_day(x):
649648

650649
tm.assert_frame_equal(result, expected)
651650

652-
def test_groupby_monotonic(self):
653-
654-
# GH 15130
655-
# we don't need to validate monotonicity when grouping
656-
657-
# GH 43909 we should raise an error here to match
658-
# behaviour of non-groupby rolling.
659-
660-
data = [
661-
["David", "1/1/2015", 100],
662-
["David", "1/5/2015", 500],
663-
["David", "5/30/2015", 50],
664-
["David", "7/25/2015", 50],
665-
["Ryan", "1/4/2014", 100],
666-
["Ryan", "1/19/2015", 500],
667-
["Ryan", "3/31/2016", 50],
668-
["Joe", "7/1/2015", 100],
669-
["Joe", "9/9/2015", 500],
670-
["Joe", "10/15/2015", 50],
671-
]
672-
673-
df = DataFrame(data=data, columns=["name", "date", "amount"])
674-
df["date"] = to_datetime(df["date"])
675-
df = df.sort_values("date")
676-
677-
expected = (
678-
df.set_index("date")
679-
.groupby("name")
680-
.apply(lambda x: x.rolling("180D")["amount"].sum())
681-
)
682-
result = df.groupby("name").rolling("180D", on="date")["amount"].sum()
683-
tm.assert_series_equal(result, expected)
684-
685-
def test_non_monotonic_raises(self):
686-
# GH 13966 (similar to #15130, closed by #15175)
687-
688-
# superseded by 43909
689-
690-
dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s")
691-
df = DataFrame(
692-
{
693-
"A": [1] * 20 + [2] * 12 + [3] * 8,
694-
"B": np.concatenate((dates, dates)),
695-
"C": np.arange(40),
696-
}
697-
)
698-
699-
expected = (
700-
df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean())
701-
)
702-
with pytest.raises(ValueError, match=r".* must be monotonic"):
703-
df.groupby("A").rolling(
704-
"4s", on="B"
705-
).C.mean() # should raise for non-monotonic t series
706-
707-
df2 = df.sort_values("B")
708-
result = df2.groupby("A").rolling("4s", on="B").C.mean()
709-
tm.assert_series_equal(result, expected)
710-
711651
def test_rolling_cov_offset(self):
712652
# GH16058
713653

0 commit comments

Comments
 (0)