From bca44331a1a6cb89c33e09db44f52996eb4da9bb Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 30 Sep 2020 23:09:14 +0200 Subject: [PATCH 1/4] Fix bugs in rolling segfault --- pandas/core/window/rolling.py | 4 ++-- pandas/tests/window/test_grouper.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 6ab42dda865e7..f550c2c9e08b8 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -397,7 +397,7 @@ def _insert_on_column(self, result: "DataFrame", obj: "DataFrame"): if self.on is not None and not self._on.equals(obj.index): name = self._on.name - extra_col = Series(self._on, index=obj.index, name=name) + extra_col = Series(self._on, index=self.obj.index, name=name).sort_index() if name in result.columns: # TODO: sure we want to overwrite results? result[name] = extra_col @@ -2268,7 +2268,7 @@ def _get_window_indexer(self, window: int) -> GroupbyRollingIndexer: """ rolling_indexer: Type[BaseIndexer] indexer_kwargs: Optional[Dict] = None - index_array = self.obj.index.asi8 + index_array = self._on.asi8 if isinstance(self.window, BaseIndexer): rolling_indexer = type(self.window) indexer_kwargs = self.window.__dict__ diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index 0eebd657e97b7..b1ed377326f60 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -416,3 +416,32 @@ def test_groupby_rolling_empty_frame(self): result = expected.groupby(["s1", "s2"]).rolling(window=1).sum() expected.index = pd.MultiIndex.from_tuples([], names=["s1", "s2", None]) tm.assert_frame_equal(result, expected) + + def test_groupby_rolling_string_index(self): + # GH: 36727 + df = pd.DataFrame( + [ + ["A", "group_1", pd.Timestamp(2019, 1, 1, 9)], + ["B", "group_1", pd.Timestamp(2019, 1, 2, 9)], + ["C", "group_2", pd.Timestamp(2019, 1, 3, 9)], + ["D", "group_1", pd.Timestamp(2019, 
1, 6, 9)], + ["E", "group_2", pd.Timestamp(2019, 1, 20, 9)], + ], + columns=["index", "group", "eventTime"], + ).set_index("index") + + groups = df.groupby("group") + df["count_to_date"] = groups.cumcount() + rolling_groups = groups.rolling("10d", on="eventTime") + result = rolling_groups.apply(lambda df: df.shape[0]) + expected = pd.DataFrame( + [ + ["A", "group_1", pd.Timestamp(2019, 1, 1, 9), 1.0], + ["B", "group_1", pd.Timestamp(2019, 1, 2, 9), 2.0], + ["D", "group_1", pd.Timestamp(2019, 1, 6, 9), 3.0], + ["C", "group_2", pd.Timestamp(2019, 1, 3, 9), 1.0], + ["E", "group_2", pd.Timestamp(2019, 1, 20, 9), 1.0], + ], + columns=["index", "group", "eventTime", "count_to_date"] + ).set_index(["group", "index"]) + tm.assert_frame_equal(result, expected) From da13ded85ab2d9cd773c6ddcb5cffdaa3672129e Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 30 Sep 2020 23:23:43 +0200 Subject: [PATCH 2/4] Change test to test for all issues --- pandas/tests/window/test_grouper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index b1ed377326f60..ae1db12a12a39 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -423,8 +423,8 @@ def test_groupby_rolling_string_index(self): [ ["A", "group_1", pd.Timestamp(2019, 1, 1, 9)], ["B", "group_1", pd.Timestamp(2019, 1, 2, 9)], - ["C", "group_2", pd.Timestamp(2019, 1, 3, 9)], - ["D", "group_1", pd.Timestamp(2019, 1, 6, 9)], + ["Z", "group_2", pd.Timestamp(2019, 1, 3, 9)], + ["H", "group_1", pd.Timestamp(2019, 1, 6, 9)], ["E", "group_2", pd.Timestamp(2019, 1, 20, 9)], ], columns=["index", "group", "eventTime"], @@ -438,8 +438,8 @@ def test_groupby_rolling_string_index(self): [ ["A", "group_1", pd.Timestamp(2019, 1, 1, 9), 1.0], ["B", "group_1", pd.Timestamp(2019, 1, 2, 9), 2.0], - ["D", "group_1", pd.Timestamp(2019, 1, 6, 9), 3.0], - ["C", "group_2", pd.Timestamp(2019, 1, 3, 9), 1.0], + ["H", "group_1", 
pd.Timestamp(2019, 1, 6, 9), 3.0], + ["Z", "group_2", pd.Timestamp(2019, 1, 3, 9), 1.0], ["E", "group_2", pd.Timestamp(2019, 1, 20, 9), 1.0], ], columns=["index", "group", "eventTime", "count_to_date"] From 440368cd862f7e14a4b2637b56b713f8f4a241ba Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 30 Sep 2020 23:28:53 +0200 Subject: [PATCH 3/4] Run black pandas --- pandas/tests/window/test_grouper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index ae1db12a12a39..b93f9d5076b61 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -442,6 +442,6 @@ def test_groupby_rolling_string_index(self): ["Z", "group_2", pd.Timestamp(2019, 1, 3, 9), 1.0], ["E", "group_2", pd.Timestamp(2019, 1, 20, 9), 1.0], ], - columns=["index", "group", "eventTime", "count_to_date"] + columns=["index", "group", "eventTime", "count_to_date"], ).set_index(["group", "index"]) tm.assert_frame_equal(result, expected) From cc30cbe02769380c30f603a4c064f77ed5201a86 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 1 Oct 2020 09:23:46 +0200 Subject: [PATCH 4/4] Delete sort_index --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index f550c2c9e08b8..6cad9cdd6528f 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -397,7 +397,7 @@ def _insert_on_column(self, result: "DataFrame", obj: "DataFrame"): if self.on is not None and not self._on.equals(obj.index): name = self._on.name - extra_col = Series(self._on, index=self.obj.index, name=name).sort_index() + extra_col = Series(self._on, index=self.obj.index, name=name) if name in result.columns: # TODO: sure we want to overwrite results? result[name] = extra_col