Skip to content

fix Rolling for multi-index and reversed index. #28297

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Oct 22, 2019
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,9 @@ Plotting
Groupby/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^

-
- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`)
- Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue:`15584`).
- Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue:`19248`).
- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)
- Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`)

Expand Down
18 changes: 12 additions & 6 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def _create_blocks(self):
obj = self._selected_obj

# filter out the on from the object
if self.on is not None:
if self.on is not None and not isinstance(self.on, Index):
if obj.ndim == 2:
obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False)
blocks = obj._to_dict_of_blocks(copy=False).values()
Expand Down Expand Up @@ -1651,18 +1651,19 @@ def is_datetimelike(self):

@cache_readonly
def _on(self):

if self.on is None:
if self.axis == 0:
return self.obj.index
elif self.axis == 1:
return self.obj.columns
elif isinstance(self.on, Index):
return self.on
elif isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns:
return Index(self.obj[self.on])
else:
raise ValueError(
"invalid on specified as {0}, "
"must be a column (if DataFrame) "
"must be a column (of DataFrame), an Index "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you update the docstring (and the type annotation, if there is one) for ``on`` in Rolling?

"or None".format(self.on)
)

Expand Down Expand Up @@ -1706,10 +1707,15 @@ def validate(self):

def _validate_monotonic(self):
"""
Validate on is_monotonic.
Validate monotonic (increasing or decreasing).
"""
if not self._on.is_monotonic:
formatted = self.on or "index"
if (
not self._on.is_monotonic_increasing
and not self._on.is_monotonic_decreasing
):
formatted = self.on
if self.on is None:
formatted = "index"
raise ValueError("{0} must be monotonic".format(formatted))

def _validate_freq(self):
Expand Down
29 changes: 29 additions & 0 deletions pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,3 +361,32 @@ def test_rolling_datetime(self, axis_frame, tz_naive_fixture):
}
)
tm.assert_frame_equal(result, expected)

def test_rolling_decreasing(self):
index = [
pd.Timestamp("20190101 09:00:00"),
pd.Timestamp("20190101 09:00:02"),
pd.Timestamp("20190101 09:00:03"),
pd.Timestamp("20190101 09:00:05"),
pd.Timestamp("20190101 09:00:06"),
]

df = pd.DataFrame({"column": [3, 4, 4, 2, 1]}, index=reversed(index))
result = df.rolling("2s").min()
tm.assert_frame_equal(
result,
pd.DataFrame({"column": [3.0, 3.0, 3.0, 2.0, 1.0]}, index=reversed(index)),
)

def test_rolling_multi_index(self):
df = pd.DataFrame(
{"column": range(6)},
index=pd.MultiIndex.from_product(
[pd.date_range("20190101", periods=3), range(2)], names=["date", "seq"]
),
)
result = df.rolling("10d", on=df.index.get_level_values("date")).sum()
tm.assert_frame_equal(
result,
pd.DataFrame({"column": [0.0, 1.0, 3.0, 6.0, 10.0, 15.0]}, index=df.index),
)
4 changes: 3 additions & 1 deletion pandas/tests/window/test_timeseries_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,9 @@ def test_monotonic_on(self):
df.rolling("2s").sum()

# non-monotonic
df.index = reversed(df.index.tolist())
non_monotonic_index = df.index.to_list()
non_monotonic_index[0] = non_monotonic_index[3]
df.index = non_monotonic_index
assert not df.index.is_monotonic

with pytest.raises(ValueError):
Expand Down