Skip to content

BUG: Fix memory issues in rolling.min/max #33693

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 12 additions & 13 deletions asv_bench/benchmarks/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,19 +150,18 @@ def time_quantile(self, constructor, window, dtype, percentile, interpolation):
self.roll.quantile(percentile, interpolation=interpolation)


class PeakMemFixed:
def setup(self):
N = 10
arr = 100 * np.random.random(N)
self.roll = pd.Series(arr).rolling(10)

def peakmem_fixed(self):
# GH 25926
# This is to detect memory leaks in rolling operations.
# To save time this is only run on one method.
# 6000 iterations is enough for most types of leaks to be detected
for x in range(6000):
self.roll.max()
class PeakMemFixedWindowMinMax:

params = ["min", "max"]

def setup(self, operation):
N = int(1e6)
arr = np.random.random(N)
self.roll = pd.Series(arr).rolling(2)

def peakmem_fixed(self, operation):
for x in range(5):
getattr(self.roll, operation)()


class ForwardWindowMethods:
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,7 @@ Groupby/resample/rolling
- Bug in :meth:`DataFrame.resample` where an ``AmbiguousTimeError`` would be raised when the resulting timezone aware :class:`DatetimeIndex` had a DST transition at midnight (:issue:`25758`)
- Bug in :meth:`DataFrame.groupby` where a ``ValueError`` would be raised when grouping by a categorical column with read-only categories and ``sort=False`` (:issue:`33410`)
- Bug in :meth:`GroupBy.first` and :meth:`GroupBy.last` where None is not preserved in object dtype (:issue:`32800`)
- Bug in :meth:`Rolling.min` and :meth:`Rolling.max` where memory usage would grow after multiple calls when using a fixed window (:issue:`30726`)

Reshaping
^^^^^^^^^
Expand Down
16 changes: 7 additions & 9 deletions pandas/_libs/window/aggregations.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -971,8 +971,8 @@ cdef inline numeric calc_mm(int64_t minp, Py_ssize_t nobs,
return result


def roll_max_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
ndarray[int64_t] end, int64_t minp, int64_t win):
def roll_max_fixed(float64_t[:] values, int64_t[:] start,
int64_t[:] end, int64_t minp, int64_t win):
"""
Moving max of 1d array of any numeric type along axis=0 ignoring NaNs.

Expand All @@ -988,7 +988,7 @@ def roll_max_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
make the interval closed on the right, left,
both or neither endpoints
"""
return _roll_min_max_fixed(values, start, end, minp, win, is_max=1)
return _roll_min_max_fixed(values, minp, win, is_max=1)


def roll_max_variable(ndarray[float64_t] values, ndarray[int64_t] start,
Expand All @@ -1011,8 +1011,8 @@ def roll_max_variable(ndarray[float64_t] values, ndarray[int64_t] start,
return _roll_min_max_variable(values, start, end, minp, is_max=1)


def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
ndarray[int64_t] end, int64_t minp, int64_t win):
def roll_min_fixed(float64_t[:] values, int64_t[:] start,
int64_t[:] end, int64_t minp, int64_t win):
"""
Moving min of 1d array of any numeric type along axis=0 ignoring NaNs.

Expand All @@ -1025,7 +1025,7 @@ def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
index : ndarray, optional
index for window computation
"""
return _roll_min_max_fixed(values, start, end, minp, win, is_max=0)
return _roll_min_max_fixed(values, minp, win, is_max=0)


def roll_min_variable(ndarray[float64_t] values, ndarray[int64_t] start,
Expand Down Expand Up @@ -1112,9 +1112,7 @@ cdef _roll_min_max_variable(ndarray[numeric] values,
return output


cdef _roll_min_max_fixed(ndarray[numeric] values,
ndarray[int64_t] starti,
ndarray[int64_t] endi,
cdef _roll_min_max_fixed(numeric[:] values,
int64_t minp,
int64_t win,
bint is_max):
Expand Down