diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 254a004a37c40..fa4dadde13185 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -426,7 +426,7 @@ Performance improvements
 - :meth:`SparseArray.min` and :meth:`SparseArray.max` no longer require converting to a dense array (:issue:`43526`)
 - Indexing into a :class:`SparseArray` with a ``slice`` with ``step=1`` no longer requires converting to a dense array (:issue:`43777`)
 - Performance improvement in :meth:`SparseArray.take` with ``allow_fill=False`` (:issue:`43654`)
-- Performance improvement in :meth:`.Rolling.mean` and :meth:`.Expanding.mean` with ``engine="numba"`` (:issue:`43612`)
+- Performance improvement in :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.sum`, :meth:`.Expanding.sum` with ``engine="numba"`` (:issue:`43612`, :issue:`44176`)
 - Improved performance of :meth:`pandas.read_csv` with ``memory_map=True`` when file encoding is UTF-8 (:issue:`43787`)
 - Performance improvement in :meth:`RangeIndex.sort_values` overriding :meth:`Index.sort_values` (:issue:`43666`)
 - Performance improvement in :meth:`RangeIndex.insert` (:issue:`43988`)
diff --git a/pandas/core/_numba/kernels/__init__.py b/pandas/core/_numba/kernels/__init__.py
index eb43de1e0d979..23b0ec5c3d8aa 100644
--- a/pandas/core/_numba/kernels/__init__.py
+++ b/pandas/core/_numba/kernels/__init__.py
@@ -1,3 +1,4 @@
 from pandas.core._numba.kernels.mean_ import sliding_mean
+from pandas.core._numba.kernels.sum_ import sliding_sum
 
-__all__ = ["sliding_mean"]
+__all__ = ["sliding_mean", "sliding_sum"]
diff --git a/pandas/core/_numba/kernels/mean_.py b/pandas/core/_numba/kernels/mean_.py
index 32ea505513ed0..8f67dd9b51c06 100644
--- a/pandas/core/_numba/kernels/mean_.py
+++ b/pandas/core/_numba/kernels/mean_.py
@@ -1,5 +1,5 @@
 """
-Numba 1D aggregation kernels that can be shared by
+Numba 1D mean kernels that can be shared by
 * Dataframe / Series
 * groupby
 * rolling / expanding
@@ -11,20 +11,7 @@
 import numba
 import numpy as np
 
-
-@numba.jit(nopython=True, nogil=True, parallel=False)
-def is_monotonic_increasing(bounds: np.ndarray) -> bool:
-    """Check if int64 values are monotonically increasing."""
-    n = len(bounds)
-    if n < 2:
-        return True
-    prev = bounds[0]
-    for i in range(1, n):
-        cur = bounds[i]
-        if cur < prev:
-            return False
-        prev = cur
-    return True
+from pandas.core._numba.kernels.shared import is_monotonic_increasing
 
 
 @numba.jit(nopython=True, nogil=True, parallel=False)
diff --git a/pandas/core/_numba/kernels/shared.py b/pandas/core/_numba/kernels/shared.py
new file mode 100644
index 0000000000000..d84e409ca879d
--- /dev/null
+++ b/pandas/core/_numba/kernels/shared.py
@@ -0,0 +1,17 @@
+import numba
+import numpy as np
+
+
+@numba.jit(numba.boolean(numba.int64[:]), nopython=True, nogil=True, parallel=False)
+def is_monotonic_increasing(bounds: np.ndarray) -> bool:
+    """Check if int64 values are monotonically increasing."""
+    n = len(bounds)
+    if n < 2:
+        return True
+    prev = bounds[0]
+    for i in range(1, n):
+        cur = bounds[i]
+        if cur < prev:
+            return False
+        prev = cur
+    return True
diff --git a/pandas/core/_numba/kernels/sum_.py b/pandas/core/_numba/kernels/sum_.py
new file mode 100644
index 0000000000000..c2e81b4990ba9
--- /dev/null
+++ b/pandas/core/_numba/kernels/sum_.py
@@ -0,0 +1,98 @@
+"""
+Numba 1D sum kernels that can be shared by
+* Dataframe / Series
+* groupby
+* rolling / expanding
+
+Mirrors pandas/_libs/window/aggregation.pyx
+"""
+from __future__ import annotations
+
+import numba
+import numpy as np
+
+from pandas.core._numba.kernels.shared import is_monotonic_increasing
+
+
+@numba.jit(nopython=True, nogil=True, parallel=False)
+def add_sum(
+    val: float, nobs: int, sum_x: float, compensation: float
+) -> tuple[int, float, float]:
+    if not np.isnan(val):
+        nobs += 1
+        y = val - compensation
+        t = sum_x + y
+        compensation = t - sum_x - y
+        sum_x = t
+    return nobs, sum_x, compensation
+
+
+@numba.jit(nopython=True, nogil=True, parallel=False)
+def remove_sum(
+    val: float, nobs: int, sum_x: float, compensation: float
+) -> tuple[int, float, float]:
+    if not np.isnan(val):
+        nobs -= 1
+        y = -val - compensation
+        t = sum_x + y
+        compensation = t - sum_x - y
+        sum_x = t
+    return nobs, sum_x, compensation
+
+
+@numba.jit(nopython=True, nogil=True, parallel=False)
+def sliding_sum(
+    values: np.ndarray,
+    start: np.ndarray,
+    end: np.ndarray,
+    min_periods: int,
+) -> np.ndarray:
+    N = len(start)
+    nobs = 0
+    sum_x = 0.0
+    compensation_add = 0.0
+    compensation_remove = 0.0
+
+    is_monotonic_increasing_bounds = is_monotonic_increasing(
+        start
+    ) and is_monotonic_increasing(end)
+
+    output = np.empty(N, dtype=np.float64)
+
+    for i in range(N):
+        s = start[i]
+        e = end[i]
+        if i == 0 or not is_monotonic_increasing_bounds:
+            for j in range(s, e):
+                val = values[j]
+                nobs, sum_x, compensation_add = add_sum(
+                    val, nobs, sum_x, compensation_add
+                )
+        else:
+            for j in range(start[i - 1], s):
+                val = values[j]
+                nobs, sum_x, compensation_remove = remove_sum(
+                    val, nobs, sum_x, compensation_remove
+                )
+
+            for j in range(end[i - 1], e):
+                val = values[j]
+                nobs, sum_x, compensation_add = add_sum(
+                    val, nobs, sum_x, compensation_add
+                )
+
+        if nobs == 0 == min_periods:
+            result = 0.0
+        elif nobs >= min_periods:
+            result = sum_x
+        else:
+            result = np.nan
+
+        output[i] = result
+
+        if not is_monotonic_increasing_bounds:
+            nobs = 0
+            sum_x = 0.0
+            compensation_remove = 0.0
+
+    return output
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 274c78c30aec4..b04aab3755b91 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1345,15 +1345,16 @@ def sum(
         if maybe_use_numba(engine):
             if self.method == "table":
                 func = generate_manual_numpy_nan_agg_with_axis(np.nansum)
+                return self.apply(
+                    func,
+                    raw=True,
+                    engine=engine,
+                    engine_kwargs=engine_kwargs,
+                )
             else:
-                func = np.nansum
+                from pandas.core._numba.kernels import sliding_sum
 
-            return self.apply(
-                func,
-                raw=True,
-                engine=engine,
-                engine_kwargs=engine_kwargs,
-            )
+                return self._numba_apply(sliding_sum, "rolling_sum", engine_kwargs)
         window_func = window_aggregations.roll_sum
         return self._apply(window_func, name="sum", **kwargs)
 
diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py
index d47b3e856cb25..9fd4bd422178a 100644
--- a/pandas/tests/window/test_numba.py
+++ b/pandas/tests/window/test_numba.py
@@ -59,7 +59,7 @@ def test_numba_vs_cython_rolling_methods(
         expected = getattr(roll, method)(engine="cython")
 
         # Check the cache
-        if method != "mean":
+        if method not in ("mean", "sum"):
             assert (
                 getattr(np, f"nan{method}"),
                 "Rolling_apply_single",
@@ -67,7 +67,9 @@ def test_numba_vs_cython_rolling_methods(
 
         tm.assert_equal(result, expected)
 
-    @pytest.mark.parametrize("data", [DataFrame(np.eye(5)), Series(range(5))])
+    @pytest.mark.parametrize(
+        "data", [DataFrame(np.eye(5)), Series(range(5), name="foo")]
+    )
     def test_numba_vs_cython_expanding_methods(
         self, data, nogil, parallel, nopython, arithmetic_numba_supported_operators
     ):
@@ -82,7 +84,7 @@ def test_numba_vs_cython_expanding_methods(
         expected = getattr(expand, method)(engine="cython")
 
         # Check the cache
-        if method != "mean":
+        if method not in ("mean", "sum"):
             assert (
                 getattr(np, f"nan{method}"),
                 "Expanding_apply_single",
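
A minimal usage sketch of the path this diff optimizes, assuming a build of this branch with numba installed. With ``method="single"`` (the default), ``Rolling.sum`` / ``Expanding.sum`` with ``engine="numba"`` now dispatch to the new ``sliding_sum`` kernel through ``_numba_apply`` instead of going through ``apply(np.nansum, engine="numba")``:

    import numpy as np
    import pandas as pd

    s = pd.Series(np.arange(10, dtype="float64"))

    # The first call pays the numba JIT-compilation cost; later calls reuse the
    # cached kernel, which is where the speedup over the apply-based path shows up.
    numba_result = s.rolling(3).sum(engine="numba")
    cython_result = s.rolling(3).sum()  # default cython aggregation

    pd.testing.assert_series_equal(numba_result, cython_result)

    # Expanding windows route through the same kernel.
    expanding_result = s.expanding().sum(engine="numba")

The ``method="table"`` branch keeps the generic ``apply`` path because the new kernel, like the existing mean kernel, operates on 1D windows only.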
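
The ``add_sum`` / ``remove_sum`` helpers use Kahan (compensated) summation, mirroring the Cython kernels in ``aggregation.pyx``. Below is a standalone sketch of why the ``compensation`` term is threaded through the updates; ``kahan_add`` is a hypothetical helper written only to mirror the update rule and is not part of this diff:

    def kahan_add(val, total, compensation):
        """One compensated-summation step, same update rule as add_sum."""
        y = val - compensation
        t = total + y
        compensation = t - total - y  # rounding error of `total + y`, fed back next step
        return t, compensation


    # Naive float64 accumulation rounds 1.0 + 1e-16 back to 1.0 every time;
    # the compensation term keeps track of the lost low-order bits.
    n, tiny = 1_000_000, 1e-16

    naive = 1.0
    total, comp = 1.0, 0.0
    for _ in range(n):
        naive += tiny
        total, comp = kahan_add(tiny, total, comp)

    print(naive)  # 1.0           -- every tiny term was rounded away
    print(total)  # ~1.0000000001 -- close to the exact 1 + n * tiny

The sliding kernel keeps separate ``compensation_add`` and ``compensation_remove`` state so that values entering and leaving the window are both corrected this way.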