pandas-dev
diff --git a/‎.github/workflows/stale-pr.yml
+21 b/‎.github/workflows/stale-pr.yml
+21
diff --git a/‎doc/source/whatsnew/v1.2.0.rst
+1 b/‎doc/source/whatsnew/v1.2.0.rst
+1
diff --git a/‎pandas/_libs/window/aggregations.pyx
+55-30 b/‎pandas/_libs/window/aggregations.pyx
+55-30
diff --git a/‎pandas/core/arrays/_mixins.py
+12 b/‎pandas/core/arrays/_mixins.py
+12
diff --git a/‎pandas/core/arrays/categorical.py
+17-26 b/‎pandas/core/arrays/categorical.py
+17-26
diff --git a/‎pandas/core/arrays/datetimelike.py
+2-4 b/‎pandas/core/arrays/datetimelike.py
+2-4
@@ -0,0 +1,21 @@
+name: "Stale PRs"
+on:
+  schedule:
+  # * is a special character in YAML so you have to quote this string
+  - cron: "0 */6 * * *"
+
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/stale@v3
+      with:
+        repo-token: ${{ secrets.GITHUB_TOKEN }}
+        stale-pr-message: "This pull request is stale because it has been open for thirty days with no activity."
+        skip-stale-pr-message: false
+        stale-pr-label: "Stale"
+        exempt-pr-labels: "Needs Review,Blocked"
+        days-before-stale: 30
+        days-before-close: -1
+        remove-stale-when-updated: true
+        debug-only: true
@@ -118,6 +118,7 @@ Other enhancements
 - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
 - :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`)
 - `Styler` now allows direct CSS class name addition to individual data cells (:issue:`36159`)
+- :meth:`Rolling.mean()` and :meth:`Rolling.sum()` now use Kahan summation to avoid numerical problems caused by accumulated floating-point error (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`)
 
 .. _whatsnew_120.api_breaking.python:
 
 
@@ -161,27 +161,42 @@ cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x) nogi
     return result
 
 
-cdef inline void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x) nogil:
-    """ add a value from the sum calc """
+cdef inline void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x,
+                         float64_t *compensation) nogil:
+    """ add a value to the sum calc using Kahan summation """
+
+    cdef:
+        float64_t y, t
 
     # Not NaN
     if notnan(val):
         nobs[0] = nobs[0] + 1
-        sum_x[0] = sum_x[0] + val
+        y = val - compensation[0]
+        t = sum_x[0] + y
+        compensation[0] = t - sum_x[0] - y
+        sum_x[0] = t
 
 
-cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x) nogil:
-    """ remove a value from the sum calc """
+cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x,
+                            float64_t *compensation) nogil:
+    """ remove a value from the sum calc using Kahan summation """
+
+    cdef:
+        float64_t y, t
 
+    # Not NaN
     if notnan(val):
         nobs[0] = nobs[0] - 1
-        sum_x[0] = sum_x[0] - val
+        y = - val - compensation[0]
+        t = sum_x[0] + y
+        compensation[0] = t - sum_x[0] - y
+        sum_x[0] = t
 
 
 def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start,
                       ndarray[int64_t] end, int64_t minp):
     cdef:
-        float64_t sum_x = 0
+        float64_t sum_x = 0, compensation_add = 0, compensation_remove = 0
         int64_t s, e
         int64_t nobs = 0, i, j, N = len(values)
         ndarray[float64_t] output
@@ -201,31 +216,31 @@ def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start,
                 # setup
 
                 for j in range(s, e):
-                    add_sum(values[j], &nobs, &sum_x)
+                    add_sum(values[j], &nobs, &sum_x, &compensation_add)
 
             else:
 
                 # calculate deletes
                 for j in range(start[i - 1], s):
-                    remove_sum(values[j], &nobs, &sum_x)
+                    remove_sum(values[j], &nobs, &sum_x, &compensation_remove)
 
                 # calculate adds
                 for j in range(end[i - 1], e):
-                    add_sum(values[j], &nobs, &sum_x)
+                    add_sum(values[j], &nobs, &sum_x, &compensation_add)
 
             output[i] = calc_sum(minp, nobs, sum_x)
 
             if not is_monotonic_bounds:
                 for j in range(s, e):
-                    remove_sum(values[j], &nobs, &sum_x)
+                    remove_sum(values[j], &nobs, &sum_x, &compensation_remove)
 
     return output
 
 
 def roll_sum_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
                    ndarray[int64_t] end, int64_t minp, int64_t win):
     cdef:
-        float64_t val, prev_x, sum_x = 0
+        float64_t val, prev_x, sum_x = 0, compensation_add = 0, compensation_remove = 0
         int64_t range_endpoint
         int64_t nobs = 0, i, N = len(values)
         ndarray[float64_t] output
@@ -237,16 +252,16 @@ def roll_sum_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
     with nogil:
 
         for i in range(0, range_endpoint):
-            add_sum(values[i], &nobs, &sum_x)
+            add_sum(values[i], &nobs, &sum_x, &compensation_add)
             output[i] = NaN
 
         for i in range(range_endpoint, N):
             val = values[i]
-            add_sum(val, &nobs, &sum_x)
+            add_sum(val, &nobs, &sum_x, &compensation_add)
 
             if i > win - 1:
                 prev_x = values[i - win]
-                remove_sum(prev_x, &nobs, &sum_x)
+                remove_sum(prev_x, &nobs, &sum_x, &compensation_remove)
 
             output[i] = calc_sum(minp, nobs, sum_x)
 
@@ -277,32 +292,42 @@ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs,
 
 
 cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
-                          Py_ssize_t *neg_ct) nogil:
-    """ add a value from the mean calc """
+                          Py_ssize_t *neg_ct, float64_t *compensation) nogil:
+    """ add a value to the mean calc using Kahan summation """
+    cdef:
+        float64_t y, t
 
     # Not NaN
     if notnan(val):
         nobs[0] = nobs[0] + 1
-        sum_x[0] = sum_x[0] + val
+        y = val - compensation[0]
+        t = sum_x[0] + y
+        compensation[0] = t - sum_x[0] - y
+        sum_x[0] = t
         if signbit(val):
             neg_ct[0] = neg_ct[0] + 1
 
 
 cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
-                             Py_ssize_t *neg_ct) nogil:
-    """ remove a value from the mean calc """
+                             Py_ssize_t *neg_ct, float64_t *compensation) nogil:
+    """ remove a value from the mean calc using Kahan summation """
+    cdef:
+        float64_t y, t
 
     if notnan(val):
         nobs[0] = nobs[0] - 1
-        sum_x[0] = sum_x[0] - val
+        y = - val - compensation[0]
+        t = sum_x[0] + y
+        compensation[0] = t - sum_x[0] - y
+        sum_x[0] = t
         if signbit(val):
             neg_ct[0] = neg_ct[0] - 1
 
 
 def roll_mean_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
                     ndarray[int64_t] end, int64_t minp, int64_t win):
     cdef:
-        float64_t val, prev_x, sum_x = 0
+        float64_t val, prev_x, sum_x = 0, compensation_add = 0, compensation_remove = 0
         Py_ssize_t nobs = 0, i, neg_ct = 0, N = len(values)
         ndarray[float64_t] output
 
@@ -311,16 +336,16 @@ def roll_mean_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
     with nogil:
         for i in range(minp - 1):
             val = values[i]
-            add_mean(val, &nobs, &sum_x, &neg_ct)
+            add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add)
             output[i] = NaN
 
         for i in range(minp - 1, N):
             val = values[i]
-            add_mean(val, &nobs, &sum_x, &neg_ct)
+            add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add)
 
             if i > win - 1:
                 prev_x = values[i - win]
-                remove_mean(prev_x, &nobs, &sum_x, &neg_ct)
+                remove_mean(prev_x, &nobs, &sum_x, &neg_ct, &compensation_remove)
 
             output[i] = calc_mean(minp, nobs, neg_ct, sum_x)
 
@@ -330,7 +355,7 @@ def roll_mean_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
 def roll_mean_variable(ndarray[float64_t] values, ndarray[int64_t] start,
                        ndarray[int64_t] end, int64_t minp):
     cdef:
-        float64_t val, sum_x = 0
+        float64_t val, compensation_add = 0, compensation_remove = 0, sum_x = 0
         int64_t s, e
         Py_ssize_t nobs = 0, i, j, neg_ct = 0, N = len(values)
         ndarray[float64_t] output
@@ -350,26 +375,26 @@ def roll_mean_variable(ndarray[float64_t] values, ndarray[int64_t] start,
                 # setup
                 for j in range(s, e):
                     val = values[j]
-                    add_mean(val, &nobs, &sum_x, &neg_ct)
+                    add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add)
 
             else:
 
                 # calculate deletes
                 for j in range(start[i - 1], s):
                     val = values[j]
-                    remove_mean(val, &nobs, &sum_x, &neg_ct)
+                    remove_mean(val, &nobs, &sum_x, &neg_ct, &compensation_remove)
 
                 # calculate adds
                 for j in range(end[i - 1], e):
                     val = values[j]
-                    add_mean(val, &nobs, &sum_x, &neg_ct)
+                    add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add)
 
             output[i] = calc_mean(minp, nobs, neg_ct, sum_x)
 
             if not is_monotonic_bounds:
                 for j in range(s, e):
                     val = values[j]
-                    remove_mean(val, &nobs, &sum_x, &neg_ct)
+                    remove_mean(val, &nobs, &sum_x, &neg_ct, &compensation_remove)
     return output
 
 # ----------------------------------------------------------------------
 
@@ -9,6 +9,7 @@
 from pandas.core.algorithms import take, unique
 from pandas.core.array_algos.transforms import shift
 from pandas.core.arrays.base import ExtensionArray
+from pandas.core.indexers import check_array_indexer
 
 _T = TypeVar("_T", bound="NDArrayBackedExtensionArray")
 
@@ -156,3 +157,14 @@ def _validate_shift_value(self, fill_value):
         # TODO: after deprecation in datetimelikearraymixin is enforced,
         #  we can remove this and ust validate_fill_value directly
         return self._validate_fill_value(fill_value)
+
+    def __setitem__(self, key, value):
+        key = self._validate_setitem_key(key)
+        value = self._validate_setitem_value(value)
+        self._ndarray[key] = value
+
+    def _validate_setitem_key(self, key):
+        return check_array_indexer(self, key)
+
+    def _validate_setitem_value(self, value):
+        return value
@@ -93,7 +93,7 @@ def func(self, other):
 
         if is_scalar(other):
             if other in self.categories:
-                i = self.categories.get_loc(other)
+                i = self._unbox_scalar(other)
                 ret = op(self._codes, i)
 
                 if opname not in {"__eq__", "__ge__", "__gt__"}:
@@ -1184,8 +1184,7 @@ def _validate_searchsorted_value(self, value):
         # searchsorted is very performance sensitive. By converting codes
         # to same dtype as self.codes, we get much faster performance.
         if is_scalar(value):
-            codes = self.categories.get_loc(value)
-            codes = self.codes.dtype.type(codes)
+            codes = self._unbox_scalar(value)
         else:
             locs = [self.categories.get_loc(x) for x in value]
             codes = np.array(locs, dtype=self.codes.dtype)
@@ -1212,7 +1211,7 @@ def _validate_fill_value(self, fill_value):
         if isna(fill_value):
             fill_value = -1
         elif fill_value in self.categories:
-            fill_value = self.categories.get_loc(fill_value)
+            fill_value = self._unbox_scalar(fill_value)
         else:
             raise ValueError(
                 f"'fill_value={fill_value}' is not present "
@@ -1680,7 +1679,7 @@ def fillna(self, value=None, method=None, limit=None):
                     if isna(value):
                         codes[mask] = -1
                     else:
-                        codes[mask] = self.categories.get_loc(value)
+                        codes[mask] = self._unbox_scalar(value)
 
             else:
                 raise TypeError(
@@ -1734,6 +1733,17 @@ def _validate_listlike(self, target: ArrayLike) -> np.ndarray:
 
         return codes
 
+    def _unbox_scalar(self, key) -> int:
+        # searchsorted is very performance sensitive. By converting codes
+        # to the same dtype as self.codes, we get much faster performance.
+        code = self.categories.get_loc(key)
+        code = self._codes.dtype.type(code)
+        return code
+
+    def _unbox_listlike(self, value):
+        unboxed = self.categories.get_indexer(value)
+        return unboxed.astype(self._ndarray.dtype, copy=False)
+
     # ------------------------------------------------------------------
 
     def take_nd(self, indexer, allow_fill: bool = False, fill_value=None):
@@ -1884,20 +1894,6 @@ def __getitem__(self, key):
             return result
         return self._from_backing_data(result)
 
-    def __setitem__(self, key, value):
-        """
-        Item assignment.
-
-        Raises
-        ------
-        ValueError
-            If (one or more) Value is not in categories or if a assigned
-            `Categorical` does not have the same categories
-        """
-        key = self._validate_setitem_key(key)
-        value = self._validate_setitem_value(value)
-        self._ndarray[key] = value
-
     def _validate_setitem_value(self, value):
         value = extract_array(value, extract_numpy=True)
 
@@ -1925,11 +1921,7 @@ def _validate_setitem_value(self, value):
                 "category, set the categories first"
             )
 
-        lindexer = self.categories.get_indexer(rvalue)
-        if isinstance(lindexer, np.ndarray) and lindexer.dtype.kind == "i":
-            lindexer = lindexer.astype(self._ndarray.dtype)
-
-        return lindexer
+        return self._unbox_listlike(rvalue)
 
     def _validate_setitem_key(self, key):
         if lib.is_integer(key):
@@ -2155,8 +2147,7 @@ def unique(self):
         return cat.set_categories(cat.categories.take(take_codes))
 
     def _values_for_factorize(self):
-        codes = self.codes.astype("int64")
-        return codes, -1
+        return self._ndarray, -1
 
     @classmethod
     def _from_factorized(cls, uniques, original):
 
@@ -609,9 +609,7 @@ def __setitem__(
         if no_op:
             return
 
-        value = self._validate_setitem_value(value)
-        key = check_array_indexer(self, key)
-        self._ndarray[key] = value
+        super().__setitem__(key, value)
         self._maybe_clear_freq()
 
     def _maybe_clear_freq(self):
@@ -697,7 +695,7 @@ def copy(self: DatetimeLikeArrayT) -> DatetimeLikeArrayT:
         return new_obj
 
     def _values_for_factorize(self):
-        return self.asi8, iNaT
+        return self._ndarray, iNaT
 
     @classmethod
     def _from_factorized(cls, values, original):