@@ -70,22 +70,28 @@ cdef bint is_monotonic_increasing_start_end_bounds(
70
70
# Rolling sum
71
71
72
72
73
cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x,
                               int64_t num_consecutive_same_value,
                               float64_t prev_value) nogil:
    """
    Produce the rolling-sum result for a single window.

    Parameters
    ----------
    minp : minimum number of observations required for a non-NaN result
    nobs : number of observations currently counted in the window
    sum_x : accumulated sum for the window
    num_consecutive_same_value : length of the current run of equal values,
        as maintained by add_sum
    prev_value : the value most recently recorded by add_sum

    Returns
    -------
    0 when both nobs and minp are zero, NaN when nobs is below minp,
    otherwise the window sum.
    """
    # Empty window with no minimum requirement: the sum is defined as zero.
    if nobs == 0 == minp:
        return 0

    # Not enough observations to report a value.
    if nobs < minp:
        return NaN

    # GH#42064: when the run of identical values is at least as long as the
    # observation count, compute the sum directly from that value instead of
    # using the accumulated total, to remove floating point artifacts.
    if num_consecutive_same_value >= nobs:
        return prev_value * nobs

    return sum_x
85
90
86
91
87
92
cdef inline void add_sum(float64_t val, int64_t * nobs, float64_t * sum_x,
88
- float64_t * compensation) nogil:
93
+ float64_t * compensation, int64_t * num_consecutive_same_value,
94
+ float64_t * prev_value) nogil:
89
95
""" add a value from the sum calc using Kahan summation """
90
96
91
97
cdef:
@@ -99,6 +105,14 @@ cdef inline void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x,
99
105
compensation[0 ] = t - sum_x[0 ] - y
100
106
sum_x[0 ] = t
101
107
108
+ # GH#42064, record num of same values to remove floating point artifacts
109
+ if val == prev_value[0 ]:
110
+ num_consecutive_same_value[0 ] += 1
111
+ else :
112
+ # reset to 1 (include current value itself)
113
+ num_consecutive_same_value[0 ] = 1
114
+ prev_value[0 ] = val
115
+
102
116
103
117
cdef inline void remove_sum(float64_t val, int64_t * nobs, float64_t * sum_x,
104
118
float64_t * compensation) nogil:
@@ -120,8 +134,8 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
120
134
ndarray[int64_t] end , int64_t minp ) -> np.ndarray:
121
135
cdef:
122
136
Py_ssize_t i , j
123
- float64_t sum_x , compensation_add , compensation_remove
124
- int64_t s , e
137
+ float64_t sum_x , compensation_add , compensation_remove , prev_value
138
+ int64_t s , e , num_consecutive_same_value
125
139
int64_t nobs = 0 , N = len (start)
126
140
ndarray[float64_t] output
127
141
bint is_monotonic_increasing_bounds
@@ -140,11 +154,13 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
140
154
if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1 ]:
141
155
142
156
# setup
143
-
157
+ prev_value = values[s]
158
+ num_consecutive_same_value = 0
144
159
sum_x = compensation_add = compensation_remove = 0
145
160
nobs = 0
146
161
for j in range (s, e):
147
- add_sum(values[j], & nobs, & sum_x, & compensation_add)
162
+ add_sum(values[j], & nobs, & sum_x, & compensation_add,
163
+ & num_consecutive_same_value, & prev_value)
148
164
149
165
else :
150
166
@@ -154,9 +170,10 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
154
170
155
171
# calculate adds
156
172
for j in range (end[i - 1 ], e):
157
- add_sum(values[j], & nobs, & sum_x, & compensation_add)
173
+ add_sum(values[j], & nobs, & sum_x, & compensation_add,
174
+ & num_consecutive_same_value, & prev_value)
158
175
159
- output[i] = calc_sum(minp, nobs, sum_x)
176
+ output[i] = calc_sum(minp, nobs, sum_x, num_consecutive_same_value, prev_value )
160
177
161
178
if not is_monotonic_increasing_bounds:
162
179
nobs = 0
@@ -170,14 +187,17 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
170
187
# Rolling mean
171
188
172
189
173
- cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs,
174
- Py_ssize_t neg_ct, float64_t sum_x) nogil:
190
+ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, Py_ssize_t neg_ct,
191
+ float64_t sum_x, int64_t num_consecutive_same_value,
192
+ float64_t prev_value) nogil:
175
193
cdef:
176
194
float64_t result
177
195
178
196
if nobs >= minp and nobs > 0 :
179
197
result = sum_x / < float64_t> nobs
180
- if neg_ct == 0 and result < 0 :
198
+ if num_consecutive_same_value >= nobs:
199
+ result = prev_value
200
+ elif neg_ct == 0 and result < 0 :
181
201
# all positive
182
202
result = 0
183
203
elif neg_ct == nobs and result > 0 :
@@ -191,7 +211,8 @@ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs,
191
211
192
212
193
213
cdef inline void add_mean(float64_t val, Py_ssize_t * nobs, float64_t * sum_x,
194
- Py_ssize_t * neg_ct, float64_t * compensation) nogil:
214
+ Py_ssize_t * neg_ct, float64_t * compensation,
215
+ int64_t * num_consecutive_same_value, float64_t * prev_value) nogil:
195
216
""" add a value from the mean calc using Kahan summation """
196
217
cdef:
197
218
float64_t y, t
@@ -206,6 +227,14 @@ cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
206
227
if signbit(val):
207
228
neg_ct[0 ] = neg_ct[0 ] + 1
208
229
230
+ # GH#42064, record num of same values to remove floating point artifacts
231
+ if val == prev_value[0 ]:
232
+ num_consecutive_same_value[0 ] += 1
233
+ else :
234
+ # reset to 1 (include current value itself)
235
+ num_consecutive_same_value[0 ] = 1
236
+ prev_value[0 ] = val
237
+
209
238
210
239
cdef inline void remove_mean(float64_t val, Py_ssize_t * nobs, float64_t * sum_x,
211
240
Py_ssize_t * neg_ct, float64_t * compensation) nogil:
@@ -226,8 +255,8 @@ cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
226
255
def roll_mean (const float64_t[:] values , ndarray[int64_t] start ,
227
256
ndarray[int64_t] end , int64_t minp ) -> np.ndarray:
228
257
cdef:
229
- float64_t val , compensation_add , compensation_remove , sum_x
230
- int64_t s , e
258
+ float64_t val , compensation_add , compensation_remove , sum_x , prev_value
259
+ int64_t s , e , num_consecutive_same_value
231
260
Py_ssize_t nobs , i , j , neg_ct , N = len (start)
232
261
ndarray[float64_t] output
233
262
bint is_monotonic_increasing_bounds
@@ -245,12 +274,15 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start,
245
274
246
275
if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1 ]:
247
276
277
+ # setup
248
278
compensation_add = compensation_remove = sum_x = 0
249
279
nobs = neg_ct = 0
250
- # setup
280
+ prev_value = values[s]
281
+ num_consecutive_same_value = 0
251
282
for j in range (s, e):
252
283
val = values[j]
253
- add_mean(val, & nobs, & sum_x, & neg_ct, & compensation_add)
284
+ add_mean(val, & nobs, & sum_x, & neg_ct, & compensation_add,
285
+ & num_consecutive_same_value, & prev_value)
254
286
255
287
else :
256
288
@@ -262,9 +294,10 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start,
262
294
# calculate adds
263
295
for j in range (end[i - 1 ], e):
264
296
val = values[j]
265
- add_mean(val, & nobs, & sum_x, & neg_ct, & compensation_add)
297
+ add_mean(val, & nobs, & sum_x, & neg_ct, & compensation_add,
298
+ & num_consecutive_same_value, & prev_value)
266
299
267
- output[i] = calc_mean(minp, nobs, neg_ct, sum_x)
300
+ output[i] = calc_mean(minp, nobs, neg_ct, sum_x, num_consecutive_same_value, prev_value )
268
301
269
302
if not is_monotonic_increasing_bounds:
270
303
nobs = 0
0 commit comments