@@ -69,22 +69,28 @@ cdef bint is_monotonic_increasing_start_end_bounds(
69
69
# Rolling sum
70
70
71
71
72
cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x,
                               int64_t num_consecutive_same_value, float64_t prev_value
                               ) nogil:
    """
    Turn the running window state into the rolling-sum output value.

    Parameters
    ----------
    minp : minimum number of observations required to emit a value
    nobs : number of observations currently counted in the window
    sum_x : running sum accumulated for the window
    num_consecutive_same_value : length of the current run of equal values,
        as tracked by the caller
    prev_value : the most recently recorded value of that run

    Returns
    -------
    0 when the window is empty and no observations are required, NaN when
    fewer than ``minp`` observations are present, otherwise the window sum.
    """
    # An empty window is a valid result only when nothing is required.
    if nobs == 0 and minp == 0:
        return 0

    # Not enough observations to report a value.
    if nobs < minp:
        return NaN

    # GH#42064: when the run of identical values covers every observation,
    # compute the sum as value * count rather than returning the accumulated
    # sum, to avoid floating point artifacts.
    if num_consecutive_same_value >= nobs:
        return prev_value * nobs

    return sum_x
84
89
85
90
86
91
cdef inline void add_sum(float64_t val, int64_t * nobs, float64_t * sum_x,
87
- float64_t * compensation) nogil:
92
+ float64_t * compensation, int64_t * num_consecutive_same_value,
93
+ float64_t * prev_value) nogil:
88
94
""" add a value from the sum calc using Kahan summation """
89
95
90
96
cdef:
@@ -98,6 +104,14 @@ cdef inline void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x,
98
104
compensation[0 ] = t - sum_x[0 ] - y
99
105
sum_x[0 ] = t
100
106
107
+ # GH#42064, record num of same values to remove floating point artifacts
108
+ if val == prev_value[0 ]:
109
+ num_consecutive_same_value[0 ] += 1
110
+ else :
111
+ # reset to 1 (include current value itself)
112
+ num_consecutive_same_value[0 ] = 1
113
+ prev_value[0 ] = val
114
+
101
115
102
116
cdef inline void remove_sum(float64_t val, int64_t * nobs, float64_t * sum_x,
103
117
float64_t * compensation) nogil:
@@ -119,8 +133,8 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
119
133
ndarray[int64_t] end , int64_t minp ) -> np.ndarray:
120
134
cdef:
121
135
Py_ssize_t i , j
122
- float64_t sum_x , compensation_add , compensation_remove
123
- int64_t s , e
136
+ float64_t sum_x , compensation_add , compensation_remove , prev_value
137
+ int64_t s , e , num_consecutive_same_value
124
138
int64_t nobs = 0 , N = len (start)
125
139
ndarray[float64_t] output
126
140
bint is_monotonic_increasing_bounds
@@ -139,11 +153,13 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
139
153
if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1 ]:
140
154
141
155
# setup
142
-
156
+ prev_value = values[s]
157
+ num_consecutive_same_value = 0
143
158
sum_x = compensation_add = compensation_remove = 0
144
159
nobs = 0
145
160
for j in range (s, e):
146
- add_sum(values[j], & nobs, & sum_x, & compensation_add)
161
+ add_sum(values[j], & nobs, & sum_x, & compensation_add,
162
+ & num_consecutive_same_value, & prev_value)
147
163
148
164
else :
149
165
@@ -153,9 +169,10 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
153
169
154
170
# calculate adds
155
171
for j in range (end[i - 1 ], e):
156
- add_sum(values[j], & nobs, & sum_x, & compensation_add)
172
+ add_sum(values[j], & nobs, & sum_x, & compensation_add,
173
+ & num_consecutive_same_value, & prev_value)
157
174
158
- output[i] = calc_sum(minp, nobs, sum_x)
175
+ output[i] = calc_sum(minp, nobs, sum_x, num_consecutive_same_value, prev_value )
159
176
160
177
if not is_monotonic_increasing_bounds:
161
178
nobs = 0
@@ -169,14 +186,17 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
169
186
# Rolling mean
170
187
171
188
172
- cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs,
173
- Py_ssize_t neg_ct, float64_t sum_x) nogil:
189
+ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, Py_ssize_t neg_ct,
190
+ float64_t sum_x, int64_t num_consecutive_same_value,
191
+ float64_t prev_value) nogil:
174
192
cdef:
175
193
float64_t result
176
194
177
195
if nobs >= minp and nobs > 0 :
178
196
result = sum_x / < float64_t> nobs
179
- if neg_ct == 0 and result < 0 :
197
+ if num_consecutive_same_value >= nobs:
198
+ result = prev_value
199
+ elif neg_ct == 0 and result < 0 :
180
200
# all positive
181
201
result = 0
182
202
elif neg_ct == nobs and result > 0 :
@@ -190,7 +210,8 @@ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs,
190
210
191
211
192
212
cdef inline void add_mean(float64_t val, Py_ssize_t * nobs, float64_t * sum_x,
193
- Py_ssize_t * neg_ct, float64_t * compensation) nogil:
213
+ Py_ssize_t * neg_ct, float64_t * compensation,
214
+ int64_t * num_consecutive_same_value, float64_t * prev_value) nogil:
194
215
""" add a value from the mean calc using Kahan summation """
195
216
cdef:
196
217
float64_t y, t
@@ -205,6 +226,14 @@ cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
205
226
if signbit(val):
206
227
neg_ct[0 ] = neg_ct[0 ] + 1
207
228
229
+ # GH#42064, record num of same values to remove floating point artifacts
230
+ if val == prev_value[0 ]:
231
+ num_consecutive_same_value[0 ] += 1
232
+ else :
233
+ # reset to 1 (include current value itself)
234
+ num_consecutive_same_value[0 ] = 1
235
+ prev_value[0 ] = val
236
+
208
237
209
238
cdef inline void remove_mean(float64_t val, Py_ssize_t * nobs, float64_t * sum_x,
210
239
Py_ssize_t * neg_ct, float64_t * compensation) nogil:
@@ -225,8 +254,8 @@ cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
225
254
def roll_mean (const float64_t[:] values , ndarray[int64_t] start ,
226
255
ndarray[int64_t] end , int64_t minp ) -> np.ndarray:
227
256
cdef:
228
- float64_t val , compensation_add , compensation_remove , sum_x
229
- int64_t s , e
257
+ float64_t val , compensation_add , compensation_remove , sum_x , prev_value
258
+ int64_t s , e , num_consecutive_same_value
230
259
Py_ssize_t nobs , i , j , neg_ct , N = len (start)
231
260
ndarray[float64_t] output
232
261
bint is_monotonic_increasing_bounds
@@ -244,12 +273,15 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start,
244
273
245
274
if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1 ]:
246
275
276
+ # setup
247
277
compensation_add = compensation_remove = sum_x = 0
248
278
nobs = neg_ct = 0
249
- # setup
279
+ prev_value = values[s]
280
+ num_consecutive_same_value = 0
250
281
for j in range (s, e):
251
282
val = values[j]
252
- add_mean(val, & nobs, & sum_x, & neg_ct, & compensation_add)
283
+ add_mean(val, & nobs, & sum_x, & neg_ct, & compensation_add,
284
+ & num_consecutive_same_value, & prev_value)
253
285
254
286
else :
255
287
@@ -261,9 +293,10 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start,
261
293
# calculate adds
262
294
for j in range (end[i - 1 ], e):
263
295
val = values[j]
264
- add_mean(val, & nobs, & sum_x, & neg_ct, & compensation_add)
296
+ add_mean(val, & nobs, & sum_x, & neg_ct, & compensation_add,
297
+ & num_consecutive_same_value, & prev_value)
265
298
266
- output[i] = calc_mean(minp, nobs, neg_ct, sum_x)
299
+ output[i] = calc_mean(minp, nobs, neg_ct, sum_x, num_consecutive_same_value, prev_value )
267
300
268
301
if not is_monotonic_increasing_bounds:
269
302
nobs = 0
0 commit comments