@@ -1153,6 +1153,31 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):
1153
1153
1154
1154
return result
1155
1155
1156
+ # ----------------------------------------------------------------------
1157
+ # Auxiliary function for roll_var, roll_skew and roll_kurt
1158
+
1159
+ cdef inline void count_repeats(double val, double prev, double * rep,
1160
+ Py_ssize_t * nrep, Py_ssize_t * nobs) nogil:
1161
+ if prev == prev:
1162
+ # prev is not NaN, removing an observation...
1163
+ if nobs[0 ] == nrep[0 ]:
1164
+ # ...and removing a repeat
1165
+ nrep[0 ] -= 1
1166
+ if nrep[0 ] == 0 :
1167
+ rep[0 ] = NaN
1168
+ nobs[0 ] -= 1
1169
+
1170
+ if val == val:
1171
+ # next is not NaN, adding an observation...
1172
+ if val == prev:
1173
+ # ...and adding a repeat
1174
+ nrep[0 ] += 1
1175
+ else :
1176
+ # ...and resetting repeats
1177
+ nrep[0 ] = 1
1178
+ rep[0 ] = val
1179
+ nobs[0 ] += 1
1180
+
1156
1181
# ----------------------------------------------------------------------
1157
1182
# Rolling variance
1158
1183
@@ -1241,88 +1266,160 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
1241
1266
1242
1267
return output
1243
1268
1244
-
1245
1269
# -------------------------------------------------------------------------------
1246
1270
# Rolling skewness
1247
1271
1248
1272
def roll_skew (ndarray[double_t] input , int win , int minp ):
1273
+ """
1274
+ Numerically stable implementation using a Welford-like method.
1275
+ """
1249
1276
cdef double val, prev
1250
- cdef double x = 0 , xx = 0 , xxx = 0
1251
- cdef Py_ssize_t nobs = 0 , i
1277
+ cdef double mean_x = 0 , sum2dm_x = 0 , sum3dm_x = 0
1278
+ cdef double delta, rep = NaN, val_prev, delta_nobs, sum_sum2dm_x
1279
+ cdef Py_ssize_t nobs = 0 , nrep = 0 , i
1252
1280
cdef Py_ssize_t N = len (input )
1253
1281
1254
1282
cdef ndarray[double_t] output = np.empty(N, dtype = float )
1255
1283
1256
- # 3 components of the skewness equation
1257
- cdef double A, B, C, R
1258
-
1259
1284
minp = _check_minp(win, minp, N)
1260
1285
1261
- for i from 0 <= i < minp - 1 :
1262
- val = input [i]
1263
-
1264
- # Not NaN
1265
- if val == val:
1266
- nobs += 1
1267
- x += val
1268
- xx += val * val
1269
- xxx += val * val * val
1270
-
1271
- output[i] = NaN
1272
-
1273
- for i from minp - 1 <= i < N:
1286
+ for i from 0 <= i < N:
1274
1287
val = input [i]
1275
-
1276
- if val == val:
1277
- nobs += 1
1278
- x += val
1279
- xx += val * val
1280
- xxx += val * val * val
1281
-
1282
- if i > win - 1 :
1283
- prev = input [i - win]
1288
+ prev = NaN if i < win else input [i - win]
1289
+
1290
+ # First, count the number of onservation and consecutive repeats
1291
+ count_repeats(val, prev, & rep, & nrep, & nobs)
1292
+
1293
+ # Then, compute the new mean and sums of squared and cubed differences
1294
+ if nobs == nrep:
1295
+ # All non-NaN values in window are identical...
1296
+ sum2dm_x = sum3dm_x = 0
1297
+ mean_x = rep if nobs > 0 else 0
1298
+ elif val == val:
1299
+ # Adding one observation...
1284
1300
if prev == prev:
1285
- x -= prev
1286
- xx -= prev * prev
1287
- xxx -= prev * prev * prev
1288
-
1289
- nobs -= 1
1290
-
1301
+ # ...and removing another
1302
+ delta_nobs = val - prev
1303
+ delta = delta_nobs / nobs
1304
+ prev -= mean_x
1305
+ mean_x += delta
1306
+ val -= mean_x
1307
+ val_prev = val + prev
1308
+ sum_sum2dm_x = sum2dm_x
1309
+ sum2dm_x += delta_nobs * val_prev
1310
+ sum_sum2dm_x += sum2dm_x
1311
+ sum3dm_x += (0.25 * delta_nobs * delta * (nobs* nobs - 1 ) +
1312
+ 0.75 * nobs * val_prev * val_prev -
1313
+ 1.5 * sum_sum2dm_x) * delta
1314
+ else :
1315
+ # ...and not removing any
1316
+ delta_nobs = val - mean_x
1317
+ delta = delta_nobs / nobs
1318
+ mean_x += delta
1319
+ sum3dm_x += delta * (delta_nobs* delta* (nobs- 1 )* (nobs- 2 ) -
1320
+ 3 * sum2dm_x)
1321
+ sum2dm_x += delta_nobs * (val - mean_x)
1322
+ elif prev == prev:
1323
+ # Adding no new observation, but removing one
1324
+ delta_nobs = prev - mean_x
1325
+ delta = delta_nobs / nobs
1326
+ sum3dm_x -= delta * (delta * delta_nobs * (nobs+ 1 ) * (nobs+ 2 ) -
1327
+ 3 * sum2dm_x)
1328
+ sum2dm_x -= delta_nobs * delta * (nobs + 1 )
1329
+ mean_x -= delta
1330
+
1331
+ # Finally, compute and write the rolling skewness to the output array
1291
1332
if nobs >= minp:
1292
- A = x / nobs
1293
- B = xx / nobs - A * A
1294
- C = xxx / nobs - A * A * A - 3 * A * B
1295
-
1296
- R = sqrt(B)
1297
-
1298
- if B == 0 or nobs < 3 :
1299
- output[i] = NaN
1333
+ if nobs < 3 or sum2dm_x == 0 :
1334
+ val = NaN
1300
1335
else :
1301
- output[i] = ((sqrt(nobs * (nobs - 1. )) * C) /
1302
- (( nobs- 2 ) * R * R * R) )
1336
+ val = sum3dm_x / sum2dm_x / sqrt(sum2dm_x)
1337
+ val *= ( nobs) * sqrt(nobs - 1 ) / (nobs - 2 )
1303
1338
else :
1304
- output[i] = NaN
1339
+ val = NaN
1340
+
1341
+ output[i] = val
1305
1342
1306
1343
return output
1307
1344
1308
1345
# -------------------------------------------------------------------------------
1309
1346
# Rolling kurtosis
1310
1347
1311
1348
1312
- def roll_kurt (ndarray[double_t] input ,
1313
- int win , int minp ):
1349
+ def roll_kurt (ndarray[double_t] input , int win , int minp ):
1350
+ """
1351
+ Numerically stable implementation using a Welford-like method.
1352
+ """
1314
1353
cdef double val, prev
1315
- cdef double x = 0 , xx = 0 , xxx = 0 , xxxx = 0
1316
- cdef Py_ssize_t nobs = 0 , i
1354
+ cdef double mean_x = 0 , sum2dm_x = 0 , sum3dm_x = 0 , sum4dm_x = 0
1355
+ cdef double rep = NaN, delta, val_prev, delta_nobs, sum_sum2dm_x
1356
+ cdef Py_ssize_t nobs = 0 , nrep = 0 , i
1317
1357
cdef Py_ssize_t N = len (input )
1318
1358
1319
1359
cdef ndarray[double_t] output = np.empty(N, dtype = float )
1320
1360
1321
- # 5 components of the kurtosis equation
1322
- cdef double A, B, C, D, R, K
1323
-
1324
1361
minp = _check_minp(win, minp, N)
1325
1362
1363
+ for i from 0 <= i < N:
1364
+ val = input [i]
1365
+ prev = NaN if i < win else input [i - win]
1366
+
1367
+ # First, count the number of onservation and consecutive repeats
1368
+ count_repeats(val, prev, & rep, & nrep, & nobs)
1369
+
1370
+ # Then, compute the new mean and sums of squared, cubed and
1371
+ # tesseracted differences
1372
+ if nobs == nrep:
1373
+ # All non-NaN values in window are identical...
1374
+ sum2dm_x = sum3dm_x = sum4dm_x = 0
1375
+ mean_x = rep if nobs > 0 else 0
1376
+ elif val == val:
1377
+ # Adding one observation...
1378
+ if prev == prev:
1379
+ # ...and removing another
1380
+ delta_nobs = val - prev
1381
+ delta = delta_nobs / nobs
1382
+ prev -= mean_x
1383
+ mean_x += delta
1384
+ val -= mean_x
1385
+ val_prev = val + prev
1386
+ sum_sum2dm_x = sum2dm_x
1387
+ dif_sum2dm_x = - sum2dm_x
1388
+ sum2dm_x += delta_nobs * val_prev
1389
+ sum_sum2dm_x += sum2dm_x
1390
+ dif_sum2dm_x += sum2dm_x
1391
+ sum_sum3dm_x = sum3dm_x
1392
+ sum3dm_x += (0.25 * delta_nobs * delta * (nobs* nobs - 1 ) +
1393
+ 0.75 * nobs * val_prev * val_prev -
1394
+ 1.5 * sum_sum2dm_x) * delta
1395
+ sum_sum3dm_x += sum3dm_x
1396
+ sum4dm_x = (2 * m * (val* val* val + prev* prev* prev) -
1397
+ delta * delta_nobs * (m- 2 ) * (m- 1 ) * val_prev -
1398
+ 2 * sum_sum3dm_x - delta * dif_sum2dm_x) * delta
1399
+ else :
1400
+ # ...and not removing any
1401
+ delta_nobs = val - mean_x
1402
+ delta = delta_nobs / nobs
1403
+ delta2 = delta * delta
1404
+
1405
+ sum4dm_x += (delta * delta * delta_nobs * (nobs - 1 ) * (
1406
+
1407
+ mean_x += delta
1408
+ sum3dm_x += delta * (delta_nobs* delta* (nobs- 1 )* (nobs- 2 ) -
1409
+ 3 * sum2dm_x)
1410
+ sum2dm_x += delta_nobs * (val - mean_x)
1411
+ elif prev == prev:
1412
+ # Adding no new observation, but removing one
1413
+ delta_nobs = prev - mean_x
1414
+ delta = delta_nobs / nobs
1415
+ sum3dm_x -= delta * (delta * delta_nobs * (nobs+ 1 ) * (nobs+ 2 ) -
1416
+ 3 * sum2dm_x)
1417
+ sum2dm_x -= delta_nobs * delta * (nobs + 1 )
1418
+ mean_x -= delta
1419
+
1420
+
1421
+
1422
+
1326
1423
for i from 0 <= i < minp - 1 :
1327
1424
val = input [i]
1328
1425
0 commit comments