@@ -142,7 +142,9 @@ def __init__(
142
142
if smoother_name == "savgol" :
143
143
# The polynomial fitting is done on a past window of size window_length
144
144
# including the current day value.
145
- self .coeffs = self .savgol_coeffs (- self .window_length + 1 , 0 )
145
+ self .coeffs = self .savgol_coeffs (
146
+ - self .window_length + 1 , 0 , self .poly_fit_degree
147
+ )
146
148
else :
147
149
self .coeffs = None
148
150
@@ -278,30 +280,35 @@ def left_gauss_linear_smoother(self, signal):
278
280
signal_smoothed [signal_smoothed <= self .minval ] = self .minval
279
281
return signal_smoothed
280
282
281
- def savgol_predict (self , signal ):
283
+ def savgol_predict (self , signal , poly_fit_degree , nr ):
282
284
"""Predict a single value using the savgol method.
283
285
284
286
Fits a polynomial through the values given by the signal and returns the value
285
- of the polynomial at the right-most signal-value. More precisely, fits a polynomial
286
- f(t) of degree poly_fit_degree through the points signal[-n], signal[-n+1] ..., signal[-1],
287
- and returns the evaluation of the polynomial at the location of signal[0].
287
+ of the polynomial at the right-most signal-value. More precisely, for a signal of length
288
+ n, fits a poly_fit_degree polynomial through the points signal[-n+1+nr], signal[-n+2+nr],
289
+ ..., signal[nr], and returns the evaluation of the polynomial at signal[0]. Hence, if
290
+ nr=0, then the last value of the signal is smoothed, and if nr=-1, then the value after
291
+ the last signal value is anticipated.
288
292
289
293
Parameters
290
294
----------
291
295
signal: np.ndarray
292
296
A 1D signal to smooth.
297
+ poly_fit_degree: int
298
+ The degree of the polynomial fit.
299
+ nr: int
300
+ An integer that determines the position of the predicted value relative to the signal.
293
301
294
302
Returns
295
303
----------
296
304
predicted_value: float
297
305
The anticipated value that comes after the end of the signal based on a polynomial fit.
298
306
"""
299
- # Add one
300
- coeffs = self .savgol_coeffs (- len (signal ) + 1 , 0 )
307
+ coeffs = self .savgol_coeffs (- len (signal ) + 1 + nr , nr , poly_fit_degree )
301
308
predicted_value = signal @ coeffs
302
309
return predicted_value
303
310
304
- def savgol_coeffs (self , nl , nr ):
311
+ def savgol_coeffs (self , nl , nr , poly_fit_degree ):
305
312
"""Solve for the Savitzky-Golay coefficients.
306
313
307
314
The coefficients c_i give a filter so that
@@ -321,23 +328,20 @@ def savgol_coeffs(self, nl, nr):
321
328
The right window bound for the polynomial fit, inclusive.
322
329
poly_fit_degree: int
323
330
The degree of the polynomial to be fit.
324
- gaussian_bandwidth: float or None
325
- If float, performs regression with Gaussian weights whose variance is
326
- the gaussian_bandwidth. If None, performs unweighted regression.
327
331
328
332
Returns
329
333
----------
330
334
coeffs: np.ndarray
331
- A vector of coefficients of length nl that determines the savgol
335
+ A vector of coefficients of length nr - nl + 1 that determines the savgol
332
336
convolution filter.
333
337
"""
334
338
if nl >= nr :
335
339
raise ValueError ("The left window bound should be less than the right." )
336
340
if nr > 0 :
337
- raise warnings .warn ("The filter is no longer causal." )
341
+ warnings .warn ("The filter is no longer causal." )
338
342
339
343
A = np .vstack (
340
- [np .arange (nl , nr + 1 ) ** j for j in range (self . poly_fit_degree + 1 )]
344
+ [np .arange (nl , nr + 1 ) ** j for j in range (poly_fit_degree + 1 )]
341
345
).T
342
346
343
347
if self .gaussian_bandwidth is None :
@@ -388,8 +392,10 @@ def savgol_smoother(self, signal):
388
392
# At the very edge, the design matrix is often singular, in which case
389
393
# we just fall back to the raw signal
390
394
try :
391
- signal_smoothed [ix ] = self .savgol_predict (signal [:ix + 1 ])
392
- except np .linalg .LinAlgError :
395
+ signal_smoothed [ix ] = self .savgol_predict (
396
+ signal [: ix + 1 ], self .poly_fit_degree , 0
397
+ )
398
+ except np .linalg .LinAlgError : # for small ix, the design matrix is singular
393
399
signal_smoothed [ix ] = signal [ix ]
394
400
return signal_smoothed
395
401
elif self .boundary_method == "identity" :
@@ -402,19 +408,13 @@ def savgol_smoother(self, signal):
402
408
def savgol_impute (self , signal ):
403
409
"""Impute the nan values in signal using savgol.
404
410
405
- This method fills the nan values in the signal with an M-degree polynomial fit
411
+ This method fills the nan values in the signal with a quadratic polynomial fit
406
412
on a rolling window of the immediate past up to window_length data points.
407
413
408
- In the case of a single data point in the past, the single data point is
409
- continued. In the case of no data points in the past (i.e. the signal starts
410
- with nan), an error is raised.
414
+ A number of boundary cases are handled involving nan filling close to the boundary.
411
415
412
416
Note that in the case of many adjacent nans, the method will use previously
413
- imputed values to do the fitting for later values. E.g. for
414
- >>> x = np.array([1.0, 2.0, np.nan, 1.0, np.nan])
415
- the last np.nan will be fit on np.array([1.0, 2.0, *, 1.0]), where * is the
416
- result of imputing based on np.array([1.0, 2.0]) (depends on the savgol
417
- settings).
417
+ imputed values to do the fitting for later values.
418
418
419
419
Parameters
420
420
----------
@@ -430,17 +430,24 @@ def savgol_impute(self, signal):
430
430
for ix in np .where (np .isnan (signal ))[0 ]:
431
431
# Boundary cases
432
432
if ix < self .window_length :
433
- # A nan following a single value should just be extended
433
+ # At the boundary, a single value should just be extended
434
434
if ix == 1 :
435
- signal_imputed [ix ] = signal_imputed [0 ]
436
- # Otherwise, use savgol fitting
435
+ signal_imputed [ix ] = signal_imputed [ix - 1 ]
436
+ # Reduce the polynomial degree if needed
437
+ elif ix == 2 :
438
+ signal_imputed [ix ] = self .savgol_predict (
439
+ signal_imputed [:ix ], 1 , - 1
440
+ )
441
+ # Otherwise, use savgol fitting on the largest window prior
437
442
else :
438
- coeffs = self .savgol_coeffs (- ix , - 1 )
439
- signal_imputed [ix ] = signal_imputed [:ix ] @ coeffs
440
- # Use a polynomial fit on the past window length to impute
443
+ signal_imputed [ix ] = self .savgol_predict (
444
+ signal_imputed [:ix ], self .poly_fit_degree , - 1
445
+ )
446
+ # Away from the boundary, use savgol fitting on a fixed window
441
447
else :
442
- coeffs = self .savgol_coeffs (- self .window_length , - 1 )
443
- signal_imputed [ix ] = (
444
- signal_imputed [ix - self .window_length : ix ] @ coeffs
448
+ signal_imputed [ix ] = self .savgol_predict (
449
+ signal_imputed [ix - self .window_length : ix ],
450
+ self .poly_fit_degree ,
451
+ - 1 ,
445
452
)
446
453
return signal_imputed
0 commit comments