@@ -42,7 +42,8 @@ class Smoother:
42
42
If float, all regression is done with Gaussian weights whose variance is
43
43
half the gaussian_bandwidth. If None, performs unweighted regression. (Applies
44
44
to 'left_gauss_linear' and 'savgol'.)
45
- Here are some reference values:
45
+ Here are some reference values (the given bandwidth produces a 95% weighting on
46
+ the data of length time window into the past):
46
47
time window | bandwidth
47
48
7 36
48
49
14 144
@@ -82,8 +83,9 @@ class Smoother:
82
83
gaussian_bandwidth=144)
83
84
>>> smoothed_signal = smoother.smooth(signal)
84
85
85
- Example 4. Apply a local linear regression smoother (equivalent to `left_gauss_linear`), with
86
- 95% weight on the recent week and a sharp cutoff after 3 weeks.
86
+ Example 4. Apply a local linear regression smoother (essentially equivalent to
87
+ `left_gauss_linear`), with 95% weight on the recent week and a sharp
88
+ cutoff after 3 weeks.
87
89
>>> smoother = Smoother(smoother_name='savgol', poly_fit_degree=1, window_length=21,
88
90
gaussian_bandwidth=36)
89
91
>>> smoothed_signal = smoother.smooth(signal)
@@ -111,26 +113,19 @@ def __init__(
111
113
self .impute_method = impute_method
112
114
self .minval = minval
113
115
self .boundary_method = boundary_method
114
- if smoother_name == "savgol" :
115
- self .coeffs = self .savgol_coeffs (
116
- - self .window_length + 1 ,
117
- 0 ,
118
- self .poly_fit_degree ,
119
- self .gaussian_bandwidth ,
120
- )
121
- else :
122
- self .coeffs = None
123
116
124
117
valid_smoothers = {"savgol" , "left_gauss_linear" , "moving_average" , "identity" }
125
-
118
+ valid_impute_methods = { "savgol" , "zeros" , None }
126
119
if self .smoother_name not in valid_smoothers :
127
120
raise ValueError ("Invalid smoother name given." )
128
-
129
- valid_impute_methods = {"savgol" , "zeros" , None }
130
-
131
121
if self .impute_method not in valid_impute_methods :
132
122
raise ValueError ("Invalid impute method given." )
133
123
124
+ if smoother_name == "savgol" :
125
+ self .coeffs = self .savgol_coeffs (- self .window_length + 1 , 0 )
126
+ else :
127
+ self .coeffs = None
128
+
134
129
def smooth (self , signal ):
135
130
"""
136
131
The major workhorse smoothing function. Can use one of three smoothing
@@ -145,7 +140,9 @@ def smooth(self, signal):
145
140
A smoothed 1D signal.
146
141
"""
147
142
if len (signal ) < self .window_length :
148
- raise ValueError ("The window_length must be smaller than the length of the signal." )
143
+ raise ValueError (
144
+ "The window_length must be smaller than the length of the signal."
145
+ )
149
146
150
147
signal = self .impute (signal )
151
148
@@ -175,6 +172,10 @@ def impute(self, signal):
175
172
Imputed signal.
176
173
"""
177
174
if self .impute_method == "savgol" :
175
+ # We cannot impute if the signal begins with a NaN (there is no information to go by).
176
+ # To preserve input-output array lengths, this util will not drop NaNs for you.
177
+ if np .isnan (signal [0 ]):
178
+ raise ValueError ("The signal should not begin with a nan value." )
178
179
imputed_signal = self .savgol_impute (signal )
179
180
elif self .impute_method == "zeros" :
180
181
imputed_signal = np .nan_to_num (signal )
@@ -221,8 +222,6 @@ def left_gauss_linear_smoother(self, signal):
221
222
At each time t, we use the data from times 1, ..., t-dt, weighted
222
223
using the Gaussian kernel, to produce the estimate at time t.
223
224
224
- For math details, see the smoothing_docs folder.
225
-
226
225
Parameters
227
226
----------
228
227
signal: np.ndarray
@@ -261,8 +260,8 @@ def savgol_predict(self, signal):
261
260
"""
262
261
Fits a polynomial through the values given by the signal and returns the value
263
262
of the polynomial at the right-most signal-value. More precisely, fits a polynomial
264
- f(t) of degree poly_fit_degree through the points signal[0 ], signal[1] ..., signal[-1],
265
- and returns the evaluation of the polynomial at the location of signal[-1 ].
263
+ f(t) of degree poly_fit_degree through the points signal[-n ], signal[-n+ 1] ..., signal[-1],
264
+ and returns the evaluation of the polynomial at the location of signal[0 ].
266
265
267
266
Parameters
268
267
----------
@@ -271,23 +270,20 @@ def savgol_predict(self, signal):
271
270
272
271
Returns
273
272
----------
274
- coeffs: np.ndarray
275
- A vector of coefficients of length nl that determines the savgol
276
- convolution filter.
273
+ predicted_value: float
274
+ The anticipated value that comes after the end of the signal based on a polynomial fit.
277
275
"""
278
- coeffs = self .savgol_coeffs (
279
- - len (signal ) + 1 , 0 , self .poly_fit_degree , self .gaussian_bandwidth
280
- )
281
- return signal @ coeffs
276
+ coeffs = self .savgol_coeffs (- len (signal ) + 1 , 0 )
277
+ predicted_value = signal @ coeffs
278
+ return predicted_value
282
279
283
- @classmethod
284
- def savgol_coeffs (cls , nl , nr , poly_fit_degree , gaussian_bandwidth = 100 ):
280
+ def savgol_coeffs (self , nl , nr ):
285
281
"""
286
282
Solves for the Savitzky-Golay coefficients. The coefficients c_i
287
283
give a filter so that
288
284
y = \sum_{i=-{n_l}}^{n_r} c_i x_i
289
285
is the value at 0 (thus the constant term) of the polynomial fit
290
- through the points {x_i}. The coefficients are c_i are caluclated as
286
+ through the points {x_i}. The coefficients are c_i are calculated as
291
287
c_i = ((A.T @ A)^(-1) @ (A.T @ e_i))_0
292
288
where A is the design matrix of the polynomial fit and e_i is the standard
293
289
basis vector i. This is currently done via a full inversion, which can be
@@ -317,16 +313,15 @@ def savgol_coeffs(cls, nl, nr, poly_fit_degree, gaussian_bandwidth=100):
317
313
raise warnings .warn ("The filter is no longer causal." )
318
314
319
315
A = np .vstack (
320
- [np .arange (nl , nr + 1 ) ** j for j in range (poly_fit_degree + 1 )]
316
+ [np .arange (nl , nr + 1 ) ** j for j in range (self . poly_fit_degree + 1 )]
321
317
).T
322
318
323
- if gaussian_bandwidth is None :
319
+ if self . gaussian_bandwidth is None :
324
320
mat_inverse = np .linalg .inv (A .T @ A ) @ A .T
325
321
else :
326
- weights = np .exp (- ((np .arange (nl , nr + 1 )) ** 2 ) / gaussian_bandwidth )
322
+ weights = np .exp (- ((np .arange (nl , nr + 1 )) ** 2 ) / self . gaussian_bandwidth )
327
323
mat_inverse = np .linalg .inv ((A .T * weights ) @ A ) @ (A .T * weights )
328
324
window_length = nr - nl + 1
329
- basis_vector = np .zeros (window_length )
330
325
coeffs = np .zeros (window_length )
331
326
for i in range (window_length ):
332
327
basis_vector = np .zeros (window_length )
@@ -412,23 +407,20 @@ def savgol_impute(self, signal):
412
407
An imputed 1D signal.
413
408
"""
414
409
signal_imputed = np .copy (signal )
415
- if np .isnan (signal [0 ]):
416
- raise ValueError ("The signal should not begin with a nan value." )
417
410
for ix in np .where (np .isnan (signal ))[0 ]:
411
+ # Boundary cases
418
412
if ix < self .window_length :
413
+ # A nan following a single value should just be extended
419
414
if ix == 1 :
420
415
signal_imputed [ix ] = signal_imputed [0 ]
416
+ # Otherwise, use savgol fitting
421
417
else :
422
- coeffs = self .savgol_coeffs (
423
- - ix , - 1 , self .poly_fit_degree , self .gaussian_bandwidth
424
- )
418
+ coeffs = self .savgol_coeffs (- ix , - 1 )
425
419
signal_imputed [ix ] = signal_imputed [:ix ] @ coeffs
420
+ # Use a polynomial fit on the past window length to impute
426
421
else :
427
- coeffs = self .savgol_coeffs (
428
- - self .window_length ,
429
- - 1 ,
430
- self .poly_fit_degree ,
431
- self .gaussian_bandwidth ,
422
+ coeffs = self .savgol_coeffs (- self .window_length , - 1 )
423
+ signal_imputed [ix ] = (
424
+ signal_imputed [ix - self .window_length : ix ] @ coeffs
432
425
)
433
- signal_imputed [ix ] = signal_imputed [ix - self .window_length : ix ] @ coeffs
434
426
return signal_imputed
0 commit comments