@@ -24,17 +24,18 @@ class Smoother:
24
24
25
25
Instantiating a smoother class specifies a smoother with a host of parameters,
26
26
which can then be applied to an np.ndarray with the function smooth:
27
- > smoother = Smoother(method_name ='savgol', window_length=28, gaussian_bandwidth=100 )
27
+ > smoother = Smoother(smoother_name ='savgol', window_length=28, gaussian_bandwidth=144 )
28
28
> smoothed_signal = smoother.smooth(signal)
29
29
30
30
Parameters
31
31
----------
32
- method_name : {'savgol', 'left_gauss_linear', 'moving_average'}
33
- This variable specifies the smoothing method . We have three methods , currently:
34
- * 'savgol' or a Savtizky-Golay smoother
32
+ smoother_name : {'savgol', 'left_gauss_linear', 'moving_average', 'identity '}
33
+ This variable specifies the smoother . We have four smoothers , currently:
34
+ * 'savgol' or a Savitzky-Golay smoother (default)
35
35
* 'left_gauss_linear' or a Gaussian-weight linear regression smoother
36
36
* 'moving_average' or a moving window average smoother
37
- Descriptions of the methods are available in the doc strings. Full details are
37
+ * 'identity' or the trivial smoother (no smoothing)
38
+ Descriptions of the smoothers are available in the doc strings. Full details are
38
39
in: https://github.com/cmu-delphi/covidcast-modeling/indicators_smoother.
39
40
window_length: int
40
41
The length of the averaging window for 'savgol' and 'moving_average'.
@@ -51,39 +52,40 @@ class Smoother:
51
52
28 579
52
53
35 905
53
54
42 1303
54
- impute: bool
55
- If True, will fill nan values before smoothing. Currently uses the 'savgol' method
56
- for imputation.
55
+ impute_method: {'savgol', 'zeros', None}
56
+ If 'savgol' (default), will fill nan values with a savgol fit on the largest available time
57
+ window prior (up to window_length). If 'zeros', will fill nan values with zeros.
58
+ If None, leaves the nans in place.
57
59
minval: float or None
58
60
The smallest value to allow in a signal. If None, there is no smallest value.
59
61
Currently only implemented for 'left_gauss_linear'.
60
62
poly_fit_degree: int
61
- A parameter for the 'savgol' method which sets the degree of the polynomial fit.
63
+ A parameter for the 'savgol' smoother which sets the degree of the polynomial fit.
62
64
63
65
Methods
64
66
----------
65
67
smooth: np.ndarray
66
- Takes a 1D signal and returns a smoothed version.
68
+ Takes a 1D signal and returns a smoothed version. Both arrays have the same length.
67
69
"""
68
70
69
71
def __init__ (
70
72
self ,
71
- method_name = "savgol" ,
73
+ smoother_name = "savgol" ,
72
74
poly_fit_degree = 2 ,
73
75
window_length = 28 ,
74
76
gaussian_bandwidth = 144 , # a ~2 week window
75
- impute = True ,
77
+ impute_method = "savgol" ,
76
78
minval = None ,
77
79
boundary_method = "shortened_window" ,
78
80
):
79
- self .method_name = method_name
81
+ self .smoother_name = smoother_name
80
82
self .poly_fit_degree = poly_fit_degree
81
83
self .window_length = window_length
82
84
self .gaussian_bandwidth = gaussian_bandwidth
83
- self .impute = impute
85
+ self .impute_method = impute_method
84
86
self .minval = minval
85
87
self .boundary_method = boundary_method
86
- if method_name == "savgol" :
88
+ if smoother_name == "savgol" :
87
89
self .coeffs = self .savgol_coeffs (
88
90
- self .window_length + 1 ,
89
91
0 ,
@@ -93,15 +95,20 @@ def __init__(
93
95
else :
94
96
self .coeffs = None
95
97
96
- METHODS = {"savgol" , "left_gauss_linear" , "moving_average" , "identity" }
98
+ SMOOTHERS = {"savgol" , "left_gauss_linear" , "moving_average" , "identity" }
97
99
98
- if self .method_name not in METHODS :
99
- raise ValueError ("Invalid method name given." )
100
+ if self .smoother_name not in SMOOTHERS :
101
+ raise ValueError ("Invalid smoother name given." )
102
+
103
+ IMPUTE_METHODS = {"savgol" , "zeros" , None }
104
+
105
+ if self .impute_method not in IMPUTE_METHODS :
106
+ raise ValueError ("Invalid impute method given." )
100
107
101
108
def smooth (self , signal ):
102
109
"""
103
110
The major workhorse smoothing function. Can use one of four smoothing
104
- methods, as specified by the class variable method_name .
111
+ methods, as specified by the class variable smoother_name .
105
112
106
113
Parameters
107
114
----------
@@ -111,20 +118,42 @@ def smooth(self, signal):
111
118
signal_smoothed: np.ndarray
112
119
A smoothed 1D signal.
113
120
"""
114
- if self .impute :
115
- signal = self .savgol_impute (signal )
121
+ signal = self .impute (signal )
116
122
117
- if self .method_name == "savgol" :
123
+ if self .smoother_name == "savgol" :
118
124
signal_smoothed = self .savgol_smoother (signal )
119
- elif self .method_name == "left_gauss_linear" :
125
+ elif self .smoother_name == "left_gauss_linear" :
120
126
signal_smoothed = self .left_gauss_linear_smoother (signal )
121
- elif self .method_name == "moving_average" :
127
+ elif self .smoother_name == "moving_average" :
122
128
signal_smoothed = self .moving_average_smoother (signal )
123
- elif self .method_name == "identity" :
129
+ elif self .smoother_name == "identity" :
124
130
signal_smoothed = signal
125
131
126
132
return signal_smoothed
127
133
134
def impute(self, signal):
    """
    Fill the nan values in the signal according to ``self.impute_method``.

    Parameters
    ----------
    signal: np.ndarray
        1D signal to be imputed.

    Returns
    -------
    imputed_signal: np.ndarray
        Imputed signal. For 'zeros' and None this is a new array; the input
        is not modified. For 'savgol' it is whatever ``self.savgol_impute``
        returns.

    Raises
    ------
    ValueError
        If ``self.impute_method`` is not one of 'savgol', 'zeros', or None.
    """
    if self.impute_method == "savgol":
        return self.savgol_impute(signal)

    if self.impute_method == "zeros":
        # Replace only the nans. np.nan_to_num would additionally rewrite
        # +inf/-inf to very large finite numbers, which is not "fill nan
        # values with zeros".
        imputed_signal = np.copy(signal)
        imputed_signal[np.isnan(imputed_signal)] = 0
        return imputed_signal

    if self.impute_method is None:
        # Leave the nans in place, but still hand back a copy so callers can
        # mutate the result without touching the input.
        return np.copy(signal)

    # Guard against the attribute having been changed after construction;
    # without this the function would fail with an UnboundLocalError.
    raise ValueError("Invalid impute method given.")
156
+
128
157
def moving_average_smoother (self , signal ):
129
158
"""
130
159
Computes a moving average on the signal.
@@ -322,10 +351,19 @@ def savgol_smoother(self, signal):
322
351
323
352
def savgol_impute (self , signal ):
324
353
"""
325
- This method looks through the signal, finds the nan values, and imputes them
326
- using an M-degree polynomial fit on the previous window_length data points.
327
- The boundary cases, i.e. nans within wl of the start of the array
328
- are imputed with a window length shrunk to the data available.
354
+ This method fills the nan values in the signal with an M-degree polynomial fit
355
+ on a rolling window of the immediate past up to window_length data points.
356
+
357
+ In the case of a single data point in the past, the single data point is
358
+ continued. In the case of no data points in the past (i.e. the signal starts
359
+ with nan), an error is raised.
360
+
361
+ Note that in the case of many adjacent nans, the method will use previously
362
+ imputed values to do the fitting for later values. E.g. for
363
+ >>> x = np.array([1.0, 2.0, np.nan, 1.0, np.nan])
364
+ the last np.nan will be fit on np.array([1.0, 2.0, *, 1.0]), where * is the
365
+ result of imputing based on np.array([1.0, 2.0]) (depends on the savgol
366
+ settings).
329
367
330
368
Parameters
331
369
----------
@@ -342,25 +380,21 @@ def savgol_impute(self, signal):
342
380
raise ValueError ("The signal should not begin with a nan value." )
343
381
for ix in np .where (np .isnan (signal ))[0 ]:
344
382
if ix < self .window_length :
345
- if ix == 0 :
346
- signal_imputed [ix ] = signal [ix ]
347
- elif ix == 1 :
348
- signal_imputed [ix ] = (
349
- signal [ix ] if not np .isnan (signal [ix ]) else signal [0 ]
350
- )
383
+ if ix == 1 :
384
+ signal_imputed [ix ] = signal_imputed [0 ]
351
385
else :
352
386
coeffs = self .savgol_coeffs (
353
387
- ix , - 1 , self .poly_fit_degree , self .gaussian_bandwidth
354
388
)
355
- signal_imputed [ix ] = signal [:ix ] @ coeffs
389
+ signal_imputed [ix ] = signal_imputed [:ix ] @ coeffs
356
390
else :
357
391
coeffs = self .savgol_coeffs (
358
392
- self .window_length ,
359
393
- 1 ,
360
394
self .poly_fit_degree ,
361
395
self .gaussian_bandwidth ,
362
396
)
363
- signal_imputed [ix ] = signal [ix - self .window_length : ix ] @ coeffs
397
+ signal_imputed [ix ] = signal_imputed [ix - self .window_length : ix ] @ coeffs
364
398
return signal_imputed
365
399
366
400
0 commit comments