13
13
import warnings
14
14
15
15
import numpy as np
16
- import pandas as pd
17
16
18
17
19
18
class Smoother :
20
19
"""
21
- This is the smoothing utility class. It handles imputation and smoothing.
22
- Reasonable defaults are given for all the parameters, but fine-grained
23
- control is exposed.
24
-
25
- Instantiating a smoother class specifies a smoother with a host of parameters,
26
- which can then be applied to an np.ndarray with the function smooth:
27
- > smoother = Smoother(smoother_name='savgol', window_length=28, gaussian_bandwidth=144)
28
- > smoothed_signal = smoother.smooth(signal)
20
+ This is the smoothing utility class. This class holds the parameter settings for its smoother
21
+ methods and provides reasonable defaults. Basic usage can be found in the examples below.
22
+
23
+ The smoother function takes numpy arrays as input, expecting the values to come from a
24
+ regularly-spaced time grid. NaNs are ok, as long as the array does not begin with a NaN. The
25
+ rest of the NaNs will be handled via imputation by default, though this can be turned off.
29
26
30
27
Parameters
31
28
----------
@@ -53,8 +50,8 @@ class Smoother:
53
50
35 905
54
51
42 1303
55
52
impute: {'savgol', 'zeros', None}
56
- If 'savgol' (default), will fill nan values with a savgol fit on the largest available time
57
- window prior (up to window_length). If 'zeros', will fill nan values with zeros.
53
+ If 'savgol' (default), will fill nan values with a savgol fit on the largest available time
54
+ window prior (up to window_length). If 'zeros', will fill nan values with zeros.
58
55
If None, leaves the nans in place.
59
56
minval: float or None
60
57
The smallest value to allow in a signal. If None, there is no smallest value.
@@ -66,6 +63,33 @@ class Smoother:
66
63
----------
67
64
smooth: np.ndarray
68
65
Takes a 1D signal and returns a smoothed version. Both arrays have the same length.
66
+
67
+ Example Usage
68
+ -------------
69
+ Example 1. Apply a rolling average smoother with a window of length 10.
70
+ >>> smoother = Smoother(smoother_name='moving_average', window_length=10)
71
+ >>> smoothed_signal = smoother.smooth(signal)
72
+
73
+ Example 2. Smooth a dataframe column.
74
+ >>> smoother = Smoother(smoother_name="savgol")
75
+ >>> df[col] = pd.Series(smoother.smooth(df[col].to_numpy()))
76
+
77
+ Example 3. Apply a rolling weighted average smoother, with 95% weight on the recent 2 weeks and
78
+ a sharp cutoff after 4 weeks.
79
+ >>> smoother = Smoother(smoother_name='savgol', poly_fit_degree=0, window_length=28,
80
+ gaussian_bandwidth=144)
81
+ >>> smoothed_signal = smoother.smooth(signal)
82
+
83
+ Example 4. Apply a local linear regression smoother (equivalent to `left_gauss_linear`), with
84
+ 95% weight on the recent week and a sharp cutoff after 3 weeks.
85
+ >>> smoother = Smoother(smoother_name='savgol', poly_fit_degree=1, window_length=21,
86
+ gaussian_bandwidth=36)
87
+ >>> smoothed_signal = smoother.smooth(signal)
88
+
89
+ Example 5. Apply the identity function (simplifies code that iterates through smoothers _and_
90
+ raw data).
91
+ >>> smoother = Smoother(smoother_name='identity')
92
+ >>> smoothed_signal = smoother.smooth(signal)
69
93
"""
70
94
71
95
def __init__ (
@@ -95,14 +119,14 @@ def __init__(
95
119
else :
96
120
self .coeffs = None
97
121
98
- SMOOTHERS = {"savgol" , "left_gauss_linear" , "moving_average" , "identity" }
122
+ valid_smoothers = {"savgol" , "left_gauss_linear" , "moving_average" , "identity" }
99
123
100
- if self .smoother_name not in SMOOTHERS :
124
+ if self .smoother_name not in valid_smoothers :
101
125
raise ValueError ("Invalid smoother name given." )
102
126
103
- IMPUTE_METHODS = {"savgol" , "zeros" , None }
127
+ valid_impute_methods = {"savgol" , "zeros" , None }
104
128
105
- if self .impute_method not in IMPUTE_METHODS :
129
+ if self .impute_method not in valid_impute_methods :
106
130
raise ValueError ("Invalid impute method given." )
107
131
108
132
def smooth (self , signal ):
@@ -118,6 +142,9 @@ def smooth(self, signal):
118
142
signal_smoothed: np.ndarray
119
143
A smoothed 1D signal.
120
144
"""
145
+ if len (signal ) < self .window_length :
146
+ raise ValueError ("The window_length must be smaller than the length of the signal." )
147
+
121
148
signal = self .impute (signal )
122
149
123
150
if self .smoother_name == "savgol" :
@@ -223,9 +250,9 @@ def left_gauss_linear_smoother(self, signal):
223
250
224
251
def savgol_predict (self , signal ):
225
252
"""
226
- Fits a polynomial through the values given by the signal and returns the value
253
+ Fits a polynomial through the values given by the signal and returns the value
227
254
of the polynomial at the right-most signal-value. More precisely, fits a polynomial
228
- f(t) of degree poly_fit_degree through the points signal[0], signal[1] ..., signal[-1],
255
+ f(t) of degree poly_fit_degree through the points signal[0], signal[1] ..., signal[-1],
229
256
and returns the evaluation of the polynomial at the location of signal[-1].
230
257
231
258
Parameters
@@ -250,7 +277,7 @@ def savgol_coeffs(cls, nl, nr, poly_fit_degree, gaussian_bandwidth=100):
250
277
Solves for the Savitzky-Golay coefficients. The coefficients c_i
251
278
give a filter so that
252
279
y = \sum_{i=-{n_l}}^{n_r} c_i x_i
253
- is the value at 0 (thus the constant term) of the polynomial fit
280
+ is the value at 0 (thus the constant term) of the polynomial fit
254
281
through the points {x_i}. The coefficients c_i are calculated as
255
282
c_i = ((A.T @ A)^(-1) @ (A.T @ e_i))_0
256
283
where A is the design matrix of the polynomial fit and e_i is the standard
@@ -266,7 +293,7 @@ def savgol_coeffs(cls, nl, nr, poly_fit_degree, gaussian_bandwidth=100):
266
293
poly_fit_degree: int
267
294
The degree of the polynomial to be fit.
268
295
gaussian_bandwidth: float or None
269
- If float, performs regression with Gaussian weights whose variance is
296
+ If float, performs regression with Gaussian weights whose variance is
270
297
the gaussian_bandwidth. If None, performs unweighted regression.
271
298
272
299
Returns
@@ -396,35 +423,3 @@ def savgol_impute(self, signal):
396
423
)
397
424
signal_imputed [ix ] = signal_imputed [ix - self .window_length : ix ] @ coeffs
398
425
return signal_imputed
399
-
400
-
401
- # TODO: this needs a test, probably
402
- def smoothed_values_by_geo_id (
403
- df : pd .DataFrame , method = "savgol" , ** kwargs
404
- ) -> np .ndarray :
405
- """Computes a smoothed version of the variable 'val' within unique values of 'geo_id'
406
-
407
- Currently uses a local weighted least squares, where the weights are given
408
- by a Gaussian kernel.
409
-
410
- Parameters
411
- ----------
412
- df: pd.DataFrame
413
- A data frame with columns "geo_id", "timestamp", and "val"
414
- method: {'savgol', 'left_gauss_linear', 'moving_average'}
415
- A choice of window smoother to use. Check the smoother method definitions
416
- for specific parameters.
417
-
418
- Returns
419
- -------
420
- np.ndarray
421
- A one-dimensional numpy array containing the smoothed values.
422
- """
423
- smoother = Smoother (method , ** kwargs )
424
-
425
- df = df .copy ()
426
- df ["val_smooth" ] = 0
427
- for geo_id in df ["geo_id" ].unique ():
428
- signal = df [df ["geo_id" ] == geo_id ]["val" ].values
429
- df .loc [df ["geo_id" ] == geo_id , "val_smooth" ] = smoother .smooth (signal )
430
- return df ["val_smooth" ].values
0 commit comments