Skip to content

Commit 4bf3410

Browse files
committed
Small smoother updates:
* move smoother first value nan check to impute, so it's less buried * clean documentation in a few functions * remove @classmethod from savgol_coeffs * fix breaking tests
1 parent 9925348 commit 4bf3410

File tree

2 files changed

+40
-48
lines changed

2 files changed

+40
-48
lines changed

_delphi_utils_python/delphi_utils/smooth.py

Lines changed: 38 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ class Smoother:
4242
If float, all regression is done with Gaussian weights whose variance is
4343
half the gaussian_bandwidth. If None, performs unweighted regression. (Applies
4444
to 'left_gauss_linear' and 'savgol'.)
45-
Here are some reference values:
45+
Here are some reference values (the given bandwidth produces a 95% weighting on
46+
the data of length time window into the past):
4647
time window | bandwidth
4748
7 36
4849
14 144
@@ -82,8 +83,9 @@ class Smoother:
8283
gaussian_bandwidth=144)
8384
>>> smoothed_signal = smoother.smooth(signal)
8485
85-
Example 4. Apply a local linear regression smoother (equivalent to `left_gauss_linear`), with
86-
95% weight on the recent week and a sharp cutoff after 3 weeks.
86+
Example 4. Apply a local linear regression smoother (essentially equivalent to
87+
`left_gauss_linear`), with 95% weight on the recent week and a sharp
88+
cutoff after 3 weeks.
8789
>>> smoother = Smoother(smoother_name='savgol', poly_fit_degree=1, window_length=21,
8890
gaussian_bandwidth=36)
8991
>>> smoothed_signal = smoother.smooth(signal)
@@ -111,26 +113,19 @@ def __init__(
111113
self.impute_method = impute_method
112114
self.minval = minval
113115
self.boundary_method = boundary_method
114-
if smoother_name == "savgol":
115-
self.coeffs = self.savgol_coeffs(
116-
-self.window_length + 1,
117-
0,
118-
self.poly_fit_degree,
119-
self.gaussian_bandwidth,
120-
)
121-
else:
122-
self.coeffs = None
123116

124117
valid_smoothers = {"savgol", "left_gauss_linear", "moving_average", "identity"}
125-
118+
valid_impute_methods = {"savgol", "zeros", None}
126119
if self.smoother_name not in valid_smoothers:
127120
raise ValueError("Invalid smoother name given.")
128-
129-
valid_impute_methods = {"savgol", "zeros", None}
130-
131121
if self.impute_method not in valid_impute_methods:
132122
raise ValueError("Invalid impute method given.")
133123

124+
if smoother_name == "savgol":
125+
self.coeffs = self.savgol_coeffs(-self.window_length + 1, 0)
126+
else:
127+
self.coeffs = None
128+
134129
def smooth(self, signal):
135130
"""
136131
The major workhorse smoothing function. Can use one of three smoothing
@@ -145,7 +140,9 @@ def smooth(self, signal):
145140
A smoothed 1D signal.
146141
"""
147142
if len(signal) < self.window_length:
148-
raise ValueError("The window_length must be smaller than the length of the signal.")
143+
raise ValueError(
144+
"The window_length must be smaller than the length of the signal."
145+
)
149146

150147
signal = self.impute(signal)
151148

@@ -175,6 +172,10 @@ def impute(self, signal):
175172
Imputed signal.
176173
"""
177174
if self.impute_method == "savgol":
175+
# We cannot impute if the signal begins with a NaN (there is no information to go by).
176+
# To preserve input-output array lengths, this util will not drop NaNs for you.
177+
if np.isnan(signal[0]):
178+
raise ValueError("The signal should not begin with a nan value.")
178179
imputed_signal = self.savgol_impute(signal)
179180
elif self.impute_method == "zeros":
180181
imputed_signal = np.nan_to_num(signal)
@@ -221,8 +222,6 @@ def left_gauss_linear_smoother(self, signal):
221222
At each time t, we use the data from times 1, ..., t-dt, weighted
222223
using the Gaussian kernel, to produce the estimate at time t.
223224
224-
For math details, see the smoothing_docs folder.
225-
226225
Parameters
227226
----------
228227
signal: np.ndarray
@@ -261,8 +260,8 @@ def savgol_predict(self, signal):
261260
"""
262261
Fits a polynomial through the values given by the signal and returns the value
263262
of the polynomial at the right-most signal-value. More precisely, fits a polynomial
264-
f(t) of degree poly_fit_degree through the points signal[0], signal[1] ..., signal[-1],
265-
and returns the evaluation of the polynomial at the location of signal[-1].
263+
f(t) of degree poly_fit_degree through the points signal[-n], signal[-n+1] ..., signal[-1],
264+
and returns the evaluation of the polynomial at the location of signal[0].
266265
267266
Parameters
268267
----------
@@ -271,23 +270,20 @@ def savgol_predict(self, signal):
271270
272271
Returns
273272
----------
274-
coeffs: np.ndarray
275-
A vector of coefficients of length nl that determines the savgol
276-
convolution filter.
273+
predicted_value: float
274+
The anticipated value that comes after the end of the signal based on a polynomial fit.
277275
"""
278-
coeffs = self.savgol_coeffs(
279-
-len(signal) + 1, 0, self.poly_fit_degree, self.gaussian_bandwidth
280-
)
281-
return signal @ coeffs
276+
coeffs = self.savgol_coeffs(-len(signal) + 1, 0)
277+
predicted_value = signal @ coeffs
278+
return predicted_value
282279

283-
@classmethod
284-
def savgol_coeffs(cls, nl, nr, poly_fit_degree, gaussian_bandwidth=100):
280+
def savgol_coeffs(self, nl, nr):
285281
"""
286282
Solves for the Savitzky-Golay coefficients. The coefficients c_i
287283
give a filter so that
288284
y = \sum_{i=-{n_l}}^{n_r} c_i x_i
289285
is the value at 0 (thus the constant term) of the polynomial fit
290-
through the points {x_i}. The coefficients are c_i are caluclated as
286+
through the points {x_i}. The coefficients are c_i are calculated as
291287
c_i = ((A.T @ A)^(-1) @ (A.T @ e_i))_0
292288
where A is the design matrix of the polynomial fit and e_i is the standard
293289
basis vector i. This is currently done via a full inversion, which can be
@@ -317,16 +313,15 @@ def savgol_coeffs(cls, nl, nr, poly_fit_degree, gaussian_bandwidth=100):
317313
raise warnings.warn("The filter is no longer causal.")
318314

319315
A = np.vstack(
320-
[np.arange(nl, nr + 1) ** j for j in range(poly_fit_degree + 1)]
316+
[np.arange(nl, nr + 1) ** j for j in range(self.poly_fit_degree + 1)]
321317
).T
322318

323-
if gaussian_bandwidth is None:
319+
if self.gaussian_bandwidth is None:
324320
mat_inverse = np.linalg.inv(A.T @ A) @ A.T
325321
else:
326-
weights = np.exp(-((np.arange(nl, nr + 1)) ** 2) / gaussian_bandwidth)
322+
weights = np.exp(-((np.arange(nl, nr + 1)) ** 2) / self.gaussian_bandwidth)
327323
mat_inverse = np.linalg.inv((A.T * weights) @ A) @ (A.T * weights)
328324
window_length = nr - nl + 1
329-
basis_vector = np.zeros(window_length)
330325
coeffs = np.zeros(window_length)
331326
for i in range(window_length):
332327
basis_vector = np.zeros(window_length)
@@ -412,23 +407,20 @@ def savgol_impute(self, signal):
412407
An imputed 1D signal.
413408
"""
414409
signal_imputed = np.copy(signal)
415-
if np.isnan(signal[0]):
416-
raise ValueError("The signal should not begin with a nan value.")
417410
for ix in np.where(np.isnan(signal))[0]:
411+
# Boundary cases
418412
if ix < self.window_length:
413+
# A nan following a single value should just be extended
419414
if ix == 1:
420415
signal_imputed[ix] = signal_imputed[0]
416+
# Otherwise, use savgol fitting
421417
else:
422-
coeffs = self.savgol_coeffs(
423-
-ix, -1, self.poly_fit_degree, self.gaussian_bandwidth
424-
)
418+
coeffs = self.savgol_coeffs(-ix, -1)
425419
signal_imputed[ix] = signal_imputed[:ix] @ coeffs
420+
# Use a polynomial fit on the past window length to impute
426421
else:
427-
coeffs = self.savgol_coeffs(
428-
-self.window_length,
429-
-1,
430-
self.poly_fit_degree,
431-
self.gaussian_bandwidth,
422+
coeffs = self.savgol_coeffs(-self.window_length, -1)
423+
signal_imputed[ix] = (
424+
signal_imputed[ix - self.window_length : ix] @ coeffs
432425
)
433-
signal_imputed[ix] = signal_imputed[ix - self.window_length : ix] @ coeffs
434426
return signal_imputed

_delphi_utils_python/tests/test_smooth.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def test_left_gauss_linear_smoother(self):
4242

4343
# The smoother should basically be the identity when the Gaussian kernel
4444
# is set to weigh the present value overwhelmingly
45-
signal = np.arange(1, 10) + np.random.normal(0, 1, 9)
45+
signal = np.arange(1, 30) + np.random.normal(0, 1, 29)
4646
smoother = Smoother(smoother_name="left_gauss_linear", gaussian_bandwidth=0.1)
4747
assert np.allclose(smoother.smooth(signal)[1:], signal[1:])
4848

@@ -100,7 +100,7 @@ def test_causal_savgol_smoother(self):
100100
# and the polynomial fit degree is set to 1. Beyond that, there will be very small
101101
# differences between the signals (due to "left_gauss_linear" not having a window_length
102102
# cutoff).
103-
window_length = 20
103+
window_length = 50
104104
signal = np.arange(window_length) + np.random.randn(window_length)
105105
smoother = Smoother(smoother_name="left_gauss_linear")
106106
smoothed_signal1 = smoother.smooth(signal)

0 commit comments

Comments
 (0)