Skip to content

Commit a5f81ba

Browse files
committed
Updated the smoother to remove a non-invertible design matrix bug
1 parent 6681a47 commit a5f81ba

File tree

2 files changed

+68
-37
lines changed

2 files changed

+68
-37
lines changed

_delphi_utils_python/delphi_utils/smooth.py

Lines changed: 43 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,9 @@ def __init__(
142142
if smoother_name == "savgol":
143143
# The polynomial fitting is done on a past window of size window_length
144144
# including the current day value.
145-
self.coeffs = self.savgol_coeffs(-self.window_length + 1, 0)
145+
self.coeffs = self.savgol_coeffs(
146+
-self.window_length + 1, 0, self.poly_fit_degree
147+
)
146148
else:
147149
self.coeffs = None
148150

@@ -279,30 +281,35 @@ def left_gauss_linear_smoother(self, signal):
279281
signal_smoothed[signal_smoothed <= self.minval] = self.minval
280282
return signal_smoothed
281283

282-
def savgol_predict(self, signal):
284+
def savgol_predict(self, signal, poly_fit_degree, nr):
283285
"""Predict a single value using the savgol method.
284286
285287
Fits a polynomial through the values given by the signal and returns the value
286-
of the polynomial at the right-most signal-value. More precisely, fits a polynomial
287-
f(t) of degree poly_fit_degree through the points signal[-n], signal[-n+1] ..., signal[-1],
288-
and returns the evaluation of the polynomial at the location of signal[0].
288+
of the polynomial at the right-most signal-value. More precisely, for a signal of length
289+
n, fits a poly_fit_degree polynomial through the points signal[-n+1+nr], signal[-n+2+nr],
290+
..., signal[nr], and returns the evaluation of the polynomial at signal[0]. Hence, if
291+
nr=0, then the last value of the signal is smoothed, and if nr=-1, then the value after
292+
the last signal value is anticipated.
289293
290294
Parameters
291295
----------
292296
signal: np.ndarray
293297
A 1D signal to smooth.
298+
poly_fit_degree: int
299+
The degree of the polynomial fit.
300+
nr: int
301+
An integer that determines the position of the predicted value relative to the signal.
294302
295303
Returns
296304
----------
297305
predicted_value: float
298306
The anticipated value that comes after the end of the signal based on a polynomial fit.
299307
"""
300-
# Add one
301-
coeffs = self.savgol_coeffs(-len(signal) + 1, 0)
308+
coeffs = self.savgol_coeffs(-len(signal) + 1 + nr, nr, poly_fit_degree)
302309
predicted_value = signal @ coeffs
303310
return predicted_value
304311

305-
def savgol_coeffs(self, nl, nr):
312+
def savgol_coeffs(self, nl, nr, poly_fit_degree):
306313
"""Solve for the Savitzky-Golay coefficients.
307314
308315
The coefficients c_i give a filter so that
@@ -322,23 +329,20 @@ def savgol_coeffs(self, nl, nr):
322329
The right window bound for the polynomial fit, inclusive.
323330
poly_fit_degree: int
324331
The degree of the polynomial to be fit.
325-
gaussian_bandwidth: float or None
326-
If float, performs regression with Gaussian weights whose variance is
327-
the gaussian_bandwidth. If None, performs unweighted regression.
328332
329333
Returns
330334
----------
331335
coeffs: np.ndarray
332-
A vector of coefficients of length nl that determines the savgol
336+
A vector of coefficients of length nr - nl + 1 that determines the savgol
333337
convolution filter.
334338
"""
335339
if nl >= nr:
336340
raise ValueError("The left window bound should be less than the right.")
337341
if nr > 0:
338-
raise warnings.warn("The filter is no longer causal.")
342+
warnings.warn("The filter is no longer causal.")
339343

340-
A = np.vstack( # pylint: disable=invalid-name
341-
[np.arange(nl, nr + 1) ** j for j in range(self.poly_fit_degree + 1)]
344+
A = np.vstack(
345+
[np.arange(nl, nr + 1) ** j for j in range(poly_fit_degree + 1)]
342346
).T
343347

344348
if self.gaussian_bandwidth is None:
@@ -389,8 +393,10 @@ def savgol_smoother(self, signal):
389393
# At the very edge, the design matrix is often singular, in which case
390394
# we just fall back to the raw signal
391395
try:
392-
signal_smoothed[ix] = self.savgol_predict(signal[:ix+1])
393-
except np.linalg.LinAlgError:
396+
signal_smoothed[ix] = self.savgol_predict(
397+
signal[: ix + 1], self.poly_fit_degree, 0
398+
)
399+
except np.linalg.LinAlgError: # for small ix, the design matrix is singular
394400
signal_smoothed[ix] = signal[ix]
395401
return signal_smoothed
396402
elif self.boundary_method == "identity":
@@ -403,19 +409,13 @@ def savgol_smoother(self, signal):
403409
def savgol_impute(self, signal):
404410
"""Impute the nan values in signal using savgol.
405411
406-
This method fills the nan values in the signal with an M-degree polynomial fit
412+
This method fills the nan values in the signal with a quadratic polynomial fit
407413
on a rolling window of the immediate past up to window_length data points.
408414
409-
In the case of a single data point in the past, the single data point is
410-
continued. In the case of no data points in the past (i.e. the signal starts
411-
with nan), an error is raised.
415+
A number of boundary cases are handled involving nan filling close to the boundary.
412416
413417
Note that in the case of many adjacent nans, the method will use previously
414-
imputed values to do the fitting for later values. E.g. for
415-
>>> x = np.array([1.0, 2.0, np.nan, 1.0, np.nan])
416-
the last np.nan will be fit on np.array([1.0, 2.0, *, 1.0]), where * is the
417-
result of imputing based on np.array([1.0, 2.0]) (depends on the savgol
418-
settings).
418+
imputed values to do the fitting for later values.
419419
420420
Parameters
421421
----------
@@ -428,20 +428,27 @@ def savgol_impute(self, signal):
428428
An imputed 1D signal.
429429
"""
430430
signal_imputed = np.copy(signal)
431-
for ix in np.where(np.isnan(signal))[0]:
431+
for ix in np.where(np.isnan(signal_imputed))[0]:
432432
# Boundary cases
433433
if ix < self.window_length:
434-
# A nan following a single value should just be extended
434+
# At the boundary, a single value should just be extended
435435
if ix == 1:
436-
signal_imputed[ix] = signal_imputed[0]
437-
# Otherwise, use savgol fitting
436+
signal_imputed[ix] = signal_imputed[ix - 1]
437+
# Reduce the polynomial degree if needed
438+
elif ix == 2:
439+
signal_imputed[ix] = self.savgol_predict(
440+
signal_imputed[:ix], 1, -1
441+
)
442+
# Otherwise, use savgol fitting on the largest window prior
438443
else:
439-
coeffs = self.savgol_coeffs(-ix, -1)
440-
signal_imputed[ix] = signal_imputed[:ix] @ coeffs
441-
# Use a polynomial fit on the past window length to impute
444+
signal_imputed[ix] = self.savgol_predict(
445+
signal_imputed[:ix], self.poly_fit_degree, -1
446+
)
447+
# Away from the boundary, use savgol fitting on a fixed window
442448
else:
443-
coeffs = self.savgol_coeffs(-self.window_length, -1)
444-
signal_imputed[ix] = (
445-
signal_imputed[ix - self.window_length : ix] @ coeffs
449+
signal_imputed[ix] = self.savgol_predict(
450+
signal_imputed[ix - self.window_length : ix],
451+
self.poly_fit_degree,
452+
-1,
446453
)
447454
return signal_imputed

_delphi_utils_python/tests/test_smooth.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,24 @@
1010

1111

1212
class TestSmoothers:
13+
def test_bad_inputs(self):
14+
with pytest.raises(ValueError):
15+
Smoother(smoother_name="hamburger")
16+
with pytest.raises(ValueError):
17+
Smoother(impute_method="hamburger")
18+
with pytest.raises(ValueError):
19+
Smoother(boundary_method="hamburger")
20+
1321
def test_identity_smoother(self):
1422
signal = np.arange(30) + np.random.rand(30)
1523
assert np.allclose(signal, Smoother(smoother_name="identity").smooth(signal))
1624

1725
def test_moving_average_smoother(self):
26+
# Test non-integer window-length
27+
with pytest.raises(ValueError):
28+
signal = np.array([1, 1, 1])
29+
Smoother(smoother_name="window_average", window_length=5.5).smooth(signal)
30+
1831
# The raw and smoothed lengths should match
1932
signal = np.ones(30)
2033
smoother = Smoother(smoother_name="moving_average")
@@ -109,10 +122,13 @@ def test_causal_savgol_smoother(self):
109122
smoother_name="savgol", window_length=window_length, poly_fit_degree=1,
110123
)
111124
smoothed_signal2 = smoother.smooth(signal)
112-
113125
assert np.allclose(smoothed_signal1, smoothed_signal2)
114126

115127
def test_impute(self):
128+
# test front nan error
129+
with pytest.raises(ValueError):
130+
Smoother().impute(signal=np.array([np.nan, 1, 1]))
131+
116132
# test the nan imputer
117133
signal = np.array([i if i % 3 else np.nan for i in range(1, 40)])
118134
assert np.allclose(Smoother(impute_method=None).impute(signal), signal, equal_nan=True)
@@ -180,6 +196,14 @@ def test_impute(self):
180196
smoothed_signal = smoother.savgol_impute(signal)
181197
assert np.allclose(smoothed_signal, signal)
182198

199+
# test that we don't hit a matrix inversion error when there are
200+
# nans on less than window_length away from the boundary
201+
signal = np.hstack([[1], np.nan*np.ones(12), np.arange(5)])
202+
smoother = Smoother(smoother_name="savgol", poly_fit_degree=2,
203+
boundary_method="identity", window_length=10)
204+
smoothed_signal = smoother.impute(signal)
205+
assert np.allclose(smoothed_signal, np.hstack([[1], np.ones(12), np.arange(5)]))
206+
183207
def test_pandas_series_input(self):
184208
# The savgol method should match the linear regression method on the first
185209
# window_length-many values of the signal, if the savgol_weighting is set to true,

0 commit comments

Comments
 (0)