Skip to content

Commit ad77995

Browse files
committed
Updated the smoother to remove a non-invertible design matrix bug:
* the error arose in the case of fitting a polynomial to fewer datapoints than polynomial degree
1 parent a471262 commit ad77995

File tree

1 file changed

+41
-34
lines changed

1 file changed

+41
-34
lines changed

_delphi_utils_python/delphi_utils/smooth.py

Lines changed: 41 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,9 @@ def __init__(
142142
if smoother_name == "savgol":
143143
# The polynomial fitting is done on a past window of size window_length
144144
# including the current day value.
145-
self.coeffs = self.savgol_coeffs(-self.window_length + 1, 0)
145+
self.coeffs = self.savgol_coeffs(
146+
-self.window_length + 1, 0, self.poly_fit_degree
147+
)
146148
else:
147149
self.coeffs = None
148150

@@ -278,30 +280,35 @@ def left_gauss_linear_smoother(self, signal):
278280
signal_smoothed[signal_smoothed <= self.minval] = self.minval
279281
return signal_smoothed
280282

281-
def savgol_predict(self, signal):
283+
def savgol_predict(self, signal, poly_fit_degree, nr):
282284
"""Predict a single value using the savgol method.
283285
284286
Fits a polynomial through the values given by the signal and returns the value
285-
of the polynomial at the right-most signal-value. More precisely, fits a polynomial
286-
f(t) of degree poly_fit_degree through the points signal[-n], signal[-n+1] ..., signal[-1],
287-
and returns the evaluation of the polynomial at the location of signal[0].
287+
of the polynomial at the right-most signal-value. More precisely, for a signal of length
288+
n, fits a poly_fit_degree polynomial through the points signal[-n+1+nr], signal[-n+2+nr],
289+
..., signal[nr], and returns the evaluation of the polynomial at signal[0]. Hence, if
290+
nr=0, then the last value of the signal is smoothed, and if nr=-1, then the value after
291+
the last signal value is anticipated.
288292
289293
Parameters
290294
----------
291295
signal: np.ndarray
292296
A 1D signal to smooth.
297+
poly_fit_degree: int
298+
The degree of the polynomial fit.
299+
nr: int
300+
An integer that determines the position of the predicted value relative to the signal.
293301
294302
Returns
295303
----------
296304
predicted_value: float
297305
The anticipated value that comes after the end of the signal based on a polynomial fit.
298306
"""
299-
# Add one
300-
coeffs = self.savgol_coeffs(-len(signal) + 1, 0)
307+
coeffs = self.savgol_coeffs(-len(signal) + 1 + nr, nr, poly_fit_degree)
301308
predicted_value = signal @ coeffs
302309
return predicted_value
303310

304-
def savgol_coeffs(self, nl, nr):
311+
def savgol_coeffs(self, nl, nr, poly_fit_degree):
305312
"""Solve for the Savitzky-Golay coefficients.
306313
307314
The coefficients c_i give a filter so that
@@ -321,23 +328,20 @@ def savgol_coeffs(self, nl, nr):
321328
The right window bound for the polynomial fit, inclusive.
322329
poly_fit_degree: int
323330
The degree of the polynomial to be fit.
324-
gaussian_bandwidth: float or None
325-
If float, performs regression with Gaussian weights whose variance is
326-
the gaussian_bandwidth. If None, performs unweighted regression.
327331
328332
Returns
329333
----------
330334
coeffs: np.ndarray
331-
A vector of coefficients of length nl that determines the savgol
335+
A vector of coefficients of length nr - nl + 1 that determines the savgol
332336
convolution filter.
333337
"""
334338
if nl >= nr:
335339
raise ValueError("The left window bound should be less than the right.")
336340
if nr > 0:
337-
raise warnings.warn("The filter is no longer causal.")
341+
warnings.warn("The filter is no longer causal.")
338342

339343
A = np.vstack(
340-
[np.arange(nl, nr + 1) ** j for j in range(self.poly_fit_degree + 1)]
344+
[np.arange(nl, nr + 1) ** j for j in range(poly_fit_degree + 1)]
341345
).T
342346

343347
if self.gaussian_bandwidth is None:
@@ -388,8 +392,10 @@ def savgol_smoother(self, signal):
388392
# At the very edge, the design matrix is often singular, in which case
389393
# we just fall back to the raw signal
390394
try:
391-
signal_smoothed[ix] = self.savgol_predict(signal[:ix+1])
392-
except np.linalg.LinAlgError:
395+
signal_smoothed[ix] = self.savgol_predict(
396+
signal[: ix + 1], self.poly_fit_degree, 0
397+
)
398+
except np.linalg.LinAlgError: # for small ix, the design matrix is singular
393399
signal_smoothed[ix] = signal[ix]
394400
return signal_smoothed
395401
elif self.boundary_method == "identity":
@@ -402,19 +408,13 @@ def savgol_smoother(self, signal):
402408
def savgol_impute(self, signal):
403409
"""Impute the nan values in signal using savgol.
404410
405-
This method fills the nan values in the signal with an M-degree polynomial fit
411+
This method fills the nan values in the signal with a quadratic polynomial fit
406412
on a rolling window of the immediate past up to window_length data points.
407413
408-
In the case of a single data point in the past, the single data point is
409-
continued. In the case of no data points in the past (i.e. the signal starts
410-
with nan), an error is raised.
414+
A number of boundary cases are handled involving nan filling close to the boundary.
411415
412416
Note that in the case of many adjacent nans, the method will use previously
413-
imputed values to do the fitting for later values. E.g. for
414-
>>> x = np.array([1.0, 2.0, np.nan, 1.0, np.nan])
415-
the last np.nan will be fit on np.array([1.0, 2.0, *, 1.0]), where * is the
416-
result of imputing based on np.array([1.0, 2.0]) (depends on the savgol
417-
settings).
417+
imputed values to do the fitting for later values.
418418
419419
Parameters
420420
----------
@@ -430,17 +430,24 @@ def savgol_impute(self, signal):
430430
for ix in np.where(np.isnan(signal))[0]:
431431
# Boundary cases
432432
if ix < self.window_length:
433-
# A nan following a single value should just be extended
433+
# At the boundary, a single value should just be extended
434434
if ix == 1:
435-
signal_imputed[ix] = signal_imputed[0]
436-
# Otherwise, use savgol fitting
435+
signal_imputed[ix] = signal_imputed[ix - 1]
436+
# Reduce the polynomial degree if needed
437+
elif ix == 2:
438+
signal_imputed[ix] = self.savgol_predict(
439+
signal_imputed[:ix], 1, -1
440+
)
441+
# Otherwise, use savgol fitting on the largest window prior
437442
else:
438-
coeffs = self.savgol_coeffs(-ix, -1)
439-
signal_imputed[ix] = signal_imputed[:ix] @ coeffs
440-
# Use a polynomial fit on the past window length to impute
443+
signal_imputed[ix] = self.savgol_predict(
444+
signal_imputed[:ix], self.poly_fit_degree, -1
445+
)
446+
# Away from the boundary, use savgol fitting on a fixed window
441447
else:
442-
coeffs = self.savgol_coeffs(-self.window_length, -1)
443-
signal_imputed[ix] = (
444-
signal_imputed[ix - self.window_length : ix] @ coeffs
448+
signal_imputed[ix] = self.savgol_predict(
449+
signal_imputed[ix - self.window_length : ix],
450+
self.poly_fit_degree,
451+
-1,
445452
)
446453
return signal_imputed

0 commit comments

Comments
 (0)