Skip to content

Commit fa0b6e2

Browse files
committed
Update smoother imputer:
* separate out the smoother's polynomial fit degree from the imputer's
* default the imputer's fit degree to 2
* add tests
1 parent ad01388 commit fa0b6e2

File tree

2 files changed

+45
-31
lines changed

2 files changed

+45
-31
lines changed

_delphi_utils_python/delphi_utils/smooth.py

Lines changed: 21 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -150,7 +150,7 @@ def __init__(
150150
else:
151151
self.coeffs = None
152152

153-
def smooth(self, signal: Union[np.ndarray, pd.Series]) -> Union[np.ndarray, pd.Series]:
153+
def smooth(self, signal: Union[np.ndarray, pd.Series], impute_order=2) -> Union[np.ndarray, pd.Series]:
154154
"""Apply a smoother to a signal.
155155
156156
The major workhorse smoothing function. Imputes the nans and then applies
@@ -160,6 +160,9 @@ def smooth(self, signal: Union[np.ndarray, pd.Series]) -> Union[np.ndarray, pd.S
160160
----------
161161
signal: np.ndarray or pd.Series
162162
A 1D signal to be smoothed.
163+
impute_order: int
164+
The polynomial order of the fit used for imputation. By default, this is set to
165+
2.
163166
164167
Returns
165168
----------
@@ -184,7 +187,7 @@ def smooth(self, signal: Union[np.ndarray, pd.Series]) -> Union[np.ndarray, pd.S
184187
signal_smoothed = signal.copy()
185188
else:
186189
# Impute
187-
signal = self.impute(signal)
190+
signal = self.impute(signal, impute_order=impute_order)
188191

189192
# Smooth
190193
if self.smoother_name == "savgol":
@@ -204,7 +207,7 @@ def smooth(self, signal: Union[np.ndarray, pd.Series]) -> Union[np.ndarray, pd.S
204207
signal_smoothed.index = pandas_index
205208
return signal_smoothed
206209

207-
def impute(self, signal):
210+
def impute(self, signal, impute_order=2):
208211
"""Impute the nan values in the signal.
209212
210213
See the class docstring for an explanation of the impute methods.
@@ -213,6 +216,8 @@ def impute(self, signal):
213216
----------
214217
signal: np.ndarray
215218
1D signal to be imputed.
219+
impute_order: int
220+
The polynomial order of the fit used for imputation.
216221
217222
Returns
218223
-------
@@ -224,7 +229,7 @@ def impute(self, signal):
224229
# To preserve input-output array lengths, this util will not drop NaNs for you.
225230
if np.isnan(signal[0]):
226231
raise ValueError("The signal should not begin with a nan value.")
227-
imputed_signal = self.savgol_impute(signal)
232+
imputed_signal = self.savgol_impute(signal, impute_order)
228233
elif self.impute_method == "zeros":
229234
imputed_signal = np.nan_to_num(signal)
230235
elif self.impute_method is None:
@@ -429,10 +434,10 @@ def savgol_smoother(self, signal):
429434
elif self.boundary_method == "nan":
430435
return signal_smoothed
431436

432-
def savgol_impute(self, signal):
437+
def savgol_impute(self, signal, impute_order):
433438
"""Impute the nan values in signal using savgol.
434439
435-
This method fills the nan values in the signal with a quadratic polynomial fit
440+
This method fills the nan values in the signal with polynomial interpolation
436441
on a rolling window of the immediate past up to window_length data points.
437442
438443
A number of boundary cases are handled involving nan filling close to the boundary.
@@ -444,34 +449,36 @@ def savgol_impute(self, signal):
444449
----------
445450
signal: np.ndarray
446451
A 1D signal to be imputed.
452+
impute_order: int
453+
The polynomial order of the fit used for imputation.
447454
448455
Returns
449456
----------
450457
signal_imputed: np.ndarray
451458
An imputed 1D signal.
452459
"""
460+
if impute_order > self.window_length:
461+
raise ValueError("Impute order must be smaller than window length.")
462+
453463
signal_imputed = np.copy(signal)
454464
for ix in np.where(np.isnan(signal_imputed))[0]:
455465
# Boundary cases
456466
if ix < self.window_length:
457467
# At the boundary, a single value should just be extended
458468
if ix == 1:
459469
signal_imputed[ix] = signal_imputed[ix - 1]
460-
# Reduce the polynomial degree if needed
461-
elif ix == 2:
462-
signal_imputed[ix] = self.savgol_predict(
463-
signal_imputed[:ix], 1, -1
464-
)
465-
# Otherwise, use savgol fitting on the largest window prior
470+
# Otherwise, use savgol fitting on the largest window prior,
471+
# reduce the polynomial degree if needed (can't fit if the
472+
# imputation order is larger than the available data)
466473
else:
467474
signal_imputed[ix] = self.savgol_predict(
468-
signal_imputed[:ix], self.poly_fit_degree, -1
475+
signal_imputed[:ix], min(ix-1, impute_order), -1
469476
)
470477
# Away from the boundary, use savgol fitting on a fixed window
471478
else:
472479
signal_imputed[ix] = self.savgol_predict(
473480
signal_imputed[ix - self.window_length : ix],
474-
self.poly_fit_degree,
481+
impute_order,
475482
-1,
476483
)
477484
return signal_imputed

_delphi_utils_python/tests/test_smooth.py

Lines changed: 24 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -158,6 +158,12 @@ def test_causal_savgol_smoother(self):
158158
smoothed_signal = smoother.smooth(signal)
159159
assert np.allclose(smoothed_signal, signal, equal_nan=True)
160160

161+
# test window_length > len(signal) and boundary_method="identity"
162+
signal = np.arange(20)
163+
smoother = Smoother(boundary_method="identity", window_length=30)
164+
smoothed_signal = smoother.smooth(signal)
165+
assert np.allclose(signal, smoothed_signal)
166+
161167
def test_impute(self):
162168
# test front nan error
163169
with pytest.raises(ValueError):
@@ -178,7 +184,7 @@ def test_impute(self):
178184
signal = np.array([i if i % 3 else np.nan for i in range(1, 40)])
179185
# test that the non-nan values are unchanged
180186
not_nans_ixs = np.bitwise_xor(np.isnan(signal, where=True), np.full(len(signal), True))
181-
smoothed_signal = Smoother().savgol_impute(signal)
187+
smoothed_signal = Smoother().impute(signal)
182188
assert np.allclose(signal[not_nans_ixs], smoothed_signal[not_nans_ixs])
183189
# test that the imputer is close to the true line
184190
assert np.allclose(range(1, 40), smoothed_signal, atol=0.5)
@@ -187,47 +193,41 @@ def test_impute(self):
187193
signal = np.hstack([np.arange(10), [np.nan], np.arange(10)])
188194
window_length = 10
189195
smoother = Smoother(
190-
smoother_name="savgol", window_length=window_length, poly_fit_degree=1
196+
window_length=window_length, poly_fit_degree=1
191197
)
192-
imputed_signal = smoother.savgol_impute(signal)
198+
imputed_signal = smoother.impute(signal)
193199
assert np.allclose(imputed_signal, np.hstack([np.arange(11), np.arange(10)]))
194200
smoother = Smoother(
195-
smoother_name="savgol", window_length=window_length, poly_fit_degree=2
201+
window_length=window_length, poly_fit_degree=2
196202
)
197-
imputed_signal = smoother.savgol_impute(signal)
203+
imputed_signal = smoother.impute(signal)
198204
assert np.allclose(imputed_signal, np.hstack([np.arange(11), np.arange(10)]))
199205

200206
# if there are nans on the boundary, should dynamically change window
201207
signal = np.hstack(
202208
[np.arange(5), [np.nan], np.arange(20), [np.nan], np.arange(5)]
203209
)
204210
smoother = Smoother(
205-
smoother_name="savgol", window_length=window_length, poly_fit_degree=2
211+
window_length=window_length, poly_fit_degree=2
206212
)
207-
imputed_signal = smoother.savgol_impute(signal)
213+
imputed_signal = smoother.impute(signal)
208214
assert np.allclose(
209215
imputed_signal, np.hstack([np.arange(6), np.arange(21), np.arange(5)]),
210216
)
211217

212218
# if the array begins with np.nan, we should tell the user to peel it off before sending
213219
signal = np.hstack([[np.nan], np.arange(20), [np.nan], np.arange(5)])
214220
smoother = Smoother(
215-
smoother_name="savgol", window_length=window_length, poly_fit_degree=2
221+
window_length=window_length, poly_fit_degree=2
216222
)
217223
with pytest.raises(ValueError):
218-
imputed_signal = smoother.savgol_impute(signal)
219-
220-
# test window_length > len(signal) and boundary_method="identity"
221-
signal = np.arange(20)
222-
smoother = Smoother(smoother_name="savgol", boundary_method="identity", window_length=30)
223-
smoothed_signal = smoother.smooth(signal)
224-
assert np.allclose(signal, smoothed_signal)
224+
imputed_signal = smoother.impute(signal)
225225

226226
# test the boundary methods
227227
signal = np.arange(20)
228-
smoother = Smoother(smoother_name="savgol", poly_fit_degree=0,
228+
smoother = Smoother(poly_fit_degree=0,
229229
boundary_method="identity", window_length=10)
230-
smoothed_signal = smoother.savgol_impute(signal)
230+
smoothed_signal = smoother.impute(signal)
231231
assert np.allclose(smoothed_signal, signal)
232232

233233
# test that we don't hit a matrix inversion error when there are
@@ -238,6 +238,13 @@ def test_impute(self):
238238
smoothed_signal = smoother.impute(signal)
239239
assert np.allclose(smoothed_signal, np.hstack([[1], np.ones(12), np.arange(5)]))
240240

241+
# test the impute_order argument
242+
signal = np.hstack([[1, np.nan, np.nan, 2], np.arange(5)])
243+
smoother = Smoother()
244+
smoothed_signal = smoother.impute(signal, impute_order=1)
245+
assert np.allclose(smoothed_signal, np.hstack([[1, 1, 1, 2], np.arange(5)]))
246+
247+
241248
def test_pandas_series_input(self):
242249
# The savgol method should match the linear regression method on the first
243250
# window_length-many values of the signal, if the savgol_weighting is set to true,

0 commit comments

Comments
 (0)