Skip to content

Commit 95ece40

Browse files
authored
Merge pull request #490 from cmu-delphi/fix_smoother_imputing_polyorder
Fix smoother imputing polynomial order
2 parents 342df53 + fa0b6e2 commit 95ece40

File tree

2 files changed

+58
-32
lines changed

2 files changed

+58
-32
lines changed

_delphi_utils_python/delphi_utils/smooth.py

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def __init__(
150150
else:
151151
self.coeffs = None
152152

153-
def smooth(self, signal: Union[np.ndarray, pd.Series]) -> Union[np.ndarray, pd.Series]:
153+
def smooth(self, signal: Union[np.ndarray, pd.Series], impute_order=2) -> Union[np.ndarray, pd.Series]:
154154
"""Apply a smoother to a signal.
155155
156156
The major workhorse smoothing function. Imputes the nans and then applies
@@ -160,6 +160,9 @@ def smooth(self, signal: Union[np.ndarray, pd.Series]) -> Union[np.ndarray, pd.S
160160
----------
161161
signal: np.ndarray or pd.Series
162162
A 1D signal to be smoothed.
163+
impute_order: int
164+
The polynomial order of the fit used for imputation. By default, this is set to
165+
2.
163166
164167
Returns
165168
----------
@@ -172,6 +175,7 @@ def smooth(self, signal: Union[np.ndarray, pd.Series]) -> Union[np.ndarray, pd.S
172175
return signal
173176

174177
is_pandas_series = isinstance(signal, pd.Series)
178+
pandas_index = signal.index if is_pandas_series else None
175179
signal = signal.to_numpy() if is_pandas_series else signal
176180

177181
# Find where the first non-nan value is located and truncate the initial nans
@@ -183,7 +187,7 @@ def smooth(self, signal: Union[np.ndarray, pd.Series]) -> Union[np.ndarray, pd.S
183187
signal_smoothed = signal.copy()
184188
else:
185189
# Impute
186-
signal = self.impute(signal)
190+
signal = self.impute(signal, impute_order=impute_order)
187191

188192
# Smooth
189193
if self.smoother_name == "savgol":
@@ -197,10 +201,13 @@ def smooth(self, signal: Union[np.ndarray, pd.Series]) -> Union[np.ndarray, pd.S
197201

198202
# Append the nans back, since we want to preserve length
199203
signal_smoothed = np.hstack([np.nan*np.ones(ix), signal_smoothed])
200-
signal_smoothed = signal_smoothed if not is_pandas_series else pd.Series(signal_smoothed)
204+
# Convert back to pandas if necessary
205+
if is_pandas_series:
206+
signal_smoothed = pd.Series(signal_smoothed)
207+
signal_smoothed.index = pandas_index
201208
return signal_smoothed
202209

203-
def impute(self, signal):
210+
def impute(self, signal, impute_order=2):
204211
"""Impute the nan values in the signal.
205212
206213
See the class docstring for an explanation of the impute methods.
@@ -209,6 +216,8 @@ def impute(self, signal):
209216
----------
210217
signal: np.ndarray
211218
1D signal to be imputed.
219+
impute_order: int
220+
The polynomial order of the fit used for imputation.
212221
213222
Returns
214223
-------
@@ -220,7 +229,7 @@ def impute(self, signal):
220229
# To preserve input-output array lengths, this util will not drop NaNs for you.
221230
if np.isnan(signal[0]):
222231
raise ValueError("The signal should not begin with a nan value.")
223-
imputed_signal = self.savgol_impute(signal)
232+
imputed_signal = self.savgol_impute(signal, impute_order)
224233
elif self.impute_method == "zeros":
225234
imputed_signal = np.nan_to_num(signal)
226235
elif self.impute_method is None:
@@ -424,10 +433,10 @@ def savgol_smoother(self, signal):
424433
elif self.boundary_method == "nan":
425434
return signal_smoothed
426435

427-
def savgol_impute(self, signal):
436+
def savgol_impute(self, signal, impute_order):
428437
"""Impute the nan values in signal using savgol.
429438
430-
This method fills the nan values in the signal with a quadratic polynomial fit
439+
This method fills the nan values in the signal with polynomial interpolation
431440
on a rolling window of the immediate past up to window_length data points.
432441
433442
A number of boundary cases are handled involving nan filling close to the boundary.
@@ -439,34 +448,36 @@ def savgol_impute(self, signal):
439448
----------
440449
signal: np.ndarray
441450
A 1D signal to be imputed.
451+
impute_order: int
452+
The polynomial order of the fit used for imputation.
442453
443454
Returns
444455
----------
445456
signal_imputed: np.ndarray
446457
An imputed 1D signal.
447458
"""
459+
if impute_order > self.window_length:
460+
raise ValueError("Impute order must be smaller than window length.")
461+
448462
signal_imputed = np.copy(signal)
449463
for ix in np.where(np.isnan(signal_imputed))[0]:
450464
# Boundary cases
451465
if ix < self.window_length:
452466
# At the boundary, a single value should just be extended
453467
if ix == 1:
454468
signal_imputed[ix] = signal_imputed[ix - 1]
455-
# Reduce the polynomial degree if needed
456-
elif ix == 2:
457-
signal_imputed[ix] = self.savgol_predict(
458-
signal_imputed[:ix], 1, -1
459-
)
460-
# Otherwise, use savgol fitting on the largest window prior
469+
# Otherwise, use savgol fitting on the largest window prior,
470+
# reduce the polynomial degree if needed (can't fit if the
471+
# imputation order is larger than the available data)
461472
else:
462473
signal_imputed[ix] = self.savgol_predict(
463-
signal_imputed[:ix], self.poly_fit_degree, -1
474+
signal_imputed[:ix], min(ix-1, impute_order), -1
464475
)
465476
# Away from the boundary, use savgol fitting on a fixed window
466477
else:
467478
signal_imputed[ix] = self.savgol_predict(
468479
signal_imputed[ix - self.window_length : ix],
469-
self.poly_fit_degree,
480+
impute_order,
470481
-1,
471482
)
472483
return signal_imputed

_delphi_utils_python/tests/test_smooth.py

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,12 @@ def test_causal_savgol_smoother(self):
158158
smoothed_signal = smoother.smooth(signal)
159159
assert np.allclose(smoothed_signal, signal, equal_nan=True)
160160

161+
# test window_length > len(signal) and boundary_method="identity"
162+
signal = np.arange(20)
163+
smoother = Smoother(boundary_method="identity", window_length=30)
164+
smoothed_signal = smoother.smooth(signal)
165+
assert np.allclose(signal, smoothed_signal)
166+
161167
def test_impute(self):
162168
# test front nan error
163169
with pytest.raises(ValueError):
@@ -178,7 +184,7 @@ def test_impute(self):
178184
signal = np.array([i if i % 3 else np.nan for i in range(1, 40)])
179185
# test that the non-nan values are unchanged
180186
not_nans_ixs = np.bitwise_xor(np.isnan(signal, where=True), np.full(len(signal), True))
181-
smoothed_signal = Smoother().savgol_impute(signal)
187+
smoothed_signal = Smoother().impute(signal)
182188
assert np.allclose(signal[not_nans_ixs], smoothed_signal[not_nans_ixs])
183189
# test that the imputer is close to the true line
184190
assert np.allclose(range(1, 40), smoothed_signal, atol=0.5)
@@ -187,47 +193,41 @@ def test_impute(self):
187193
signal = np.hstack([np.arange(10), [np.nan], np.arange(10)])
188194
window_length = 10
189195
smoother = Smoother(
190-
smoother_name="savgol", window_length=window_length, poly_fit_degree=1
196+
window_length=window_length, poly_fit_degree=1
191197
)
192-
imputed_signal = smoother.savgol_impute(signal)
198+
imputed_signal = smoother.impute(signal)
193199
assert np.allclose(imputed_signal, np.hstack([np.arange(11), np.arange(10)]))
194200
smoother = Smoother(
195-
smoother_name="savgol", window_length=window_length, poly_fit_degree=2
201+
window_length=window_length, poly_fit_degree=2
196202
)
197-
imputed_signal = smoother.savgol_impute(signal)
203+
imputed_signal = smoother.impute(signal)
198204
assert np.allclose(imputed_signal, np.hstack([np.arange(11), np.arange(10)]))
199205

200206
# if there are nans on the boundary, should dynamically change window
201207
signal = np.hstack(
202208
[np.arange(5), [np.nan], np.arange(20), [np.nan], np.arange(5)]
203209
)
204210
smoother = Smoother(
205-
smoother_name="savgol", window_length=window_length, poly_fit_degree=2
211+
window_length=window_length, poly_fit_degree=2
206212
)
207-
imputed_signal = smoother.savgol_impute(signal)
213+
imputed_signal = smoother.impute(signal)
208214
assert np.allclose(
209215
imputed_signal, np.hstack([np.arange(6), np.arange(21), np.arange(5)]),
210216
)
211217

212218
# if the array begins with np.nan, we should tell the user to peel it off before sending
213219
signal = np.hstack([[np.nan], np.arange(20), [np.nan], np.arange(5)])
214220
smoother = Smoother(
215-
smoother_name="savgol", window_length=window_length, poly_fit_degree=2
221+
window_length=window_length, poly_fit_degree=2
216222
)
217223
with pytest.raises(ValueError):
218-
imputed_signal = smoother.savgol_impute(signal)
219-
220-
# test window_length > len(signal) and boundary_method="identity"
221-
signal = np.arange(20)
222-
smoother = Smoother(smoother_name="savgol", boundary_method="identity", window_length=30)
223-
smoothed_signal = smoother.smooth(signal)
224-
assert np.allclose(signal, smoothed_signal)
224+
imputed_signal = smoother.impute(signal)
225225

226226
# test the boundary methods
227227
signal = np.arange(20)
228-
smoother = Smoother(smoother_name="savgol", poly_fit_degree=0,
228+
smoother = Smoother(poly_fit_degree=0,
229229
boundary_method="identity", window_length=10)
230-
smoothed_signal = smoother.savgol_impute(signal)
230+
smoothed_signal = smoother.impute(signal)
231231
assert np.allclose(smoothed_signal, signal)
232232

233233
# test that we don't hit a matrix inversion error when there are
@@ -238,6 +238,13 @@ def test_impute(self):
238238
smoothed_signal = smoother.impute(signal)
239239
assert np.allclose(smoothed_signal, np.hstack([[1], np.ones(12), np.arange(5)]))
240240

241+
# test the impute_order argument
242+
signal = np.hstack([[1, np.nan, np.nan, 2], np.arange(5)])
243+
smoother = Smoother()
244+
smoothed_signal = smoother.impute(signal, impute_order=1)
245+
assert np.allclose(smoothed_signal, np.hstack([[1, 1, 1, 2], np.arange(5)]))
246+
247+
241248
def test_pandas_series_input(self):
242249
# The savgol method should match the linear regression method on the first
243250
# window_length-many values of the signal, if the savgol_weighting is set to true,
@@ -281,3 +288,11 @@ def test_pandas_series_input(self):
281288
assert np.allclose(
282289
signal[window_length - 1 :], smoothed_signal[window_length - 1 :]
283290
)
291+
292+
# Test that the index of the series gets preserved
293+
signal = pd.Series(np.ones(30), index=np.arange(50, 80))
294+
smoother = Smoother(smoother_name="moving_average", window_length=10)
295+
smoothed_signal = signal.transform(smoother.smooth)
296+
ix1 = signal.index
297+
ix2 = smoothed_signal.index
298+
assert ix1.equals(ix2)

0 commit comments

Comments
 (0)