Skip to content

Commit 4ad4c39

Browse files
committed
Update smoother to gracefully handle nans:
* entire array of nans is handled
* left-padded nans are now ignored
* a few other edge cases
* add tests to match
1 parent a5f81ba commit 4ad4c39

File tree

2 files changed

+65
-12
lines changed

2 files changed

+65
-12
lines changed

_delphi_utils_python/delphi_utils/smooth.py

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ def __init__(
138138
raise ValueError("Invalid impute_method given.")
139139
if self.boundary_method not in valid_boundary_methods:
140140
raise ValueError("Invalid boundary_method given.")
141+
if self.window_length <= 1:
142+
raise ValueError("Window length is too short.")
141143

142144
if smoother_name == "savgol":
143145
# The polynomial fitting is done on a past window of size window_length
@@ -165,20 +167,36 @@ def smooth(self, signal: Union[np.ndarray, pd.Series]) -> Union[np.ndarray, pd.S
165167
A smoothed 1D signal. Returns an array of the same type and length as
166168
the input.
167169
"""
170+
# If all nans, pass through
171+
if np.all(np.isnan(signal)):
172+
return signal
173+
168174
is_pandas_series = isinstance(signal, pd.Series)
169175
signal = signal.to_numpy() if is_pandas_series else signal
170176

171-
signal = self.impute(signal)
177+
# Find where the first non-nan value is located and truncate the initial nans
178+
ix = np.where(~np.isnan(signal))[0][0]
179+
signal = signal[ix:]
172180

173-
if self.smoother_name == "savgol":
174-
signal_smoothed = self.savgol_smoother(signal)
175-
elif self.smoother_name == "left_gauss_linear":
176-
signal_smoothed = self.left_gauss_linear_smoother(signal)
177-
elif self.smoother_name == "moving_average":
178-
signal_smoothed = self.moving_average_smoother(signal)
179-
else:
181+
# Don't smooth in certain edge cases
182+
if len(signal) < self.poly_fit_degree or len(signal) == 1:
180183
signal_smoothed = signal.copy()
181-
184+
else:
185+
# Impute
186+
signal = self.impute(signal)
187+
188+
# Smooth
189+
if self.smoother_name == "savgol":
190+
signal_smoothed = self.savgol_smoother(signal)
191+
elif self.smoother_name == "left_gauss_linear":
192+
signal_smoothed = self.left_gauss_linear_smoother(signal)
193+
elif self.smoother_name == "moving_average":
194+
signal_smoothed = self.moving_average_smoother(signal)
195+
elif self.smoother_name == "identity":
196+
signal_smoothed = signal
197+
198+
# Append the nans back, since we want to preserve length
199+
signal_smoothed = np.hstack([np.nan*np.ones(ix), signal_smoothed])
182200
signal_smoothed = signal_smoothed if not is_pandas_series else pd.Series(signal_smoothed)
183201
return signal_smoothed
184202

@@ -283,7 +301,7 @@ def left_gauss_linear_smoother(self, signal):
283301

284302
def savgol_predict(self, signal, poly_fit_degree, nr):
285303
"""Predict a single value using the savgol method.
286-
304+
287305
Fits a polynomial through the values given by the signal and returns the value
288306
of the polynomial at the right-most signal-value. More precisely, for a signal of length
289307
n, fits a poly_fit_degree polynomial through the points signal[-n+1+nr], signal[-n+2+nr],
@@ -312,7 +330,8 @@ def savgol_predict(self, signal, poly_fit_degree, nr):
312330
def savgol_coeffs(self, nl, nr, poly_fit_degree):
313331
"""Solve for the Savitzky-Golay coefficients.
314332
315-
The coefficients c_i give a filter so that
333+
Solves for the Savitzky-Golay coefficients. The coefficients c_i
334+
give a filter so that
316335
y = sum_{i=-{n_l}}^{n_r} c_i x_i
317336
is the value at 0 (thus the constant term) of the polynomial fit
318337
through the points {x_i}. The coefficients c_i are calculated as
@@ -386,7 +405,7 @@ def savgol_smoother(self, signal):
386405
# - identity keeps the original signal (doesn't smooth)
387406
# - nan writes nans
388407
if self.boundary_method == "shortened_window": # pylint: disable=no-else-return
389-
for ix in range(len(self.coeffs)):
408+
for ix in range(min(len(self.coeffs), len(signal))):
390409
if ix == 0:
391410
signal_smoothed[ix] = signal[ix]
392411
else:

_delphi_utils_python/tests/test_smooth.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Tests for the smoothing utility.
33
Authors: Dmitry Shemetov, Addison Hu, Maria Jahja
44
"""
5+
from numpy.lib.polynomial import poly
56
import pytest
67

78
import numpy as np
@@ -17,6 +18,8 @@ def test_bad_inputs(self):
1718
Smoother(impute_method="hamburger")
1819
with pytest.raises(ValueError):
1920
Smoother(boundary_method="hamburger")
21+
with pytest.raises(ValueError):
22+
Smoother(window_length=1)
2023

2124
def test_identity_smoother(self):
2225
signal = np.arange(30) + np.random.rand(30)
@@ -124,6 +127,37 @@ def test_causal_savgol_smoother(self):
124127
smoothed_signal2 = smoother.smooth(signal)
125128
assert np.allclose(smoothed_signal1, smoothed_signal2)
126129

130+
# Test the all nans case
131+
signal = np.nan * np.ones(10)
132+
smoother = Smoother(window_length=9)
133+
smoothed_signal = smoother.smooth(signal)
134+
assert np.all(np.isnan(smoothed_signal))
135+
136+
# Test the case where the signal is length 1
137+
signal = np.ones(1)
138+
smoother = Smoother()
139+
smoothed_signal = smoother.smooth(signal)
140+
assert np.allclose(smoothed_signal, signal)
141+
142+
# Test the case where the signal length is less than poly_fit_degree
143+
signal = np.ones(2)
144+
smoother = Smoother(poly_fit_degree=3)
145+
smoothed_signal = smoother.smooth(signal)
146+
assert np.allclose(smoothed_signal, signal)
147+
148+
# Test an edge fitting case
149+
signal = np.array([np.nan, 1, np.nan])
150+
smoother = Smoother(poly_fit_degree=1, window_length=2)
151+
smoothed_signal = smoother.smooth(signal)
152+
assert np.allclose(smoothed_signal, np.array([np.nan, 1, 1]), equal_nan=True)
153+
154+
# Test a range of signal lengths following a run of leading nans; the signal should be returned unchanged
155+
for i in range(10):
156+
signal = np.hstack([[np.nan, np.nan, np.nan], np.ones(i)])
157+
smoother = Smoother(poly_fit_degree=0, window_length=5)
158+
smoothed_signal = smoother.smooth(signal)
159+
assert np.allclose(smoothed_signal, signal, equal_nan=True)
160+
127161
def test_impute(self):
128162
# test front nan error
129163
with pytest.raises(ValueError):

0 commit comments

Comments
 (0)