Skip to content

Commit aab94b9

Browse files
committed
Final smoothing utility changes
* remove smooth_by_geoid (was only used in google_health) * improve documentation with more usage examples * add a few more tests
1 parent b061285 commit aab94b9

File tree

3 files changed

+61
-58
lines changed

3 files changed

+61
-58
lines changed

_delphi_utils_python/delphi_utils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from .archive import ArchiveDiffer, GitArchiveDiffer, S3ArchiveDiffer
88
from .export import create_export_csv
99
from .utils import read_params
10-
from .smooth import Smoother, smoothed_values_by_geo_id
10+
from .smooth import Smoother
1111
from .geomap import GeoMapper
1212

1313
__version__ = "0.1.0"

_delphi_utils_python/delphi_utils/smooth.py

Lines changed: 46 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,16 @@
1313
import warnings
1414

1515
import numpy as np
16-
import pandas as pd
1716

1817

1918
class Smoother:
2019
"""
21-
This is the smoothing utility class. It handles imputation and smoothing.
22-
Reasonable defaults are given for all the parameters, but fine-grained
23-
control is exposed.
24-
25-
Instantiating a smoother class specifies a smoother with a host of parameters,
26-
which can then be applied to an np.ndarray with the function smooth:
27-
> smoother = Smoother(smoother_name='savgol', window_length=28, gaussian_bandwidth=144)
28-
> smoothed_signal = smoother.smooth(signal)
20+
This is the smoothing utility class. This class holds the parameter settings for its smoother
21+
methods and provides reasonable defaults. Basic usage can be found in the examples below.
22+
23+
The smoother function takes numpy arrays as input, expecting the values to come from a
24+
regularly-spaced time grid. NANs are ok, as long as the array does not begin with a NAN. The
25+
rest of the NANs will be handled via imputation by default, though this can be turned off.
2926
3027
Parameters
3128
----------
@@ -53,8 +50,8 @@ class Smoother:
5350
35 905
5451
42 1303
5552
impute: {'savgol', 'zeros', None}
56-
If 'savgol' (default), will fill nan values with a savgol fit on the largest available time
57-
window prior (up to window_length). If 'zeros', will fill nan values with zeros.
53+
If 'savgol' (default), will fill nan values with a savgol fit on the largest available time
54+
window prior (up to window_length). If 'zeros', will fill nan values with zeros.
5855
If None, leaves the nans in place.
5956
minval: float or None
6057
The smallest value to allow in a signal. If None, there is no smallest value.
@@ -66,6 +63,33 @@ class Smoother:
6663
----------
6764
smooth: np.ndarray
6865
Takes a 1D signal and returns a smoothed version. Both arrays have the same length.
66+
67+
Example Usage
68+
-------------
69+
Example 1. Apply a rolling average smoother with a window of length 10.
70+
>>> smoother = Smoother(smoother_name='moving_average', window_length=10)
71+
>>> smoothed_signal = smoother.smooth(signal)
72+
73+
Example 2. Smooth a dataframe column.
74+
>>> smoother = Smoother(smoother_name="savgol")
75+
>>> df[col] = pd.Series(smoother.smooth(df[col].to_numpy()))
76+
77+
Example 3. Apply a rolling weighted average smoother, with 95% weight on the recent 2 weeks and
78+
a sharp cutoff after 4 weeks.
79+
>>> smoother = Smoother(smoother_name='savgol', poly_fit_degree=0, window_length=28,
80+
gaussian_bandwidth=144)
81+
>>> smoothed_signal = smoother.smooth(signal)
82+
83+
Example 4. Apply a local linear regression smoother (equivalent to `left_gauss_linear`), with
84+
95% weight on the recent week and a sharp cutoff after 3 weeks.
85+
>>> smoother = Smoother(smoother_name='savgol', poly_fit_degree=1, window_length=21,
86+
gaussian_bandwidth=36)
87+
>>> smoothed_signal = smoother.smooth(signal)
88+
89+
Example 5. Apply the identity function (simplifies code that iterates through smoothers _and_
90+
raw data).
91+
>>> smoother = Smoother(smoother_name='identity')
92+
>>> smoothed_signal = smoother.smooth(signal)
6993
"""
7094

7195
def __init__(
@@ -95,14 +119,14 @@ def __init__(
95119
else:
96120
self.coeffs = None
97121

98-
SMOOTHERS = {"savgol", "left_gauss_linear", "moving_average", "identity"}
122+
valid_smoothers = {"savgol", "left_gauss_linear", "moving_average", "identity"}
99123

100-
if self.smoother_name not in SMOOTHERS:
124+
if self.smoother_name not in valid_smoothers:
101125
raise ValueError("Invalid smoother name given.")
102126

103-
IMPUTE_METHODS = {"savgol", "zeros", None}
127+
valid_impute_methods = {"savgol", "zeros", None}
104128

105-
if self.impute_method not in IMPUTE_METHODS:
129+
if self.impute_method not in valid_impute_methods:
106130
raise ValueError("Invalid impute method given.")
107131

108132
def smooth(self, signal):
@@ -118,6 +142,9 @@ def smooth(self, signal):
118142
signal_smoothed: np.ndarray
119143
A smoothed 1D signal.
120144
"""
145+
if len(signal) < self.window_length:
146+
raise ValueError("The window_length must be smaller than the length of the signal.")
147+
121148
signal = self.impute(signal)
122149

123150
if self.smoother_name == "savgol":
@@ -223,9 +250,9 @@ def left_gauss_linear_smoother(self, signal):
223250

224251
def savgol_predict(self, signal):
225252
"""
226-
Fits a polynomial through the values given by the signal and returns the value
253+
Fits a polynomial through the values given by the signal and returns the value
227254
of the polynomial at the right-most signal-value. More precisely, fits a polynomial
228-
f(t) of degree poly_fit_degree through the points signal[0], signal[1] ..., signal[-1],
255+
f(t) of degree poly_fit_degree through the points signal[0], signal[1], ..., signal[-1],
229256
and returns the evaluation of the polynomial at the location of signal[-1].
230257
231258
Parameters
@@ -250,7 +277,7 @@ def savgol_coeffs(cls, nl, nr, poly_fit_degree, gaussian_bandwidth=100):
250277
Solves for the Savitzky-Golay coefficients. The coefficients c_i
251278
give a filter so that
252279
y = \sum_{i=-{n_l}}^{n_r} c_i x_i
253-
is the value at 0 (thus the constant term) of the polynomial fit
280+
is the value at 0 (thus the constant term) of the polynomial fit
254281
through the points {x_i}. The coefficients c_i are calculated as
255282
c_i = ((A.T @ A)^(-1) @ (A.T @ e_i))_0
256283
where A is the design matrix of the polynomial fit and e_i is the standard
@@ -266,7 +293,7 @@ def savgol_coeffs(cls, nl, nr, poly_fit_degree, gaussian_bandwidth=100):
266293
poly_fit_degree: int
267294
The degree of the polynomial to be fit.
268295
gaussian_bandwidth: float or None
269-
If float, performs regression with Gaussian weights whose variance is
296+
If float, performs regression with Gaussian weights whose variance is
270297
the gaussian_bandwidth. If None, performs unweighted regression.
271298
272299
Returns
@@ -396,35 +423,3 @@ def savgol_impute(self, signal):
396423
)
397424
signal_imputed[ix] = signal_imputed[ix - self.window_length : ix] @ coeffs
398425
return signal_imputed
399-
400-
401-
# TODO: this needs a test, probably
402-
def smoothed_values_by_geo_id(
403-
df: pd.DataFrame, method="savgol", **kwargs
404-
) -> np.ndarray:
405-
"""Computes a smoothed version of the variable 'val' within unique values of 'geo_id'
406-
407-
Currently uses a local weighted least squares, where the weights are given
408-
by a Gaussian kernel.
409-
410-
Parameters
411-
----------
412-
df: pd.DataFrame
413-
A data frame with columns "geo_id", "timestamp", and "val"
414-
method: {'savgol', 'left_gauss_linear', 'moving_average'}
415-
A choice of window smoother to use. Check the smoother method definitions
416-
for specific parameters.
417-
418-
Returns
419-
-------
420-
np.ndarray
421-
A one-dimensional numpy array containing the smoothed values.
422-
"""
423-
smoother = Smoother(method, **kwargs)
424-
425-
df = df.copy()
426-
df["val_smooth"] = 0
427-
for geo_id in df["geo_id"].unique():
428-
signal = df[df["geo_id"] == geo_id]["val"].values
429-
df.loc[df["geo_id"] == geo_id, "val_smooth"] = smoother.smooth(signal)
430-
return df["val_smooth"].values

_delphi_utils_python/tests/test_smooth.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
11
"""
2-
This file contains a number of smoothers left gauss filter used to smooth a 1-d signal.
3-
Code is courtesy of Addison Hu (minor adjustments by Maria).
4-
5-
Author: Maria Jahja
6-
Created: 2020-04-16
7-
2+
Tests for the smoothing utility.
3+
Authors: Dmitry Shemetov, Addison Hu, Maria Jahja
84
"""
95
import pytest
106

@@ -167,3 +163,15 @@ def test_impute(self):
167163
)
168164
with pytest.raises(ValueError):
169165
imputed_signal = smoother.savgol_impute(signal)
166+
167+
# test window_length > len(signal)
168+
signal = np.arange(20)
169+
smoother = Smoother(smoother_name="savgol", boundary_method="identity", window_length=30)
170+
with pytest.raises(ValueError):
171+
smoothed_signal = smoother.smooth(signal)
172+
173+
# test the boundary methods
174+
signal = np.arange(20)
175+
smoother = Smoother(smoother_name="savgol", poly_fit_degree=0, boundary_method="identity", window_length=10)
176+
smoothed_signal = smoother.savgol_impute(signal)
177+
assert np.allclose(smoothed_signal, signal)

0 commit comments

Comments
 (0)