Skip to content

Commit 7cfed72

Browse files
committed
Add smoother interface for handling pandas Series:
* converts to numpy arrays under the hood and converts back after
* updated docstrings and tests to match
1 parent 4bf3410 commit 7cfed72

File tree

2 files changed

+62
-11
lines changed

2 files changed

+62
-11
lines changed

_delphi_utils_python/delphi_utils/smooth.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,17 @@
1313
import warnings
1414

1515
import numpy as np
16+
import pandas as pd
1617

1718

1819
class Smoother:
1920
"""
2021
This is the smoothing utility class. This class holds the parameter settings for its smoother
2122
methods and provides reasonable defaults. Basic usage can be found in the examples below.
2223
23-
The smoother function takes numpy arrays as input, expecting the values to come from a
24-
regularly-spaced time grid. NANs are ok, as long as the array does not begin with a NAN. The
25-
rest of the NANs will be handled via imputation by default, though this can be turned off.
24+
The smoother function takes numpy arrays or pandas Series as input, expecting the values to be
25+
on a regularly-spaced time grid. NANs are ok, as long as the array does not begin with a NAN.
26+
The rest of the NANs will be handled via imputation by default, though this can be turned off.
2627
2728
Parameters
2829
----------
@@ -64,8 +65,9 @@ class Smoother:
6465
6566
Methods
6667
----------
67-
smooth: np.ndarray
68-
Takes a 1D signal and returns a smoothed version. Both arrays have the same length.
68+
smooth: np.ndarray or pd.Series
69+
Takes a 1D signal and returns a smoothed version. The input and the output have the same length
70+
and type.
6971
7072
Example Usage
7173
-------------
@@ -75,7 +77,7 @@ class Smoother:
7577
7678
Example 2. Smooth a dataframe column.
7779
>>> smoother = Smoother(smoother_name='savgol')
78-
>>> df[col] = pd.Series(smoother.smooth(df[col].to_numpy()))
80+
>>> df[col] = df[col].transform(smoother.smooth)
7981
8082
Example 3. Apply a rolling weighted average smoother, with 95% weight on the recent 2 weeks and
8183
a sharp cutoff after 4 weeks.
@@ -133,17 +135,21 @@ def smooth(self, signal):
133135
134136
Parameters
135137
----------
136-
signal: np.ndarray
138+
signal: np.ndarray or pd.Series
137139
A 1D signal to be smoothed.
138140
139-
signal_smoothed: np.ndarray
140-
A smoothed 1D signal.
141+
signal_smoothed: np.ndarray or pd.Series
142+
A smoothed 1D signal. Returns an array of the same type and length as
143+
the input.
141144
"""
142145
if len(signal) < self.window_length:
143146
raise ValueError(
144147
"The window_length must be smaller than the length of the signal."
145148
)
146149

150+
is_pandas_series = isinstance(signal, pd.Series)
151+
signal = signal.to_numpy() if is_pandas_series else signal
152+
147153
signal = self.impute(signal)
148154

149155
if self.smoother_name == "savgol":
@@ -155,7 +161,7 @@ def smooth(self, signal):
155161
elif self.smoother_name == "identity":
156162
signal_smoothed = signal
157163

158-
return signal_smoothed
164+
return signal_smoothed if not is_pandas_series else pd.Series(signal_smoothed)
159165

160166
def impute(self, signal):
161167
"""
@@ -281,7 +287,7 @@ def savgol_coeffs(self, nl, nr):
281287
"""
282288
Solves for the Savitzky-Golay coefficients. The coefficients c_i
283289
give a filter so that
284-
y = \sum_{i=-{n_l}}^{n_r} c_i x_i
290+
y = sum_{i=-{n_l}}^{n_r} c_i x_i
285291
is the value at 0 (thus the constant term) of the polynomial fit
286292
through the points {x_i}. The coefficients c_i are calculated as
287293
c_i = ((A.T @ A)^(-1) @ (A.T @ e_i))_0

_delphi_utils_python/tests/test_smooth.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pytest
66

77
import numpy as np
8+
import pandas as pd
89
from delphi_utils import Smoother
910

1011

@@ -178,3 +179,47 @@ def test_impute(self):
178179
boundary_method="identity", window_length=10)
179180
smoothed_signal = smoother.savgol_impute(signal)
180181
assert np.allclose(smoothed_signal, signal)
182+
183+
def test_pandas_series_input(self):
184+
# The savgol method should match the linear regression method on the first
185+
# window_length-many values of the signal, if the savgol_weighting is set to true,
186+
# and the polynomial fit degree is set to 1. Beyond that, there will be very small
187+
# differences between the signals (due to "left_gauss_linear" not having a window_length
188+
# cutoff).
189+
window_length = 50
190+
signal = pd.Series(np.arange(window_length) + np.random.randn(window_length))
191+
smoother = Smoother(smoother_name="left_gauss_linear")
192+
smoothed_signal1 = smoother.smooth(signal)
193+
smoother = Smoother(
194+
smoother_name="savgol", window_length=window_length, poly_fit_degree=1,
195+
)
196+
smoothed_signal2 = smoother.smooth(signal)
197+
198+
assert np.allclose(smoothed_signal1, smoothed_signal2)
199+
200+
window_length = 50
201+
signal = pd.Series(np.arange(window_length) + np.random.randn(window_length))
202+
smoother = Smoother(smoother_name="left_gauss_linear")
203+
smoothed_signal1 = signal.transform(smoother.smooth)
204+
smoother = Smoother(
205+
smoother_name="savgol", window_length=window_length, poly_fit_degree=1,
206+
)
207+
smoothed_signal2 = signal.transform(smoother.smooth)
208+
209+
assert np.allclose(smoothed_signal1, smoothed_signal2)
210+
211+
# The raw and smoothed lengths should match
212+
signal = pd.Series(np.ones(30))
213+
smoother = Smoother(smoother_name="moving_average")
214+
smoothed_signal = signal.transform(smoother.smooth)
215+
assert len(signal) == len(smoothed_signal)
216+
217+
# The raw and smoothed arrays should be identical on constant data
218+
# modulo the nans
219+
signal = pd.Series(np.ones(30))
220+
window_length = 10
221+
smoother = Smoother(smoother_name="moving_average", window_length=window_length)
222+
smoothed_signal = signal.transform(smoother.smooth)
223+
assert np.allclose(
224+
signal[window_length - 1 :], smoothed_signal[window_length - 1 :]
225+
)

0 commit comments

Comments
 (0)