Skip to content

Commit 28ee7a1

Browse files
committed
add detrend and impute to processing.decompose
1 parent d0d87e8 commit 28ee7a1

File tree

1 file changed

+54
-12
lines changed

1 file changed

+54
-12
lines changed

disstans/processing.py

+54-12
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,14 @@
1111
import pandas as pd
1212
import warnings
1313
from functools import wraps
14+
from numpy.polynomial.polynomial import Polynomial as NpPolynomial
1415
from sklearn.decomposition import PCA, FastICA
1516
from scipy.signal import find_peaks
1617
from tqdm import tqdm
1718
from warnings import warn
1819
from pandas.api.indexers import BaseIndexer
1920
from collections.abc import Callable
20-
from typing import Any, TYPE_CHECKING
21+
from typing import Any, TYPE_CHECKING, Literal
2122

2223
from .config import defaults
2324
from .timeseries import Timeseries
@@ -221,13 +222,17 @@ def median(array: np.ndarray, kernel_size: int) -> np.ndarray:
221222

222223
@unwrap_dict_and_ts
223224
def decompose(array: np.ndarray,
224-
method: str,
225+
method: Literal["pca", "ica"],
225226
num_components: int = 1,
226227
return_sources: bool = False,
228+
detrend: bool | np.ndarray = False,
229+
impute: bool = False,
227230
rng: np.random.Generator | None = None
228-
) -> tuple[np.ndarray, np.ndarray | None, np.ndarray | None]:
231+
) -> np.ndarray | tuple[np.ndarray, np.ndarray, np.ndarray]:
229232
r"""
230233
Decomposes the input signal into different components using PCA or ICA.
234+
Optionally detrends the data and/or fills missing data using the data
235+
decomposition model.
231236
232237
Parameters
233238
----------
@@ -247,6 +252,14 @@ def decompose(array: np.ndarray,
247252
return_sources
248253
If ``True``, return not only the best-fit model, but also the sources
249254
themselves in space and time. Defaults to ``False``.
255+
detrend
256+
If ``True``, detrend the data before decomposing it assuming uniform data
257+
sampling. Alternatively, a 1D array containing the data indices for detrending.
258+
impute
259+
If ``False``, the input data is overwritten with the fitted model, and missing
260+
data is kept missing.
261+
If ``True``, input data is kept and the fitted data decomposition model is
262+
only used to fill the missing values.
250263
rng
251264
Random number generator instance to use to fill missing values.
252265
@@ -281,10 +294,32 @@ def decompose(array: np.ndarray,
281294
finite_cols = np.nonzero(~array_nanind.all(axis=0))[0]
282295
nan_cols = np.nonzero(array_nanind.all(axis=0))[0]
283296
array = array[:, finite_cols]
297+
array_nanind = np.isnan(array)
298+
# save values for later reinsertion if imputing data
299+
if impute:
300+
array_in = array.copy()
301+
# detrend if desired
302+
if isinstance(detrend, np.ndarray) or (isinstance(detrend, bool) and detrend):
303+
if isinstance(detrend, np.ndarray):
304+
assert (detrend.ndim == 1) and (detrend.size == array.shape[0]), \
305+
f"'detrend' array needs to be 1D with length {array.shape[0]}, got " \
306+
f"shape {detrend.shape} instead."
307+
x = detrend
308+
else:
309+
x = np.arange(array.shape[0])
310+
fits = [NpPolynomial.fit(x[~array_nanind[:, i]],
311+
array[:, i][~array_nanind[:, i]], 1)
312+
for i in range(array.shape[1])]
313+
array_trend = np.stack([f(x) for f in fits], axis=1)
314+
array -= array_trend
315+
detrended = True
316+
elif not isinstance(detrend, bool):
317+
raise ValueError(f"Unrecognized 'detrend' argument type: '{type(detrend)}'.")
318+
else:
319+
detrended = False
284320
# fill NaNs with white Gaussian noise
285321
array_nanmean = np.nanmean(array, axis=0)
286322
array_nansd = np.nanstd(array, axis=0)
287-
array_nanind = np.isnan(array)
288323
if rng is None:
289324
rng = np.random.default_rng()
290325
else:
@@ -294,29 +329,36 @@ def decompose(array: np.ndarray,
294329
array[array_nanind[:, icol], icol] = array_nanmean[icol] + \
295330
array_nansd[icol] * rng.normal(size=array_nanind[:, icol].sum())
296331
# decompose using the specified solver
297-
if method == 'pca':
332+
if method.lower() == 'pca':
298333
decomposer = PCA(n_components=num_components, whiten=True)
299-
elif method == 'ica':
334+
elif method.lower() == 'ica':
300335
decomposer = FastICA(n_components=num_components, whiten="unit-variance")
301336
else:
302337
raise NotImplementedError("Cannot estimate the common mode error "
303338
f"using the '{method}' method.")
304339
# extract temporal component and build model
305340
temporal = decomposer.fit_transform(array)
306341
model = decomposer.inverse_transform(temporal)
307-
# reduce to where original timeseries were not NaNs and return
308-
model[array_nanind] = np.NaN
342+
# add trends back in if they were removed
343+
if detrended:
344+
model += array_trend
345+
# restore original data
346+
if impute:
347+
for icol in range(model.shape[1]):
348+
model[~array_nanind[:, icol], icol] = array_in[~array_nanind[:, icol], icol]
349+
# reduce to where original timeseries were not NaNs
350+
else:
351+
model[array_nanind] = np.NaN
309352
if nan_cols.size > 0:
310-
newmod = np.empty((temporal.shape[0], len(finite_cols) + len(nan_cols)))
353+
newmod = np.full((temporal.shape[0], len(finite_cols) + len(nan_cols)), np.NaN)
311354
newmod[:, finite_cols] = model
312-
newmod[:, nan_cols] = np.NaN
313355
model = newmod
356+
# extract spatial component if to be returned, else done
314357
if return_sources:
315358
spatial = decomposer.components_
316359
if nan_cols.size > 0:
317-
newspat = np.empty((spatial.shape[0], len(finite_cols) + len(nan_cols)))
360+
newspat = np.full((spatial.shape[0], len(finite_cols) + len(nan_cols)), np.NaN)
318361
newspat[:, finite_cols] = spatial
319-
newspat[:, nan_cols] = np.NaN
320362
spatial = newspat
321363
return model, temporal, spatial
322364
else:

0 commit comments

Comments
 (0)