11
11
import pandas as pd
12
12
import warnings
13
13
from functools import wraps
14
+ from numpy .polynomial .polynomial import Polynomial as NpPolynomial
14
15
from sklearn .decomposition import PCA , FastICA
15
16
from scipy .signal import find_peaks
16
17
from tqdm import tqdm
17
18
from warnings import warn
18
19
from pandas .api .indexers import BaseIndexer
19
20
from collections .abc import Callable
20
- from typing import Any , TYPE_CHECKING
21
+ from typing import Any , TYPE_CHECKING , Literal
21
22
22
23
from .config import defaults
23
24
from .timeseries import Timeseries
@@ -221,13 +222,17 @@ def median(array: np.ndarray, kernel_size: int) -> np.ndarray:
221
222
222
223
@unwrap_dict_and_ts
223
224
def decompose (array : np .ndarray ,
224
- method : str ,
225
+ method : Literal [ "pca" , "ica" ] ,
225
226
num_components : int = 1 ,
226
227
return_sources : bool = False ,
228
+ detrend : bool | np .ndarray = False ,
229
+ impute : bool = False ,
227
230
rng : np .random .Generator | None = None
228
- ) -> tuple [np .ndarray , np .ndarray | None , np .ndarray | None ]:
231
+ ) -> np . ndarray | tuple [np .ndarray , np .ndarray , np .ndarray ]:
229
232
r"""
230
233
Decomposes the input signal into different components using PCA or ICA.
234
+ Optionally detrends the data and/or fills missing data using the data
235
+ decomposition model.
231
236
232
237
Parameters
233
238
----------
@@ -247,6 +252,14 @@ def decompose(array: np.ndarray,
247
252
return_sources
248
253
If ``True``, return not only the best-fit model, but also the sources
249
254
themselves in space and time. Defaults to ``False``.
255
+ detrend
256
+ If ``True``, detrend the data before decomposing it assuming uniform data
257
+ sampling. Alternatively, a 1D array containing the data indices for detrending.
258
+ impute
259
+ If ``False``, the input data is overwritten with the fitted model, and missing
260
+ data is kept missing.
261
+ If ``True``, input data is kept and the fitted data decomposition model is
262
+ only used to fill the missing values.
250
263
rng
251
264
Random number generator instance to use to fill missing values.
252
265
@@ -281,10 +294,32 @@ def decompose(array: np.ndarray,
281
294
finite_cols = np .nonzero (~ array_nanind .all (axis = 0 ))[0 ]
282
295
nan_cols = np .nonzero (array_nanind .all (axis = 0 ))[0 ]
283
296
array = array [:, finite_cols ]
297
+ array_nanind = np .isnan (array )
298
+ # save values for later reinsertion if imputing data
299
+ if impute :
300
+ array_in = array .copy ()
301
+ # detrend if desired
302
+ if isinstance (detrend , np .ndarray ) or (isinstance (detrend , bool ) and detrend ):
303
+ if isinstance (detrend , np .ndarray ):
304
+ assert (detrend .ndim == 1 ) and (detrend .size == array .shape [0 ]), \
305
+ f"'detrend' array needs to be 1D with length { array .shape [0 ]} , got " \
306
+ f"shape { detrend .shape } instead."
307
+ x = detrend
308
+ else :
309
+ x = np .arange (array .shape [0 ])
310
+ fits = [NpPolynomial .fit (x [~ array_nanind [:, i ]],
311
+ array [:, i ][~ array_nanind [:, i ]], 1 )
312
+ for i in range (array .shape [1 ])]
313
+ array_trend = np .stack ([f (x ) for f in fits ], axis = 1 )
314
+ array -= array_trend
315
+ detrended = True
316
+ elif not isinstance (detrend , bool ):
317
+ raise ValueError (f"Unrecognized 'detrend' argument type: '{ type (detrend )} '." )
318
+ else :
319
+ detrended = False
284
320
# fill NaNs with white Gaussian noise
285
321
array_nanmean = np .nanmean (array , axis = 0 )
286
322
array_nansd = np .nanstd (array , axis = 0 )
287
- array_nanind = np .isnan (array )
288
323
if rng is None :
289
324
rng = np .random .default_rng ()
290
325
else :
@@ -294,29 +329,36 @@ def decompose(array: np.ndarray,
294
329
array [array_nanind [:, icol ], icol ] = array_nanmean [icol ] + \
295
330
array_nansd [icol ] * rng .normal (size = array_nanind [:, icol ].sum ())
296
331
# decompose using the specified solver
297
- if method == 'pca' :
332
+ if method . lower () == 'pca' :
298
333
decomposer = PCA (n_components = num_components , whiten = True )
299
- elif method == 'ica' :
334
+ elif method . lower () == 'ica' :
300
335
decomposer = FastICA (n_components = num_components , whiten = "unit-variance" )
301
336
else :
302
337
raise NotImplementedError ("Cannot estimate the common mode error "
303
338
f"using the '{ method } ' method." )
304
339
# extract temporal component and build model
305
340
temporal = decomposer .fit_transform (array )
306
341
model = decomposer .inverse_transform (temporal )
307
- # reduce to where original timeseries were not NaNs and return
308
- model [array_nanind ] = np .NaN
342
+ # add trends back in if they were removed
343
+ if detrended :
344
+ model += array_trend
345
+ # restore original data
346
+ if impute :
347
+ for icol in range (model .shape [1 ]):
348
+ model [~ array_nanind [:, icol ], icol ] = array_in [~ array_nanind [:, icol ], icol ]
349
+ # reduce to where original timeseries were not NaNs
350
+ else :
351
+ model [array_nanind ] = np .NaN
309
352
if nan_cols .size > 0 :
310
- newmod = np .empty ((temporal .shape [0 ], len (finite_cols ) + len (nan_cols )))
353
+ newmod = np .full ((temporal .shape [0 ], len (finite_cols ) + len (nan_cols )), np . NaN )
311
354
newmod [:, finite_cols ] = model
312
- newmod [:, nan_cols ] = np .NaN
313
355
model = newmod
356
+ # extract spatial component if to be returned, else done
314
357
if return_sources :
315
358
spatial = decomposer .components_
316
359
if nan_cols .size > 0 :
317
- newspat = np .empty ((spatial .shape [0 ], len (finite_cols ) + len (nan_cols )))
360
+ newspat = np .full ((spatial .shape [0 ], len (finite_cols ) + len (nan_cols )), np . NaN )
318
361
newspat [:, finite_cols ] = spatial
319
- newspat [:, nan_cols ] = np .NaN
320
362
spatial = newspat
321
363
return model , temporal , spatial
322
364
else :
0 commit comments