diff --git a/pymc_extras/statespace/core/statespace.py b/pymc_extras/statespace/core/statespace.py
index 715808d81..e4f299f6e 100644
--- a/pymc_extras/statespace/core/statespace.py
+++ b/pymc_extras/statespace/core/statespace.py
@@ -1,7 +1,7 @@
 import logging
 
 from collections.abc import Callable, Sequence
-from typing import Any
+from typing import Any, Literal
 
 import numpy as np
 import pandas as pd
@@ -822,6 +822,7 @@ def build_statespace_graph(
         mode: str | None = None,
         missing_fill_value: float | None = None,
         cov_jitter: float | None = JITTER_DEFAULT,
+        mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
         save_kalman_filter_outputs_in_idata: bool = False,
     ) -> None:
         """
@@ -865,6 +866,14 @@ def build_statespace_graph(
 
             - The Univariate Filter is more robust than other filters, and can tolerate a lower jitter value
 
+        mvn_method: str, default "svd"
+            Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+            (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+            to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+            In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+            recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
         save_kalman_filter_outputs_in_idata: bool, optional, default=False
             If True, Kalman Filter outputs will be saved in the model as deterministics. Useful for debugging, but
             should not be necessary for the majority of users.
@@ -915,6 +924,7 @@ def build_statespace_graph(
                 logp=logp,
                 observed=data,
                 dims=obs_dims,
+                method=mvn_method,
             )
 
             self._fit_coords = pm_mod.coords.copy()
@@ -1109,6 +1119,7 @@ def _sample_conditional(
         group: str,
         random_seed: RandomState | None = None,
         data: pt.TensorLike | None = None,
+        mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
         **kwargs,
     ):
         """
@@ -1130,6 +1141,14 @@ def _sample_conditional(
             Observed data on which to condition the model. If not provided, the function will use the data that was
             provided when the model was built.
 
+        mvn_method: str, default "svd"
+            Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+            (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+            to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+            In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+            recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
         kwargs:
             Additional keyword arguments are passed to pymc.sample_posterior_predictive
 
@@ -1181,6 +1200,7 @@ def _sample_conditional(
                 covs=cov,
                 logp=dummy_ll,
                 dims=state_dims,
+                method=mvn_method,
             )
 
             obs_mu = (Z @ mu[..., None]).squeeze(-1)
@@ -1192,6 +1212,7 @@ def _sample_conditional(
                 covs=obs_cov,
                 logp=dummy_ll,
                 dims=obs_dims,
+                method=mvn_method,
             )
 
             # TODO: Remove this after pm.Flat initial values are fixed
@@ -1222,6 +1243,7 @@ def _sample_unconditional(
         steps: int | None = None,
         use_data_time_dim: bool = False,
         random_seed: RandomState | None = None,
+        mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
         **kwargs,
     ):
         """
@@ -1251,6 +1273,14 @@ def _sample_unconditional(
         random_seed : int, RandomState or Generator, optional
             Seed for the random number generator.
 
+        mvn_method: str, default "svd"
+            Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+            (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+            to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+            In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+            recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
         kwargs:
             Additional keyword arguments are passed to pymc.sample_posterior_predictive
 
@@ -1309,6 +1339,7 @@ def _sample_unconditional(
                 steps=steps,
                 dims=dims,
                 mode=self._fit_mode,
+                method=mvn_method,
                 sequence_names=self.kalman_filter.seq_names,
                 k_endog=self.k_endog,
             )
@@ -1331,7 +1362,11 @@ def _sample_unconditional(
         return idata_unconditional.posterior_predictive
 
     def sample_conditional_prior(
-        self, idata: InferenceData, random_seed: RandomState | None = None, **kwargs
+        self,
+        idata: InferenceData,
+        random_seed: RandomState | None = None,
+        mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
+        **kwargs,
     ) -> InferenceData:
         """
         Sample from the conditional prior; that is, given parameter draws from the prior distribution,
@@ -1347,6 +1382,14 @@ def sample_conditional_prior(
         random_seed : int, RandomState or Generator, optional
             Seed for the random number generator.
 
+        mvn_method: str, default "svd"
+            Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+            (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+            to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+            In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+            recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
         kwargs:
             Additional keyword arguments are passed to pymc.sample_posterior_predictive
 
@@ -1358,10 +1401,16 @@ def sample_conditional_prior(
             "predicted_prior", and "smoothed_prior".
         """
 
-        return self._sample_conditional(idata, "prior", random_seed, **kwargs)
+        return self._sample_conditional(
+            idata=idata, group="prior", random_seed=random_seed, mvn_method=mvn_method, **kwargs
+        )
 
     def sample_conditional_posterior(
-        self, idata: InferenceData, random_seed: RandomState | None = None, **kwargs
+        self,
+        idata: InferenceData,
+        random_seed: RandomState | None = None,
+        mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
+        **kwargs,
     ):
         """
         Sample from the conditional posterior; that is, given parameter draws from the posterior distribution,
@@ -1376,6 +1425,14 @@ def sample_conditional_posterior(
         random_seed : int, RandomState or Generator, optional
             Seed for the random number generator.
 
+        mvn_method: str, default "svd"
+            Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+            (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+            to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+            In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+            recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
         kwargs:
             Additional keyword arguments are passed to pymc.sample_posterior_predictive
 
@@ -1387,7 +1444,9 @@ def sample_conditional_posterior(
             "predicted_posterior", and "smoothed_posterior".
         """
 
-        return self._sample_conditional(idata, "posterior", random_seed, **kwargs)
+        return self._sample_conditional(
+            idata=idata, group="posterior", random_seed=random_seed, mvn_method=mvn_method, **kwargs
+        )
 
     def sample_unconditional_prior(
         self,
@@ -1395,6 +1454,7 @@ def sample_unconditional_prior(
         steps: int | None = None,
         use_data_time_dim: bool = False,
         random_seed: RandomState | None = None,
+        mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
         **kwargs,
     ) -> InferenceData:
         """
@@ -1423,6 +1483,14 @@ def sample_unconditional_prior(
         random_seed : int, RandomState or Generator, optional
             Seed for the random number generator.
 
+        mvn_method: str, default "svd"
+            Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+            (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+            to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+            In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+            recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
         kwargs:
             Additional keyword arguments are passed to pymc.sample_posterior_predictive
 
@@ -1439,7 +1507,13 @@ def sample_unconditional_prior(
         """
 
         return self._sample_unconditional(
-            idata, "prior", steps, use_data_time_dim, random_seed, **kwargs
+            idata=idata,
+            group="prior",
+            steps=steps,
+            use_data_time_dim=use_data_time_dim,
+            random_seed=random_seed,
+            mvn_method=mvn_method,
+            **kwargs,
         )
 
     def sample_unconditional_posterior(
@@ -1448,6 +1522,7 @@ def sample_unconditional_posterior(
         steps: int | None = None,
         use_data_time_dim: bool = False,
         random_seed: RandomState | None = None,
+        mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
         **kwargs,
     ) -> InferenceData:
         """
@@ -1477,6 +1552,14 @@ def sample_unconditional_posterior(
         random_seed : int, RandomState or Generator, optional
             Seed for the random number generator.
 
+        mvn_method: str, default "svd"
+            Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+            (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+            to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+            In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+            recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
         Returns
         -------
         InferenceData
@@ -1490,7 +1573,13 @@ def sample_unconditional_posterior(
         """
 
         return self._sample_unconditional(
-            idata, "posterior", steps, use_data_time_dim, random_seed, **kwargs
+            idata=idata,
+            group="posterior",
+            steps=steps,
+            use_data_time_dim=use_data_time_dim,
+            random_seed=random_seed,
+            mvn_method=mvn_method,
+            **kwargs,
         )
 
     def sample_statespace_matrices(
@@ -1933,6 +2022,7 @@ def forecast(
         filter_output="smoothed",
         random_seed: RandomState | None = None,
         verbose: bool = True,
+        mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
         **kwargs,
     ) -> InferenceData:
         """
@@ -1989,6 +2079,14 @@ def forecast(
         verbose: bool, default=True
             Whether to print diagnostic information about forecasting.
 
+        mvn_method: str, default "svd"
+            Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+            (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+            to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+            In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+            recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
         kwargs:
             Additional keyword arguments are passed to pymc.sample_posterior_predictive
 
@@ -2098,6 +2196,7 @@ def forecast(
                 sequence_names=self.kalman_filter.seq_names,
                 k_endog=self.k_endog,
                 append_x0=False,
+                method=mvn_method,
             )
 
             forecast_model.rvs_to_initial_values = {
@@ -2126,6 +2225,7 @@ def impulse_response_function(
         shock_trajectory: np.ndarray | None = None,
         orthogonalize_shocks: bool = False,
         random_seed: RandomState | None = None,
+        mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
         **kwargs,
     ):
         """
@@ -2177,6 +2277,14 @@ def impulse_response_function(
         random_seed : int, RandomState or Generator, optional
             Seed for the random number generator.
 
+        mvn_method: str, default "svd"
+            Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+            (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+            to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+            In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+            recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
         kwargs:
             Additional keyword arguments are passed to pymc.sample_posterior_predictive
 
@@ -2236,7 +2344,7 @@ def impulse_response_function(
                 shock_trajectory = pt.zeros((n_steps, self.k_posdef))
                 if Q is not None:
                     init_shock = pm.MvNormal(
-                        "initial_shock", mu=0, cov=Q, dims=[SHOCK_DIM], method="svd"
+                        "initial_shock", mu=0, cov=Q, dims=[SHOCK_DIM], method=mvn_method
                     )
                 else:
                     init_shock = pm.Deterministic(
diff --git a/pymc_extras/statespace/filters/distributions.py b/pymc_extras/statespace/filters/distributions.py
index 1e4f2b153..fc72d2a34 100644
--- a/pymc_extras/statespace/filters/distributions.py
+++ b/pymc_extras/statespace/filters/distributions.py
@@ -72,6 +72,7 @@ def __new__(
         mode=None,
         sequence_names=None,
         append_x0=True,
+        method="svd",
         **kwargs,
     ):
         # Ignore dims in support shape because they are just passed along to the "observed" and "latent" distributions
@@ -100,6 +101,7 @@ def __new__(
             mode=mode,
             sequence_names=sequence_names,
             append_x0=append_x0,
+            method=method,
             **kwargs,
         )
 
@@ -119,6 +121,7 @@ def dist(
         mode=None,
         sequence_names=None,
         append_x0=True,
+        method="svd",
         **kwargs,
     ):
         steps = get_support_shape_1d(
@@ -135,6 +138,7 @@ def dist(
             mode=mode,
             sequence_names=sequence_names,
             append_x0=append_x0,
+            method=method,
             **kwargs,
         )
 
@@ -155,6 +159,7 @@ def rv_op(
         mode=None,
         sequence_names=None,
         append_x0=True,
+        method="svd",
     ):
         if sequence_names is None:
             sequence_names = []
@@ -205,10 +210,10 @@ def step_fn(*args):
             a = state[:k]
 
             middle_rng, a_innovation = pm.MvNormal.dist(
-                mu=0, cov=Q, rng=rng, method="svd"
+                mu=0, cov=Q, rng=rng, method=method
             ).owner.outputs
             next_rng, y_innovation = pm.MvNormal.dist(
-                mu=0, cov=H, rng=middle_rng, method="svd"
+                mu=0, cov=H, rng=middle_rng, method=method
             ).owner.outputs
 
             a_mu = c + T @ a
@@ -224,8 +229,8 @@ def step_fn(*args):
         Z_init = Z_ if Z_ in non_sequences else Z_[0]
         H_init = H_ if H_ in non_sequences else H_[0]
 
-        init_x_ = pm.MvNormal.dist(a0_, P0_, rng=rng, method="svd")
-        init_y_ = pm.MvNormal.dist(Z_init @ init_x_, H_init, rng=rng, method="svd")
+        init_x_ = pm.MvNormal.dist(a0_, P0_, rng=rng, method=method)
+        init_y_ = pm.MvNormal.dist(Z_init @ init_x_, H_init, rng=rng, method=method)
 
         init_dist_ = pt.concatenate([init_x_, init_y_], axis=0)
 
@@ -281,6 +286,7 @@ def __new__(
         sequence_names=None,
         mode=None,
         append_x0=True,
+        method="svd",
         **kwargs,
     ):
         dims = kwargs.pop("dims", None)
@@ -310,6 +316,7 @@ def __new__(
             mode=mode,
             sequence_names=sequence_names,
             append_x0=append_x0,
+            method=method,
             **kwargs,
         )
         latent_obs_combined = pt.specify_shape(latent_obs_combined, (steps + int(append_x0), None))
@@ -368,11 +375,11 @@ def __new__(cls, *args, **kwargs):
         return super().__new__(cls, *args, **kwargs)
 
     @classmethod
-    def dist(cls, mus, covs, logp, **kwargs):
-        return super().dist([mus, covs, logp], **kwargs)
+    def dist(cls, mus, covs, logp, method="svd", **kwargs):
+        return super().dist([mus, covs, logp], method=method, **kwargs)
 
     @classmethod
-    def rv_op(cls, mus, covs, logp, size=None):
+    def rv_op(cls, mus, covs, logp, method="svd", size=None):
         # Batch dimensions (if any) will be on the far left, but scan requires time to be there instead
         if mus.ndim > 2:
             mus = pt.moveaxis(mus, -2, 0)
@@ -385,7 +392,7 @@ def rv_op(cls, mus, covs, logp, size=None):
         rng = pytensor.shared(np.random.default_rng())
 
         def step(mu, cov, rng):
-            new_rng, mvn = pm.MvNormal.dist(mu=mu, cov=cov, rng=rng, method="svd").owner.outputs
+            new_rng, mvn = pm.MvNormal.dist(mu=mu, cov=cov, rng=rng, method=method).owner.outputs
             return mvn, {rng: new_rng}
 
         mvn_seq, updates = pytensor.scan(
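For context, here is a minimal sketch (not part of the diff) of how the new keyword threads through the public API. The model setup below is an assumption for illustration: the `LevelTrendComponent` priors, parameter names, and dims follow the structural-module docs and may differ slightly across pymc-extras versions, and the `periods` argument to `forecast` is likewise taken from the existing API rather than this patch.

```python
import numpy as np
import pandas as pd
import pymc as pm
import pytensor.tensor as pt

from pymc_extras.statespace import structural as st

# Toy data purely for illustration
data = pd.DataFrame(
    np.random.default_rng(0).normal(size=(100, 1)).cumsum(axis=0),
    columns=["y"],
    index=pd.date_range("2000-01-01", periods=100, freq="D"),
)

# A local-level model with no measurement error: per the new docstrings, the
# covariances handed to MvNormal can be ill-conditioned here, so the robust
# "svd" default is the safe choice.
ss_mod = st.LevelTrendComponent(order=2, innovations_order=[0, 1]).build()

with pm.Model(coords=ss_mod.coords) as model:
    # Hypothetical priors; check ss_mod.param_info for what your model expects
    P0_diag = pm.Gamma("P0_diag", alpha=2, beta=5, dims=["state"])
    P0 = pm.Deterministic("P0", pt.diag(P0_diag), dims=["state", "state_aux"])
    initial_trend = pm.Normal("initial_trend", dims=["trend_state"])
    sigma_trend = pm.Gamma("sigma_trend", alpha=2, beta=10, dims=["trend_shock"])

    ss_mod.build_statespace_graph(data, mvn_method="svd")  # new keyword
    idata = pm.sample()

# The post-estimation methods accept the same keyword; a model that did
# include measurement error could trade robustness for speed with "cholesky".
cond_post = ss_mod.sample_conditional_posterior(idata, mvn_method="svd")
forecasts = ss_mod.forecast(idata, periods=12, mvn_method="svd")
```

Design-wise, the keyword is simply forwarded to `pm.MvNormal(..., method=...)` at every sampling site, so switching between "cholesky", "eigh", and "svd" changes only how the covariance is factorized when drawing or evaluating the MvNormal, never the statistical model itself.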