diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py
index c059984958..bf6addb900 100644
--- a/pymc/distributions/timeseries.py
+++ b/pymc/distributions/timeseries.py
@@ -12,15 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Tuple, Union
+
 import aesara.tensor as at
 import numpy as np
 
 from aesara import scan
-from scipy import stats
+from aesara.tensor.random.op import RandomVariable
 
-from pymc.distributions import distribution, multivariate
+from pymc.aesaraf import change_rv_size, floatX, intX
+from pymc.distributions import distribution, logprob, multivariate
 from pymc.distributions.continuous import Flat, Normal, get_tau_sigma
+from pymc.distributions.dist_math import check_parameters
 from pymc.distributions.shape_utils import to_tuple
+from pymc.util import check_dist_not_registered
 
 __all__ = [
     "AR1",
@@ -33,6 +38,195 @@
 ]
 
 
+class GaussianRandomWalkRV(RandomVariable):
+    """
+    GaussianRandomWalk Random Variable
+    """
+
+    name = "GaussianRandomWalk"
+    ndim_supp = 1
+    ndims_params = [0, 0, 0, 0]
+    dtype = "floatX"
+    _print_name = ("GaussianRandomWalk", "\\operatorname{GaussianRandomWalk}")
+
+    def make_node(self, rng, size, dtype, mu, sigma, init, steps):
+        steps = at.as_tensor_variable(steps)
+        if steps.ndim != 0 or not steps.dtype.startswith("int"):
+            raise ValueError("steps must be an integer scalar (ndim=0).")
+
+        return super().make_node(rng, size, dtype, mu, sigma, init, steps)
+
+    def _supp_shape_from_params(self, dist_params, rep_param_idx=0, param_shapes=None):
+        steps = dist_params[3]
+
+        return (steps + 1,)
+
+    @classmethod
+    def rng_fn(
+        cls,
+        rng: np.random.RandomState,
+        mu: Union[np.ndarray, float],
+        sigma: Union[np.ndarray, float],
+        init: Union[np.ndarray, float],
+        steps: int,
+        size: Tuple[int],
+    ) -> np.ndarray:
+        """Gaussian Random Walk generator.
+
+        The init value is drawn upstream (by default from a Normal with the same
+        mu and sigma as the innovations) and is passed in here as a concrete value.
+
+        Parameters
+        ----------
+        rng: np.random.RandomState
+            Numpy random number generator
+        mu: array_like of float
+            Random walk drift
+        sigma: array_like of float
+            Standard deviation of the innovations (sigma > 0)
+        init: array_like of float
+            Initial value(s) of the GaussianRandomWalk
+        steps: int
+            Length of the random walk (steps > 0). The returned array has length
+            steps + 1, because the first entry is the initial value.
+        size: tuple of int
+            The number of independent random walk series generated
+
+        Returns
+        -------
+        ndarray
+        """
+
+        if steps < 1:
+            raise ValueError("Steps must be greater than 0")
+
+        # If size is None, the returned series has shape (*implied_dims, steps + 1)
+        if size is None:
+            # broadcast parameters with each other to find the implied dims
+            bcast_shape = np.broadcast_shapes(
+                np.asarray(mu).shape,
+                np.asarray(sigma).shape,
+                np.asarray(init).shape,
+            )
+            dist_shape = (*bcast_shape, int(steps))
+
+        # If size is given, the returned series has shape (*size, steps + 1)
+        else:
+            dist_shape = (*size, int(steps))
+
+        innovations = rng.normal(loc=mu, scale=sigma, size=dist_shape)
+        grw = np.concatenate([np.asarray(init)[..., None], innovations], axis=-1)
+        return np.cumsum(grw, axis=-1)
+
+
+gaussianrandomwalk = GaussianRandomWalkRV()
+
+
+class GaussianRandomWalk(distribution.Continuous):
+    r"""Random Walk with Normal innovations.
+
+    Parameters
+    ----------
+    mu : tensor_like of float
+        innovation drift, defaults to 0.0
+    sigma : tensor_like of float, optional
+        sigma > 0, innovation standard deviation, defaults to 1.0
+    init : Univariate PyMC distribution
+        Univariate distribution of the initial value, created with the `.dist()` API.
+        Defaults to a Normal with the same `mu` and `sigma` as the GaussianRandomWalk
+    steps : int
+        Number of steps in the Gaussian Random Walk (steps > 0), giving a series
+        of length steps + 1
+    """
+
+    rv_op = gaussianrandomwalk
+
+    def __new__(cls, name, mu=0.0, sigma=1.0, init=None, steps=None, **kwargs):
+        if init is not None:
+            check_dist_not_registered(init)
+        return super().__new__(cls, name, mu, sigma, init, steps, **kwargs)
+
+    @classmethod
+    def dist(
+        cls, mu=0.0, sigma=1.0, init=None, steps=None, size=None, **kwargs
+    ) -> at.TensorVariable:
+
+        mu = at.as_tensor_variable(floatX(mu))
+        sigma = at.as_tensor_variable(floatX(sigma))
+        if steps is None:
+            raise ValueError("Must specify steps parameter")
+        steps = at.as_tensor_variable(intX(steps))
+
+        shape = kwargs.get("shape", None)
+        if size is None and shape is None:
+            init_size = None
+        else:
+            init_size = to_tuple(size) if size is not None else to_tuple(shape)[:-1]
+
+        # If no init distribution is passed, initialize with a Normal of same mu and sigma
+        if init is None:
+            init = Normal.dist(mu, sigma, size=init_size)
+        else:
+            if not (
+                isinstance(init, at.TensorVariable)
+                and init.owner is not None
+                and isinstance(init.owner.op, RandomVariable)
+                and init.owner.op.ndim_supp == 0
+            ):
+                raise TypeError("init must be a univariate distribution variable")
+
+            if init_size is not None:
+                init = change_rv_size(init, init_size)
+            else:
+                # If not explicit, size is determined by the shapes of mu, sigma, and init
+                bcast_shape = at.broadcast_arrays(mu, sigma, init)[0].shape
+                init = change_rv_size(init, bcast_shape)
+
+        # Ignore the logprob of the init var here because it is accounted for in the
+        # logp method
+        init.tag.ignore_logprob = True
+
+        return super().dist([mu, sigma, init, steps], size=size, **kwargs)
+
+    def logp(
+        value: at.Variable,
+        mu: at.Variable,
+        sigma: at.Variable,
+        init: at.Variable,
+        steps: at.Variable,
+    ) -> at.TensorVariable:
+        """Calculate the log-probability of a Gaussian Random Walk at a specified value.
+
+        Parameters
+        ----------
+        value : at.Variable
+            Value at which the log-probability is evaluated
+        mu : at.Variable
+            Innovation drift
+        sigma : at.Variable
+            Innovation standard deviation
+        init : at.Variable
+            Distribution of the initial value, evaluated at value[..., 0]
+        steps : at.Variable
+            Number of steps (checked to be > 0)
+
+        Returns
+        -------
+        TensorVariable
+        """
+
+        # Calculate the logp of the initial value
+        init_logp = logprob.logp(init, value[..., 0])
+
+        # The first differences of the series are iid Normal(mu, sigma) innovations
+        stationary_series = value[..., 1:] - value[..., :-1]
+        series_logp = logprob.logp(Normal.dist(mu, sigma), stationary_series)
+
+        return check_parameters(
+            init_logp + series_logp.sum(axis=-1),
+            steps > 0,
+            msg="steps > 0",
+        )
+
+
 class AR1(distribution.Continuous):
     """
     Autoregressive process with 1 lag.
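For intuition, the sampling logic implemented by `rng_fn` above boils down to the following standalone NumPy sketch (illustrative only, not part of the patch; the helper name `grw_draw` is made up):

```python
import numpy as np

def grw_draw(rng, mu, sigma, init, steps, size=None):
    # Innovations are iid Normal(mu, sigma) draws; the walk is their cumulative
    # sum, prepended with the initial value.
    shape = (steps,) if size is None else (*size, steps)
    innovations = rng.normal(loc=mu, scale=sigma, size=shape)
    series = np.concatenate(
        [np.broadcast_to(init, (*shape[:-1], 1)), innovations], axis=-1
    )
    return np.cumsum(series, axis=-1)

rng = np.random.default_rng(123)
draw = grw_draw(rng, mu=1.0, sigma=2.0, init=0.0, steps=4, size=(3,))
assert draw.shape == (3, 5)  # (*size, steps + 1)
```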
@@ -171,125 +365,6 @@ def logp(self, value):
         return at.sum(innov_like) + at.sum(init_like)
 
 
-class GaussianRandomWalk(distribution.Continuous):
-    r"""Random Walk with Normal innovations
-
-    Note that this is mainly a user-friendly wrapper to enable an easier specification
-    of GRW. You are not restricted to use only Normal innovations but can use any
-    distribution: just use `aesara.tensor.cumsum()` to create the random walk behavior.
-
-    Parameters
-    ----------
-    mu : tensor_like of float, default 0
-        innovation drift, defaults to 0.0
-        For vector valued `mu`, first dimension must match shape of the random walk, and
-        the first element will be discarded (since there is no innovation in the first timestep)
-    sigma : tensor_like of float, optional
-        `sigma` > 0, innovation standard deviation (only required if `tau` is not specified)
-        For vector valued `sigma`, first dimension must match shape of the random walk, and
-        the first element will be discarded (since there is no innovation in the first timestep)
-    tau : tensor_like of float, optional
-        `tau` > 0, innovation precision (only required if `sigma` is not specified)
-        For vector valued `tau`, first dimension must match shape of the random walk, and
-        the first element will be discarded (since there is no innovation in the first timestep)
-    init : pymc.Distribution, optional
-        distribution for initial value (Defaults to Flat())
-    """
-
-    def __init__(self, tau=None, init=None, sigma=None, mu=0.0, *args, **kwargs):
-        kwargs.setdefault("shape", 1)
-        super().__init__(*args, **kwargs)
-        if sum(self.shape) == 0:
-            raise TypeError("GaussianRandomWalk must be supplied a non-zero shape argument!")
-        tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
-        self.tau = at.as_tensor_variable(tau)
-        sigma = at.as_tensor_variable(sigma)
-        self.sigma = sigma
-        self.mu = at.as_tensor_variable(mu)
-        self.init = init or Flat.dist()
-        self.mean = at.as_tensor_variable(0.0)
-
-    def _mu_and_sigma(self, mu, sigma):
-        """Helper to get mu and sigma if they are high dimensional."""
-        if sigma.ndim > 0:
-            sigma = sigma[1:]
-        if mu.ndim > 0:
-            mu = mu[1:]
-        return mu, sigma
-
-    def logp(self, x):
-        """
-        Calculate log-probability of Gaussian Random Walk distribution at specified value.
-
-        Parameters
-        ----------
-        x : numeric
-            Value for which log-probability is calculated.
-
-        Returns
-        -------
-        TensorVariable
-        """
-        if x.ndim > 0:
-            x_im1 = x[:-1]
-            x_i = x[1:]
-            mu, sigma = self._mu_and_sigma(self.mu, self.sigma)
-            innov_like = Normal.dist(mu=x_im1 + mu, sigma=sigma).logp(x_i)
-            return self.init.logp(x[0]) + at.sum(innov_like)
-        return self.init.logp(x)
-
-    def random(self, point=None, size=None):
-        """Draw random values from GaussianRandomWalk.
-
-        Parameters
-        ----------
-        point : dict or Point, optional
-            Dict of variable values on which random values are to be
-            conditioned (uses default point if not specified).
-        size : int, optional
-            Desired size of random sample (returns one sample if not
-            specified).
-
-        Returns
-        -------
-        array
-        """
-        # sigma, mu = distribution.draw_values([self.sigma, self.mu], point=point, size=size)
-        # return distribution.generate_samples(
-        #     self._random,
-        #     sigma=sigma,
-        #     mu=mu,
-        #     size=size,
-        #     dist_shape=self.shape,
-        #     not_broadcast_kwargs={"sample_shape": to_tuple(size)},
-        # )
-        pass
-
-    def _random(self, sigma, mu, size, sample_shape):
-        """Implement a Gaussian random walk as a cumulative sum of normals.
-        axis = len(size) - 1 denotes the axis along which cumulative sum would be calculated.
-        This might need to be corrected in future when issue #4010 is fixed.
-        """
-        if size[len(sample_shape)] == sample_shape:
-            axis = len(sample_shape)
-        else:
-            axis = len(size) - 1
-        rv = stats.norm(mu, sigma)
-        data = rv.rvs(size).cumsum(axis=axis)
-        data = np.array(data)
-
-        # the following lines center the random walk to start at the origin.
-        if len(data.shape) > 1:
-            for i in range(data.shape[0]):
-                data[i] = data[i] - data[i][0]
-        else:
-            data = data - data[0]
-        return data
-
-    def _distr_parameters_for_repr(self):
-        return ["mu", "sigma"]
-
-
 class GARCH11(distribution.Continuous):
     r"""
     GARCH(1,1) with Normal innovations. The model is specified by
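The `logp` method added above factors the joint density into an init term plus iid Normal terms on the first differences. A minimal SciPy cross-check of that factorization (illustrative values; relies on the default Normal(mu, sigma) init created in `dist()`):

```python
import numpy as np
import pymc as pm
import scipy.stats

mu, sigma = 1.0, 2.0
value = np.array([0.0, 0.5, 3.0])  # initial value plus steps = 2 innovations

grw = pm.GaussianRandomWalk.dist(mu=mu, sigma=sigma, steps=2)
ref = (
    scipy.stats.norm.logpdf(value[0], mu, sigma)  # logp of the initial value
    + scipy.stats.norm.logpdf(np.diff(value), mu, sigma).sum()  # innovation terms
)
np.testing.assert_allclose(pm.logp(grw, value).eval(), ref)
```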
diff --git a/pymc/tests/test_distributions.py b/pymc/tests/test_distributions.py
index 94ab4e7fb7..374b9d497c 100644
--- a/pymc/tests/test_distributions.py
+++ b/pymc/tests/test_distributions.py
@@ -2619,6 +2619,22 @@ def test_interpolated_transform(self, transform):
         assert not np.isfinite(m.compile_logp()({"x": -1.0}))
         assert not np.isfinite(m.compile_logp()({"x": 11.0}))
 
+    def test_gaussianrandomwalk(self):
+        def ref_logp(value, mu, sigma, steps):
+            # Relies on the fact that init is a Normal(mu, sigma) by default
+            return (
+                scipy.stats.norm.logpdf(value[0], mu, sigma)
+                + scipy.stats.norm.logpdf(np.diff(value), mu, sigma).sum()
+            )
+
+        self.check_logp(
+            pm.GaussianRandomWalk,
+            Vector(R, 4),
+            {"mu": R, "sigma": Rplus, "steps": Nat},
+            ref_logp,
+            decimal=select_by_precision(float64=6, float32=1),
+        )
+
 
 class TestBound:
     """Tests for pm.Bound distribution"""
diff --git a/pymc/tests/test_distributions_random.py b/pymc/tests/test_distributions_random.py
index 78bd3fc223..00a002828f 100644
--- a/pymc/tests/test_distributions_random.py
+++ b/pymc/tests/test_distributions_random.py
@@ -157,109 +157,6 @@ def pymc_random_discrete(
         assert p > alpha, str(pt)
 
 
-class BaseTestCases:
-    class BaseTestCase(SeededTest):
-        shape = 5
-        # the following are the default values of the distribution that take effect
-        # when the parametrized shape/size in the test case is None.
-        # For every distribution that defaults to non-scalar shapes they must be
-        # specified by the inheriting Test class. example: TestGaussianRandomWalk
-        default_shape = ()
-        default_size = ()
-
-        def setup_method(self, *args, **kwargs):
-            super().setup_method(*args, **kwargs)
-            self.model = pm.Model()
-
-        def get_random_variable(self, shape, with_vector_params=False, name=None):
-            """Creates a RandomVariable of the parametrized distribution."""
-            if with_vector_params:
-                params = {
-                    key: value * np.ones(self.shape, dtype=np.dtype(type(value)))
-                    for key, value in self.params.items()
-                }
-            else:
-                params = self.params
-            if name is None:
-                name = self.distribution.__name__
-            with self.model:
-                try:
-                    if shape is None:
-                        # in the test case parametrization "None" means "no specified (default)"
-                        return self.distribution(name, transform=None, **params)
-                    else:
-                        ndim_supp = self.distribution.rv_op.ndim_supp
-                        if ndim_supp == 0:
-                            size = shape
-                        else:
-                            size = shape[:-ndim_supp]
-                        return self.distribution(name, size=size, transform=None, **params)
-                except TypeError:
-                    if np.sum(np.atleast_1d(shape)) == 0:
-                        pytest.skip("Timeseries must have positive shape")
-                    raise
-
-        @staticmethod
-        def sample_random_variable(random_variable, size):
-            """Draws samples from a RandomVariable."""
-            if size:
-                random_variable = change_rv_size(random_variable, size, expand=True)
-            return random_variable.eval()
-
-        @pytest.mark.parametrize("size", [None, (), 1, (1,), 5, (4, 5)], ids=str)
-        @pytest.mark.parametrize("shape", [None, ()], ids=str)
-        def test_scalar_distribution_shape(self, shape, size):
-            """Draws samples of different [size] from a scalar [shape] RV."""
-            rv = self.get_random_variable(shape)
-            exp_shape = self.default_shape if shape is None else tuple(np.atleast_1d(shape))
-            exp_size = self.default_size if size is None else tuple(np.atleast_1d(size))
-            expected = exp_size + exp_shape
-            actual = np.shape(self.sample_random_variable(rv, size))
-            assert (
-                expected == actual
-            ), f"Sample size {size} from {shape}-shaped RV had shape {actual}. Expected: {expected}"
-            # check that negative size raises an error
-            with pytest.raises(ValueError):
-                self.sample_random_variable(rv, size=-2)
-            with pytest.raises(ValueError):
-                self.sample_random_variable(rv, size=(3, -2))
-
-        @pytest.mark.parametrize("size", [None, ()], ids=str)
-        @pytest.mark.parametrize(
-            "shape", [None, (), (1,), (1, 1), (1, 2), (10, 11, 1), (9, 10, 2)], ids=str
-        )
-        def test_scalar_sample_shape(self, shape, size):
-            """Draws samples of scalar [size] from a [shape] RV."""
-            rv = self.get_random_variable(shape)
-            exp_shape = self.default_shape if shape is None else tuple(np.atleast_1d(shape))
-            exp_size = self.default_size if size is None else tuple(np.atleast_1d(size))
-            expected = exp_size + exp_shape
-            actual = np.shape(self.sample_random_variable(rv, size))
-            assert (
-                expected == actual
-            ), f"Sample size {size} from {shape}-shaped RV had shape {actual}. Expected: {expected}"
-
-        @pytest.mark.parametrize("size", [None, 3, (4, 5)], ids=str)
-        @pytest.mark.parametrize("shape", [None, 1, (10, 11, 1)], ids=str)
-        def test_vector_params(self, shape, size):
-            shape = self.shape
-            rv = self.get_random_variable(shape, with_vector_params=True)
-            exp_shape = self.default_shape if shape is None else tuple(np.atleast_1d(shape))
-            exp_size = self.default_size if size is None else tuple(np.atleast_1d(size))
-            expected = exp_size + exp_shape
-            actual = np.shape(self.sample_random_variable(rv, size))
-            assert (
-                expected == actual
-            ), f"Sample size {size} from {shape}-shaped RV had shape {actual}. Expected: {expected}"
-
-
-@pytest.mark.xfail(reason="This distribution has not been refactored for v4")
-class TestGaussianRandomWalk(BaseTestCases.BaseTestCase):
-    distribution = pm.GaussianRandomWalk
-    params = {"mu": 1.0, "sigma": 1.0}
-    default_shape = (1,)
-
-
 class BaseTestDistributionRandom(SeededTest):
     """
     This class provides a base for tests that new RandomVariables are correctly
@@ -365,6 +262,11 @@ def check_pymc_params_match_rv_op(self):
         for (expected_name, expected_value), actual_variable in zip(
             self.expected_rv_op_params.items(), aesara_dist_inputs
         ):
+
+            # Evaluate symbolic inputs to distributions before comparing them numerically
+            if isinstance(expected_value, aesara.tensor.Variable):
+                expected_value = expected_value.eval()
+
             assert_almost_equal(expected_value, actual_variable.eval(), decimal=self.decimal)
 
     def check_rv_size(self):
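The two lines added to `check_pymc_params_match_rv_op` above are needed because an expected parameter can itself be symbolic, as with the `init=pm.Constant.dist(0)` used by the new timeseries test. Roughly, the distinction is:

```python
import aesara.tensor
import pymc as pm

expected_value = pm.Constant.dist(0)  # a symbolic RandomVariable, not a plain number
assert isinstance(expected_value, aesara.tensor.Variable)
assert expected_value.eval() == 0  # .eval() yields the concrete value to compare
```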
diff --git a/pymc/tests/test_distributions_timeseries.py b/pymc/tests/test_distributions_timeseries.py
index 3736b0134b..05bdeba76d 100644
--- a/pymc/tests/test_distributions_timeseries.py
+++ b/pymc/tests/test_distributions_timeseries.py
@@ -11,21 +11,120 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import numpy as np
 import pytest
+import scipy.stats
+
+import pymc as pm
 
 from pymc.aesaraf import floatX
 from pymc.distributions.continuous import Flat, Normal
-from pymc.distributions.timeseries import AR, AR1, GARCH11, EulerMaruyama
+from pymc.distributions.timeseries import (
+    AR,
+    AR1,
+    GARCH11,
+    EulerMaruyama,
+    GaussianRandomWalk,
+)
 from pymc.model import Model
 from pymc.sampling import sample, sample_posterior_predictive
 from pymc.tests.helpers import select_by_precision
+from pymc.tests.test_distributions_random import BaseTestDistributionRandom
+
+
+class TestGaussianRandomWalkRandom(BaseTestDistributionRandom):
+    # Override the default size for this test class
+    size = None
+
+    pymc_dist = pm.GaussianRandomWalk
+    pymc_dist_params = {"mu": 1.0, "sigma": 2, "init": pm.Constant.dist(0), "steps": 4}
+    expected_rv_op_params = {"mu": 1.0, "sigma": 2, "init": pm.Constant.dist(0), "steps": 4}
+
+    checks_to_run = [
+        "check_pymc_params_match_rv_op",
+        "check_rv_inferred_size",
+    ]
+
+    def check_rv_inferred_size(self):
+        steps = self.pymc_dist_params["steps"]
+        sizes_to_check = [None, (), 1, (1,)]
+        sizes_expected = [(steps + 1,), (steps + 1,), (1, steps + 1), (1, steps + 1)]
+
+        for size, expected in zip(sizes_to_check, sizes_expected):
+            pymc_rv = self.pymc_dist.dist(**self.pymc_dist_params, size=size)
+            actual = tuple(pymc_rv.shape.eval())
+            assert actual == expected
+
+    def test_steps_scalar_check(self):
+        with pytest.raises(ValueError, match="steps must be an integer scalar"):
+            self.pymc_dist.dist(steps=[1])
+
+
+def test_gaussianrandomwalk_inference():
+    mu, sigma, steps = 2, 1, 1000
+    obs = np.concatenate([[0], np.random.normal(mu, sigma, size=steps)]).cumsum()
+
+    with pm.Model():
+        _mu = pm.Uniform("mu", -10, 10)
+        _sigma = pm.Uniform("sigma", 0, 10)
+
+        obs_data = pm.MutableData("obs_data", obs)
+        grw = GaussianRandomWalk("grw", _mu, _sigma, steps=steps, observed=obs_data)
+
+        trace = pm.sample(chains=1)
 
-# pytestmark = pytest.mark.usefixtures("seeded_test")
-pytestmark = pytest.mark.xfail(reason="Timeseries not refactored")
+    recovered_mu = trace.posterior["mu"].mean()
+    recovered_sigma = trace.posterior["sigma"].mean()
+    np.testing.assert_allclose([mu, sigma], [recovered_mu, recovered_sigma], atol=0.2)
+
+
+@pytest.mark.parametrize("init", [None, pm.Normal.dist()])
+def test_gaussian_random_walk_init_dist_shape(init):
+    """Test that init_dist is properly resized"""
+    grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init=init)
+    assert tuple(grw.owner.inputs[-2].shape.eval()) == ()
+
+    grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init=init, size=(5,))
+    assert tuple(grw.owner.inputs[-2].shape.eval()) == (5,)
+
+    grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init=init, shape=1)
+    assert tuple(grw.owner.inputs[-2].shape.eval()) == ()
+
+    grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init=init, shape=(5, 1))
+    assert tuple(grw.owner.inputs[-2].shape.eval()) == (5,)
+
+    grw = pm.GaussianRandomWalk.dist(mu=[0, 0], sigma=1, steps=1, init=init)
+    assert tuple(grw.owner.inputs[-2].shape.eval()) == (2,)
+
+    grw = pm.GaussianRandomWalk.dist(mu=0, sigma=[1, 1], steps=1, init=init)
+    assert tuple(grw.owner.inputs[-2].shape.eval()) == (2,)
+
+    grw = pm.GaussianRandomWalk.dist(mu=np.zeros((3, 1)), sigma=[1, 1], steps=1, init=init)
+    assert tuple(grw.owner.inputs[-2].shape.eval()) == (3, 2)
+
+
+def test_gaussianrandomwalk_broadcasted_by_init_dist():
+    grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=4, init=pm.Normal.dist(size=(2, 3)))
+    assert tuple(grw.shape.eval()) == (2, 3, 5)
+    assert grw.eval().shape == (2, 3, 5)
+
+
+@pytest.mark.parametrize(
+    "init",
+    [
+        pm.HalfNormal.dist(sigma=2),
+        pm.StudentT.dist(nu=4, mu=1, sigma=0.5),
+    ],
+)
+def test_gaussian_random_walk_init_dist_logp(init):
+    grw = pm.GaussianRandomWalk.dist(init=init, steps=1)
+    assert np.isclose(
+        pm.logp(grw, [0, 0]).eval(),
+        pm.logp(init, 0).eval() + scipy.stats.norm.logpdf(0),
+    )
+
+
+@pytest.mark.xfail(reason="Timeseries not refactored")
 def test_AR():
     # AR1
     data = np.array([0.3, 1, 2, 3, 4])
@@ -63,6 +162,7 @@ def test_AR():
     np.testing.assert_allclose(ar_like, reg_like)
 
 
+@pytest.mark.xfail(reason="Timeseries not refactored")
 def test_AR_nd():
     # AR2 multidimensional
     p, T, n = 3, 100, 5
@@ -82,6 +182,7 @@ def test_AR_nd(
     )
 
 
+@pytest.mark.xfail(reason="Timeseries not refactored")
 def test_GARCH11():
     # test data ~ N(0, 1)
     data = np.array(
@@ -142,6 +243,7 @@ def _gen_sde_path(sde, pars, dt, n, x0):
     return np.array(xs)
 
 
+@pytest.mark.xfail(reason="Timeseries not refactored")
 def test_linear():
     lam = -0.78
     sig2 = 5e-3
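Taken together, the shape tests above pin down the new support-dimension contract: the random walk appends a time dimension of length `steps + 1` after any batch dimensions. For example:

```python
import pymc as pm

grw = pm.GaussianRandomWalk.dist(mu=0.0, sigma=1.0, steps=4)
assert tuple(grw.shape.eval()) == (5,)  # steps + 1

grw = pm.GaussianRandomWalk.dist(mu=0.0, sigma=1.0, steps=4, size=(2, 3))
assert tuple(grw.shape.eval()) == (2, 3, 5)  # (*size, steps + 1)
```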
diff --git a/scripts/run_mypy.py b/scripts/run_mypy.py
index 296e389e92..17d9fb18f7 100644
--- a/scripts/run_mypy.py
+++ b/scripts/run_mypy.py
@@ -35,7 +35,6 @@
 pymc/distributions/discrete.py
 pymc/distributions/shape_utils.py
 pymc/distributions/simulator.py
-pymc/distributions/timeseries.py
 pymc/distributions/transforms.py
 pymc/exceptions.py
 pymc/func_utils.py
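Finally, a short usage sketch of the new `init` argument introduced by this patch (illustrative parameter values, mirroring the init distributions exercised in the tests):

```python
import pymc as pm

# A random walk whose first value follows a StudentT instead of the default Normal
init = pm.StudentT.dist(nu=4, mu=0.0, sigma=0.5)
grw = pm.GaussianRandomWalk.dist(mu=0.1, sigma=1.0, init=init, steps=99)

draw = grw.eval()
assert draw.shape == (100,)  # the initial value plus 99 innovations
```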