
Commit 37230d3

Chris Fonnesbeck committed
Merge branch 'main' into auto_versioning

2 parents b9f2adf + c858f0f

File tree

7 files changed: +137 −147 lines changed

pymc/distributions/multivariate.py
Lines changed: 8 additions & 16 deletions

@@ -2200,32 +2200,23 @@ def make_node(self, rng, size, dtype, alpha, K):
         alpha = at.as_tensor_variable(alpha)
         K = at.as_tensor_variable(intX(K))
 
-        if alpha.ndim > 0:
-            raise ValueError("The concentration parameter needs to be a scalar.")
-
         if K.ndim > 0:
             raise ValueError("K must be a scalar.")
 
         return super().make_node(rng, size, dtype, alpha, K)
 
-    def _infer_shape(self, size, dist_params, param_shapes=None):
-        alpha, K = dist_params
-
-        size = tuple(size)
-
-        return size + (K + 1,)
+    def _supp_shape_from_params(self, dist_params, **kwargs):
+        K = dist_params[1]
+        return (K + 1,)
 
     @classmethod
     def rng_fn(cls, rng, alpha, K, size):
         if K < 0:
             raise ValueError("K needs to be positive.")
 
-        if size is None:
-            size = (K,)
-        elif isinstance(size, int):
-            size = (size,) + (K,)
-        else:
-            size = tuple(size) + (K,)
+        size = to_tuple(size) if size is not None else alpha.shape
+        size = size + (K,)
+        alpha = alpha[..., np.newaxis]
 
         betas = rng.beta(1, alpha, size=size)

@@ -2294,9 +2285,10 @@ def dist(cls, alpha, K, *args, **kwargs):
         return super().dist([alpha, K], **kwargs)
 
     def moment(rv, size, alpha, K):
+        alpha = alpha[..., np.newaxis]
         moment = (alpha / (1 + alpha)) ** at.arange(K)
         moment *= 1 / (1 + alpha)
-        moment = at.concatenate([moment, [(alpha / (1 + alpha)) ** K]], axis=-1)
+        moment = at.concatenate([moment, (alpha / (1 + alpha)) ** K], axis=-1)
         if not rv_size_is_none(size):
             moment_size = at.concatenate(
                 [
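Taken together, these changes let StickBreakingWeights accept a batched concentration parameter: _supp_shape_from_params reports the support size K + 1, and rng_fn now broadcasts alpha against the requested size. A minimal sketch of the resulting behaviour (the values are illustrative; the shapes match the new random-draw tests further down):

import numpy as np
import pymc as pm

# A vector of concentrations yields one simplex of K + 1 weights per entry.
alpha = np.array([1.0, 2.0, 3.0])
draws = pm.StickBreakingWeights.dist(alpha=alpha, K=19).eval()

print(draws.shape)         # (3, 20), i.e. alpha.shape + (K + 1,)
print(draws.sum(axis=-1))  # each row sums to 1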

pymc/gp/gp.py
Lines changed: 4 additions & 44 deletions

@@ -685,18 +685,13 @@ def __init__(self, approx="VFE", *, mean_func=Zero(), cov_func=Constant(0.0)):
         super().__init__(mean_func=mean_func, cov_func=cov_func)
 
     def __add__(self, other):
-        # new_gp will default to FITC approx
         new_gp = super().__add__(other)
-        # make sure new gp has correct approx
         if not self.approx == other.approx:
             raise TypeError("Cannot add GPs with different approximations")
         new_gp.approx = self.approx
         return new_gp
 
-    # Use y as first argument, so that we can use functools.partial
-    # in marginal_likelihood instead of lambda. This makes pickling
-    # possible.
-    def _build_marginal_likelihood_logp(self, y, X, Xu, sigma, jitter):
+    def _build_marginal_likelihood_loglik(self, y, X, Xu, sigma, jitter):
         sigma2 = at.square(sigma)
         Kuu = self.cov_func(Xu)
         Kuf = self.cov_func(Xu, X)

@@ -725,9 +720,7 @@ def _build_marginal_likelihood_logp(self, y, X, Xu, sigma, jitter):
         quadratic = 0.5 * (at.dot(r, r_l) - at.dot(c, c))
         return -1.0 * (constant + logdet + quadratic + trace)
 
-    def marginal_likelihood(
-        self, name, X, Xu, y, noise=None, is_observed=True, jitter=JITTER_DEFAULT, **kwargs
-    ):
+    def marginal_likelihood(self, name, X, Xu, y, noise=None, jitter=JITTER_DEFAULT, **kwargs):
         R"""
         Returns the approximate marginal likelihood distribution, given the input
         locations `X`, inducing point locations `Xu`, data `y`, and white noise

@@ -747,9 +740,6 @@ def marginal_likelihood(
             noise. Must have shape `(n, )`.
         noise: scalar, Variable
             Standard deviation of the Gaussian noise.
-        is_observed: bool
-            Whether to set `y` as an `observed` variable in the `model`.
-            Default is `True`.
         jitter: scalar
             A small correction added to the diagonal of positive semi-definite
             covariance matrices to ensure numerical stability.

@@ -767,38 +757,8 @@ def marginal_likelihood(
         else:
             self.sigma = noise
 
-        if is_observed:
-            return pm.DensityDist(
-                name,
-                X,
-                Xu,
-                self.sigma,
-                jitter,
-                logp=self._build_marginal_likelihood_logp,
-                observed=y,
-                ndims_params=[2, 2, 0],
-                size=X.shape[0],
-                **kwargs,
-            )
-        else:
-            warnings.warn(
-                "The 'is_observed' argument has been deprecated. If the GP is "
-                "unobserved use gp.Latent instead.",
-                FutureWarning,
-            )
-            return pm.DensityDist(
-                name,
-                X,
-                Xu,
-                self.sigma,
-                jitter,
-                logp=self._build_marginal_likelihood_logp,
-                observed=y,
-                ndims_params=[2, 2, 0],
-                # ndim_supp=1,
-                size=X.shape[0],
-                **kwargs,
-            )
+        approx_loglik = self._build_marginal_likelihood_loglik(y, X, Xu, noise, jitter)
+        pm.Potential(f"marginalapprox_loglik_{name}", approx_loglik, **kwargs)
 
     def _build_conditional(
         self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total, jitter
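With this change, MarginalApprox.marginal_likelihood registers the approximate log-likelihood as a pm.Potential instead of wrapping it in a pm.DensityDist, so the method no longer returns a random variable and the deprecated is_observed branch is gone. A usage sketch under those assumptions (data, kernel, and priors below are illustrative):

import numpy as np
import pymc as pm

X = np.linspace(0, 10, 100)[:, None]  # input locations
Xu = np.linspace(0, 10, 20)[:, None]  # inducing point locations
y = np.sin(X.ravel()) + 0.1 * np.random.randn(100)

with pm.Model() as model:
    cov_func = pm.gp.cov.ExpQuad(1, ls=1.0)
    gp = pm.gp.MarginalApprox(cov_func=cov_func, approx="FITC")
    sigma = pm.HalfNormal("sigma", sigma=1.0)
    # Adds a Potential named "marginalapprox_loglik_lik" to the model;
    # nothing is returned, unlike the old DensityDist-based version.
    gp.marginal_likelihood("lik", X=X, Xu=Xu, y=y, noise=sigma)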

pymc/tests/test_distributions.py
Lines changed: 31 additions & 0 deletions

@@ -24,6 +24,7 @@
 from aeppl.logprob import ParameterValueError
 from aesara.tensor.random.utils import broadcast_params
 
+from pymc.aesaraf import compile_pymc
 from pymc.distributions.continuous import get_tau_sigma
 from pymc.util import UNSET

@@ -953,6 +954,17 @@ def test_hierarchical_obs_logp():
     assert not any(isinstance(o, RandomVariable) for o in ops)
 
 
+@pytest.fixture(scope="module")
+def stickbreakingweights_logpdf():
+    _value = at.vector()
+    _alpha = at.scalar()
+    _k = at.iscalar()
+    _logp = logp(StickBreakingWeights.dist(_alpha, _k), _value)
+    core_fn = compile_pymc([_value, _alpha, _k], _logp)
+
+    return np.vectorize(core_fn, signature="(n),(),()->()")
+
+
 class TestMatchesScipy:
     def test_uniform(self):
         check_logp(

@@ -2318,6 +2330,25 @@ def test_stickbreakingweights_invalid(self):
         assert pm.logp(sbw, np.array([0.4, 0.3, 0.2, -0.1])).eval() == -np.inf
         assert pm.logp(sbw_wrong_K, np.array([0.4, 0.3, 0.2, 0.1])).eval() == -np.inf
 
+    @pytest.mark.parametrize(
+        "alpha,K",
+        [
+            (np.array([0.5, 1.0, 2.0]), 3),
+            (np.arange(1, 7, dtype="float64").reshape(2, 3), 5),
+        ],
+    )
+    def test_stickbreakingweights_vectorized(self, alpha, K, stickbreakingweights_logpdf):
+        value = pm.StickBreakingWeights.dist(alpha, K).eval()
+        with Model():
+            sbw = StickBreakingWeights("sbw", alpha=alpha, K=K, transform=None)
+        pt = {"sbw": value}
+        assert_almost_equal(
+            pm.logp(sbw, value).eval(),
+            stickbreakingweights_logpdf(value, alpha, K),
+            decimal=select_by_precision(float64=6, float32=2),
+            err_msg=str(pt),
+        )
+
     @aesara.config.change_flags(compute_test_value="raise")
     def test_categorical_bounds(self):
         with Model():
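The fixture above compiles the logp once for scalar parameters, then uses np.vectorize with a gufunc signature to broadcast that core function over batched alpha; the parametrized test compares the vectorized distribution logp against this reference. A standalone illustration of the pattern (the toy core_fn below is hypothetical):

import numpy as np

def core_fn(value, alpha, k):
    # Stand-in for the compiled logp: value is a length-n vector,
    # alpha and k are scalars, and the result is a scalar.
    return alpha * value.sum() + k

vec_fn = np.vectorize(core_fn, signature="(n),(),()->()")

values = np.ones((2, 3, 4))             # a (2, 3) batch of length-4 vectors
alphas = np.arange(6.0).reshape(2, 3)   # one alpha per batch element
print(vec_fn(values, alphas, 5).shape)  # (2, 3): broadcast over leading dims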

pymc/tests/test_distributions_moments.py
Lines changed: 26 additions & 0 deletions

@@ -1166,6 +1166,32 @@ def test_rice_moment(nu, sigma, size, expected):
                 fill_value=np.append((1 / 3) ** np.arange(5) * 2 / 3, (1 / 3) ** 5),
             ),
         ),
+        (
+            np.array([1, 3]),
+            11,
+            None,
+            np.array(
+                [
+                    np.append((1 / 2) ** np.arange(11) * 1 / 2, (1 / 2) ** 11),
+                    np.append((3 / 4) ** np.arange(11) * 1 / 4, (3 / 4) ** 11),
+                ]
+            ),
+        ),
+        (
+            np.array([1, 3, 5]),
+            9,
+            (5, 3),
+            np.full(
+                shape=(5, 3, 10),
+                fill_value=np.array(
+                    [
+                        np.append((1 / 2) ** np.arange(9) * 1 / 2, (1 / 2) ** 9),
+                        np.append((3 / 4) ** np.arange(9) * 1 / 4, (3 / 4) ** 9),
+                        np.append((5 / 6) ** np.arange(9) * 1 / 6, (5 / 6) ** 9),
+                    ]
+                ),
+            ),
+        ),
     ],
 )
 def test_stickbreakingweights_moment(alpha, K, size, expected):
pymc/tests/test_distributions_random.py
Lines changed: 12 additions & 0 deletions

@@ -1329,6 +1329,18 @@ def check_basic_properties(self):
         assert np.all(draws <= 1)
 
 
+class TestStickBreakingWeights_1D_alpha(BaseTestDistributionRandom):
+    pymc_dist = pm.StickBreakingWeights
+    pymc_dist_params = {"alpha": [1.0, 2.0, 3.0], "K": 19}
+    expected_rv_op_params = {"alpha": [1.0, 2.0, 3.0], "K": 19}
+    sizes_to_check = [None, (3,), (5, 3)]
+    sizes_expected = [(3, 20), (3, 20), (5, 3, 20)]
+    checks_to_run = [
+        "check_pymc_params_match_rv_op",
+        "check_rv_size",
+    ]
+
+
 class TestCategorical(BaseTestDistributionRandom):
     pymc_dist = pm.Categorical
     pymc_dist_params = {"p": np.array([0.28, 0.62, 0.10])}

pymc/tests/test_gp.py
Lines changed: 56 additions & 48 deletions

@@ -846,63 +846,71 @@ def testLatent2(self):
 
 class TestMarginalVsMarginalApprox:
     R"""
-    Compare logp of models Marginal and MarginalApprox.
-    Should be nearly equal when inducing points are same as inputs.
+    Compare test fits of models Marginal and MarginalApprox.
     """
 
     def setup_method(self):
-        X = np.random.randn(50, 3)
-        y = np.random.randn(50)
-        Xnew = np.random.randn(60, 3)
-        pnew = np.random.randn(60)
-        with pm.Model() as model:
-            cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])
-            mean_func = pm.gp.mean.Constant(0.5)
-            gp = pm.gp.Marginal(mean_func=mean_func, cov_func=cov_func)
-            sigma = 0.1
-            f = gp.marginal_likelihood("f", X, y, noise=sigma)
-            p = gp.conditional("p", Xnew)
-        self.logp = model.compile_logp()({"p": pnew})
-        self.X = X
-        self.Xnew = Xnew
-        self.y = y
-        self.sigma = sigma
-        self.pnew = pnew
-        self.gp = gp
+        self.sigma = 0.1
+        self.x = np.linspace(-5, 5, 30)
+        self.y = np.random.normal(0.25 * self.x, self.sigma)
+        with pm.Model() as model:
+            cov_func = pm.gp.cov.Linear(1, c=0.0)
+            c = pm.Normal("c", mu=20.0, sigma=100.0)  # far from true value
+            mean_func = pm.gp.mean.Constant(c)
+            self.gp = pm.gp.Marginal(mean_func=mean_func, cov_func=cov_func)
+            sigma = pm.HalfNormal("sigma", sigma=100)
+            self.gp.marginal_likelihood("lik", self.x[:, None], self.y, sigma)
+            self.map_full = pm.find_MAP(method="bfgs")  # bfgs seems to work much better than lbfgsb
+
+        self.x_new = np.linspace(-6, 6, 20)
+
+        # Include additive Gaussian noise, return diagonal of predicted covariance matrix
+        with model:
+            self.pred_mu, self.pred_var = self.gp.predict(
+                self.x_new[:, None], point=self.map_full, pred_noise=True, diag=True
+            )
 
-    @pytest.mark.parametrize("approx", ["FITC", "VFE", "DTC"])
-    def testApproximations(self, approx):
-        with pm.Model() as model:
-            cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])
-            mean_func = pm.gp.mean.Constant(0.5)
-            gp = pm.gp.MarginalApprox(mean_func=mean_func, cov_func=cov_func, approx=approx)
-            f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma)
-            p = gp.conditional("p", self.Xnew)
-            approx_logp = model.compile_logp()({"p": self.pnew})
-        npt.assert_allclose(approx_logp, self.logp, atol=0, rtol=1e-2)
+        # Dont include additive Gaussian noise, return full predicted covariance matrix
+        with model:
+            self.pred_mu, self.pred_covar = self.gp.predict(
+                self.x_new[:, None], point=self.map_full, pred_noise=False, diag=False
+            )
 
     @pytest.mark.parametrize("approx", ["FITC", "VFE", "DTC"])
-    def testPredictVar(self, approx):
+    def test_fits_and_preds(self, approx):
+        """Get MAP estimate for GP approximation, compare results and predictions to what's returned
+        by an unapproximated GP. The tolerances are fairly wide, but narrow relative to initial
+        values of the unknown parameters.
+        """
+
         with pm.Model() as model:
-            cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])
-            mean_func = pm.gp.mean.Constant(0.5)
+            cov_func = pm.gp.cov.Linear(1, c=0.0)
+            c = pm.Normal("c", mu=20.0, sigma=100.0, initval=-500.0)
+            mean_func = pm.gp.mean.Constant(c)
             gp = pm.gp.MarginalApprox(mean_func=mean_func, cov_func=cov_func, approx=approx)
-            f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma)
-            mu1, var1 = self.gp.predict(self.Xnew, diag=True)
-            mu2, var2 = gp.predict(self.Xnew, diag=True)
-        npt.assert_allclose(mu1, mu2, atol=0, rtol=1e-3)
-        npt.assert_allclose(var1, var2, atol=0, rtol=1e-3)
+            sigma = pm.HalfNormal("sigma", sigma=100, initval=50.0)
+            gp.marginal_likelihood("lik", self.x[:, None], self.x[:, None], self.y, sigma)
+            map_approx = pm.find_MAP(method="bfgs")
+
+        # Check MAP gets approximately correct result
+        npt.assert_allclose(self.map_full["c"], map_approx["c"], atol=0.01, rtol=0.1)
+        npt.assert_allclose(self.map_full["sigma"], map_approx["sigma"], atol=0.01, rtol=0.1)
+
+        # Check that predict (and conditional) work, include noise, with diagonal non-full pred var.
+        with model:
+            pred_mu_approx, pred_var_approx = gp.predict(
+                self.x_new[:, None], point=map_approx, pred_noise=True, diag=True
+            )
+        npt.assert_allclose(self.pred_mu, pred_mu_approx, atol=0.0, rtol=0.1)
+        npt.assert_allclose(self.pred_var, pred_var_approx, atol=0.0, rtol=0.1)
 
-    def testPredictCov(self):
-        with pm.Model() as model:
-            cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])
-            mean_func = pm.gp.mean.Constant(0.5)
-            gp = pm.gp.MarginalApprox(mean_func=mean_func, cov_func=cov_func, approx="DTC")
-            f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma)
-            mu1, cov1 = self.gp.predict(self.Xnew, pred_noise=True)
-            mu2, cov2 = gp.predict(self.Xnew, pred_noise=True)
-        npt.assert_allclose(mu1, mu2, atol=0, rtol=1e-3)
-        npt.assert_allclose(cov1, cov2, atol=0, rtol=1e-3)
+        # Check that predict (and conditional) work, no noise, full pred covariance.
+        with model:
+            pred_mu_approx, pred_var_approx = gp.predict(
+                self.x_new[:, None], point=map_approx, pred_noise=True, diag=True
+            )
+        npt.assert_allclose(self.pred_mu, pred_mu_approx, atol=0.0, rtol=0.1)
+        npt.assert_allclose(self.pred_var, pred_var_approx, atol=0.0, rtol=0.1)
 
 
 class TestGPAdditive: