Skip to content

Commit 53b642e

Browse files
authored
Conditional autoregression distribution (#4504)
* Add conditional autoregression distribution (CAR)
1 parent a7c5646 commit 53b642e

File tree

5 files changed

+166
-0
lines changed

5 files changed

+166
-0
lines changed

RELEASE-NOTES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
### New Features
88
+ `pm.math.cartesian` can now handle inputs that are themselves >1D (see [#4482](https://github.com/pymc-devs/pymc3/pull/4482)).
9+
+ The `CAR` distribution has been added to allow for use of conditional autoregressions which often are used in spatial and network models.
910
+ ...
1011

1112
### Maintenance

docs/source/api/distributions/multivariate.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Multivariate
1515
Multinomial
1616
Dirichlet
1717
DirichletMultinomial
18+
CAR
1819

1920
.. automodule:: pymc3.distributions.multivariate
2021
:members:

pymc3/distributions/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
)
8181
from pymc3.distributions.mixture import Mixture, MixtureSameFamily, NormalMixture
8282
from pymc3.distributions.multivariate import (
83+
CAR,
8384
Dirichlet,
8485
DirichletMultinomial,
8586
KroneckerNormal,
@@ -184,4 +185,5 @@
184185
"Simulator",
185186
"fast_sample_posterior_predictive",
186187
"BART",
188+
"CAR",
187189
]

pymc3/distributions/multivariate.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
"LKJCholeskyCov",
6767
"MatrixNormal",
6868
"KroneckerNormal",
69+
"CAR",
6970
]
7071

7172

@@ -2089,3 +2090,127 @@ def logp(self, value):
20892090

20902091
def _distr_parameters_for_repr(self):
20912092
return ["mu"]
2093+
2094+
2095+
class CAR(Continuous):
2096+
R"""
2097+
Likelihood for a conditional autoregression. This is a special case of the
2098+
multivariate normal with an adjacency-structured covariance matrix.
2099+
2100+
.. math::
2101+
2102+
f(x \mid W, \alpha, \tau) =
2103+
\frac{|T|^{1/2}}{(2\pi)^{k/2}}
2104+
\exp\left\{ -\frac{1}{2} (x-\mu)^{\prime} T^{-1} (x-\mu) \right\}
2105+
2106+
where :math:`T = (\tau D(I-\alpha W))^{-1}` and :math:`D = diag(\sum_i W_{ij})`.
2107+
2108+
======== ==========================
2109+
Support :math:`x \in \mathbb{R}^k`
2110+
Mean :math:`\mu \in \mathbb{R}^k`
2111+
Variance :math:`(\tau D(I-\alpha W))^{-1}`
2112+
======== ==========================
2113+
2114+
Parameters
2115+
----------
2116+
mu: array
2117+
Real-valued mean vector
2118+
W: Numpy matrix
2119+
Symmetric adjacency matrix of 1s and 0s indicating
2120+
adjacency between elements.
2121+
alpha: float or array
2122+
Autoregression parameter taking values between -1 and 1. Values closer to 0 indicate weaker
2123+
correlation and values closer to 1 indicate higher autocorrelation. For most use cases, the
2124+
support of alpha should be restricted to (0, 1)
2125+
tau: float or array
2126+
Positive precision variable controlling the scale of the underlying normal variates.
2127+
sparse: bool, default=False
2128+
Determines whether or not sparse computations are used
2129+
2130+
References
2131+
----------
2132+
.. Jin, X., Carlin, B., Banerjee, S.
2133+
"Generalized Hierarchical Multivariate CAR Models for Areal Data"
2134+
Biometrics, Vol. 61, No. 4 (Dec., 2005), pp. 950-961
2135+
"""
2136+
2137+
def __init__(self, mu, W, alpha, tau, sparse=False, *args, **kwargs):
2138+
super().__init__(*args, **kwargs)
2139+
2140+
D = W.sum(axis=0)
2141+
d, _ = W.shape
2142+
2143+
self.d = d
2144+
self.median = self.mode = self.mean = self.mu = aet.as_tensor_variable(mu)
2145+
self.sparse = sparse
2146+
2147+
if not W.ndim == 2 or not np.allclose(W, W.T):
2148+
raise ValueError("W must be a symmetric adjacency matrix.")
2149+
2150+
if sparse:
2151+
W_sparse = scipy.sparse.csr_matrix(W)
2152+
self.W = aesara.sparse.as_sparse_variable(W_sparse)
2153+
else:
2154+
self.W = aet.as_tensor_variable(W)
2155+
2156+
# eigenvalues of D^−1/2 * W * D^−1/2
2157+
Dinv_sqrt = np.diag(1 / np.sqrt(D))
2158+
DWD = np.matmul(np.matmul(Dinv_sqrt, W), Dinv_sqrt)
2159+
self.lam = scipy.linalg.eigvalsh(DWD)
2160+
self.D = aet.as_tensor_variable(D)
2161+
2162+
tau = aet.as_tensor_variable(tau)
2163+
if tau.ndim > 0:
2164+
self.tau = tau[:, None]
2165+
else:
2166+
self.tau = tau
2167+
2168+
alpha = aet.as_tensor_variable(alpha)
2169+
if alpha.ndim > 0:
2170+
self.alpha = alpha[:, None]
2171+
else:
2172+
self.alpha = alpha
2173+
2174+
def logp(self, value):
2175+
"""
2176+
Calculate log-probability of a CAR-distributed vector
2177+
at specified value. This log probability function differs from
2178+
the true CAR log density (AKA a multivariate normal with CAR-structured
2179+
covariance matrix) by an additive constant.
2180+
2181+
Parameters
2182+
----------
2183+
value: array
2184+
Value for which log-probability is calculated.
2185+
2186+
Returns
2187+
-------
2188+
TensorVariable
2189+
"""
2190+
2191+
if value.ndim == 1:
2192+
value = value[None, :]
2193+
2194+
logtau = self.d * aet.log(self.tau).sum()
2195+
logdet = aet.log(1 - self.alpha.T * self.lam[:, None]).sum()
2196+
delta = value - self.mu
2197+
2198+
if self.sparse:
2199+
Wdelta = aesara.sparse.dot(delta, self.W)
2200+
else:
2201+
Wdelta = aet.dot(delta, self.W)
2202+
2203+
tau_dot_delta = self.D[None, :] * delta - self.alpha * Wdelta
2204+
logquad = (self.tau * delta * tau_dot_delta).sum(axis=-1)
2205+
return bound(
2206+
0.5 * (logtau + logdet - logquad),
2207+
aet.all(self.alpha <= 1),
2208+
aet.all(self.alpha >= -1),
2209+
self.tau > 0,
2210+
)
2211+
2212+
def random(self, point=None, size=None):
2213+
raise NotImplementedError("Sampling from a CAR distribution is not supported.")
2214+
2215+
def _distr_parameters_for_repr(self):
2216+
return ["mu", "W", "alpha", "tau"]

pymc3/tests/test_distributions.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from pymc3.blocking import DictToVarBijection
3838
from pymc3.distributions import (
3939
AR1,
40+
CAR,
4041
AsymmetricLaplace,
4142
Bernoulli,
4243
Beta,
@@ -2614,6 +2615,42 @@ def test_orderedlogistic_dimensions(shape):
26142615
assert np.allclose(ologp, expected)
26152616

26162617

2618+
@pytest.mark.parametrize("shape", [(4,), (4, 1), (4, 4)], ids=str)
2619+
def test_car_logp(shape):
2620+
"""
2621+
Tests the log probability function for the CAR distribution by checking
2622+
against Scipy's multivariate normal logpdf, up to an additive constant.
2623+
The formula used by the CAR logp implementation omits several additive terms.
2624+
"""
2625+
np.random.seed(1)
2626+
2627+
xs = np.random.randn(*shape)
2628+
2629+
# d x d adjacency matrix for a square (d=4) of rook-adjacent sites
2630+
W = np.array(
2631+
[[0.0, 1.0, 1.0, 0.0], [1.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 1.0, 0.0]]
2632+
)
2633+
2634+
tau = 2.0
2635+
alpha = 0.5
2636+
mu = np.zeros(4)
2637+
2638+
# Compute CAR covariance matrix and resulting MVN logp
2639+
D = W.sum(axis=0)
2640+
prec = tau * (np.diag(D) - alpha * W)
2641+
cov = np.linalg.inv(prec)
2642+
scipy_logp = scipy.stats.multivariate_normal.logpdf(xs, mu, cov)
2643+
2644+
car_logp = CAR.dist(mu, W, alpha, tau, shape=shape).logp(xs).eval()
2645+
2646+
# Check to make sure that the CAR and MVN log PDFs are equivalent
2647+
# up to an additive constant which is independent of the CAR parameters
2648+
delta_logp = scipy_logp - car_logp
2649+
2650+
# Check to make sure all the delta values are identical.
2651+
assert np.allclose(delta_logp - delta_logp[0], 0.0)
2652+
2653+
26172654
class TestBugfixes:
26182655
@pytest.mark.parametrize(
26192656
"dist_cls,kwargs", [(MvNormal, dict(mu=0)), (MvStudentT, dict(mu=0, nu=2))]

0 commit comments

Comments
 (0)