diff --git a/pymc_experimental/distributions/__init__.py b/pymc_experimental/distributions/__init__.py
index 468c4cc0..c661160e 100644
--- a/pymc_experimental/distributions/__init__.py
+++ b/pymc_experimental/distributions/__init__.py
@@ -17,7 +17,7 @@
 Experimental probability distributions for stochastic nodes in PyMC.
 """
 
-from pymc_experimental.distributions.continuous import GenExtreme
+from pymc_experimental.distributions.continuous import GenExtreme, PCPriorStudentT_dof
 from pymc_experimental.distributions.discrete import GeneralizedPoisson
 from pymc_experimental.distributions.histogram_utils import histogram_approximation
 from pymc_experimental.distributions.multivariate import R2D2M2CP
@@ -27,6 +27,7 @@
     "DiscreteMarkovChain",
     "GeneralizedPoisson",
     "GenExtreme",
+    "PCPriorStudentT_dof",
     "R2D2M2CP",
     "histogram_approximation",
 ]
diff --git a/pymc_experimental/distributions/continuous.py b/pymc_experimental/distributions/continuous.py
index 8419bce5..497d03ac 100644
--- a/pymc_experimental/distributions/continuous.py
+++ b/pymc_experimental/distributions/continuous.py
@@ -19,18 +19,28 @@
 The imports from pymc are not fully replicated here: add imports as necessary.
 """
 
-from typing import List, Tuple, Union
+from typing import List, Optional, Tuple, Union
 
 import numpy as np
 import pytensor.tensor as pt
-from pymc.distributions.dist_math import check_parameters
+from pymc.distributions.continuous import (
+    DIST_PARAMETER_TYPES,
+    PositiveContinuous,
+    check_parameters,
+)
 from pymc.distributions.distribution import Continuous
 from pymc.distributions.shape_utils import rv_size_is_none
 from pymc.pytensorf import floatX
+from pytensor.tensor import TensorVariable
 from pytensor.tensor.random.op import RandomVariable
-from pytensor.tensor.variable import TensorVariable
 from scipy import stats
 
+from pymc_experimental.distributions.dist_math import (
+    pc_prior_studentt_kld_dist_inv_op,
+    pc_prior_studentt_logp,
+    studentt_kld_distance,
+)
+
 
 class GenExtremeRV(RandomVariable):
     name: str = "Generalized Extreme Value"
@@ -216,3 +226,62 @@ def moment(rv, size, mu, sigma, xi):
         if not rv_size_is_none(size):
             mode = pt.full(size, mode)
         return mode
+
+
+class PCPriorStudentT_dof_RV(RandomVariable):
+    name = "pc_prior_studentt_dof"
+    ndim_supp = 0
+    ndims_params = [0]
+    dtype = "floatX"
+    _print_name = ("PCTDoF", "\\operatorname{PCPriorStudentT_dof}")
+
+    @classmethod
+    def rng_fn(cls, rng, lam, size=None) -> np.ndarray:
+        return pc_prior_studentt_kld_dist_inv_op.spline(rng.exponential(scale=1.0 / lam, size=size))
+
+
+pc_prior_studentt_dof = PCPriorStudentT_dof_RV()
+
+
+class PCPriorStudentT_dof(PositiveContinuous):
+
+    rv_op = pc_prior_studentt_dof
+
+    @classmethod
+    def dist(
+        cls,
+        alpha: Optional[DIST_PARAMETER_TYPES] = None,
+        U: Optional[DIST_PARAMETER_TYPES] = None,
+        lam: Optional[DIST_PARAMETER_TYPES] = None,
+        *args,
+        **kwargs
+    ):
+        lam = cls.get_lam(alpha, U, lam)
+        return super().dist([lam], *args, **kwargs)
+
+    def moment(rv, size, lam):
+        mean = pc_prior_studentt_kld_dist_inv_op(1.0 / lam)
+        if not rv_size_is_none(size):
+            mean = pt.full(size, mean)
+        return mean
+
+    @classmethod
+    def get_lam(cls, alpha=None, U=None, lam=None):
+        if (alpha is not None) and (U is not None):
+            return -np.log(alpha) / studentt_kld_distance(U)
+        elif lam is not None:
+            return lam
+        else:
+            raise ValueError(
+                "Incompatible parameterization. Either use alpha and U, or lam to specify the "
+                "distribution."
+            )
+
+    def logp(value, lam):
+        res = pc_prior_studentt_logp(value, lam)
+        res = pt.switch(
+            pt.lt(value, 2 + 1e-6),  # 2 + 1e-6 smallest value for nu
+            -np.inf,
+            res,
+        )
+        return check_parameters(res, lam > 0, msg="lam > 0")
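
Reviewer note: a minimal usage sketch of the new distribution (the model, data, and hyperparameter values below are illustrative, not part of this PR). As I read `get_lam`, the `U`/`alpha` parameterization encodes the tail statement P(nu < U) = alpha:

    import numpy as np
    import pymc as pm

    from pymc_experimental.distributions import PCPriorStudentT_dof

    with pm.Model():
        # P(nu < 30) = 0.5: half the prior mass on heavier-than-Gaussian tails
        nu = PCPriorStudentT_dof("nu", U=30, alpha=0.5)
        pm.StudentT("y", nu=nu, mu=0.0, sigma=1.0, observed=np.random.randn(100))
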
diff --git a/pymc_experimental/distributions/dist_math.py b/pymc_experimental/distributions/dist_math.py
new file mode 100644
index 00000000..76b78dea
--- /dev/null
+++ b/pymc_experimental/distributions/dist_math.py
@@ -0,0 +1,90 @@
+# Copyright 2023 The PyMC Developers
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# coding: utf-8
+
+import numpy as np
+import pytensor.tensor as pt
+from pymc.distributions.dist_math import SplineWrapper
+from scipy.interpolate import UnivariateSpline
+
+
+def studentt_kld_distance(nu):
+    """
+    2 * sqrt(KL divergence) between a Student-t and a normal random variable. Derived
+    by Tang in https://arxiv.org/abs/1811.08042.
+    """
+    return pt.sqrt(
+        1
+        + pt.log(2 * pt.reciprocal(nu - 2))
+        + 2 * pt.gammaln((nu + 1) / 2)
+        - 2 * pt.gammaln(nu / 2)
+        - (nu + 1) * (pt.digamma((nu + 1) / 2) - pt.digamma(nu / 2))
+    )
+
+
+def tri_gamma_approx(x):
+    """Derivative of the digamma function, i.e. the second derivative of the log-gamma
+    function. This is a series expansion taken from Wikipedia:
+    https://en.wikipedia.org/wiki/Trigamma_function. Once the trigamma function in
+    PyTensor implements a gradient, this approximation can be removed and replaced.
+    """
+    return (
+        1 / x
+        + (1 / (2 * x**2))
+        + (1 / (6 * x**3))
+        - (1 / (30 * x**5))
+        + (1 / (42 * x**7))
+        - (1 / (30 * x**9))
+        + (5 / (66 * x**11))
+        - (691 / (2730 * x**13))
+        + (7 / (6 * x**15))
+    )
+
+
+def pc_prior_studentt_logp(nu, lam):
+    """The log probability density function of the PC prior for the degrees of freedom in a
+    Student-t likelihood. Derived by Tang in https://arxiv.org/abs/1811.08042.
+    """
+    return (
+        pt.log(lam)
+        + pt.log(
+            (1 / (nu - 2))
+            + ((nu + 1) / 2) * (tri_gamma_approx((nu + 1) / 2) - tri_gamma_approx(nu / 2))
+        )
+        - pt.log(4 * studentt_kld_distance(nu))
+        - lam * studentt_kld_distance(nu)
+        + pt.log(2)
+    )
+
+
+def _make_pct_inv_func():
+    """Constructs a numerical approximation to the inverse of the KLD distance function
+    `studentt_kld_distance`, via a spline fit over degrees of freedom values from 2 + 1e-6
+    to 4000. The distance is only defined for nu > 2 (where the Student-t variance is
+    finite), and values above 4000 change very little (the t is nearly Gaussian past 30).
+    The result is wrapped by `SplineWrapper` so it can be used as a PyTensor op.
+    """
+    NU_MIN = 2.0 + 1e-6
+    nu = np.concatenate((np.linspace(NU_MIN, 2.4, 2000), np.linspace(2.4 + 1e-4, 4000, 10000)))
+    return UnivariateSpline(
+        studentt_kld_distance(nu).eval()[::-1],
+        nu[::-1],
+        ext=3,
+        k=3,
+        s=0,
+    )
+
+
+pc_prior_studentt_kld_dist_inv_op = SplineWrapper(_make_pct_inv_func())
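
To illustrate what `_make_pct_inv_func` is doing, here is a standalone NumPy/SciPy sketch of the same construction (function and variable names here are mine, not the PR's): the KLD distance is strictly decreasing in nu, so it can be inverted with an interpolating spline, and a prior draw is an Exponential(lam) draw on the distance scale mapped back to degrees of freedom, mirroring `rng_fn` above.

    import numpy as np
    from scipy.interpolate import UnivariateSpline
    from scipy.special import digamma, gammaln

    def kld_distance(nu):
        # NumPy version of studentt_kld_distance, for illustration only
        return np.sqrt(
            1
            + np.log(2 / (nu - 2))
            + 2 * gammaln((nu + 1) / 2)
            - 2 * gammaln(nu / 2)
            - (nu + 1) * (digamma((nu + 1) / 2) - digamma(nu / 2))
        )

    # Same grid as the PR; arrays are reversed so the spline's x-axis
    # (the distance) is strictly increasing
    nu_grid = np.concatenate(
        (np.linspace(2 + 1e-6, 2.4, 2000), np.linspace(2.4 + 1e-4, 4000, 10000))
    )
    inv = UnivariateSpline(kld_distance(nu_grid)[::-1], nu_grid[::-1], k=3, s=0, ext=3)

    # Draw nu: distance ~ Exponential(lam), then map back through the inverse
    rng = np.random.default_rng(0)
    lam = -np.log(0.5) / kld_distance(30.0)  # U=30, alpha=0.5
    nu_draws = inv(rng.exponential(scale=1.0 / lam, size=5))
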
+ """ + NU_MIN = 2.0 + 1e-6 + nu = np.concatenate((np.linspace(NU_MIN, 2.4, 2000), np.linspace(2.4 + 1e-4, 4000, 10000))) + return UnivariateSpline( + studentt_kld_distance(nu).eval()[::-1], + nu[::-1], + ext=3, + k=3, + s=0, + ) + + +pc_prior_studentt_kld_dist_inv_op = SplineWrapper(_make_pct_inv_func()) diff --git a/pymc_experimental/gp/latent_approx.py b/pymc_experimental/gp/latent_approx.py index c4a5f761..c2f97e68 100644 --- a/pymc_experimental/gp/latent_approx.py +++ b/pymc_experimental/gp/latent_approx.py @@ -17,7 +17,8 @@ import pymc as pm import pytensor.tensor as pt from pymc.gp.util import JITTER_DEFAULT, stabilize -from pytensor.tensor.linalg import cholesky, solve_triangular +from pytensor.tensor.linalg import cholesky +from pytensor.tensor.slinalg import solve_triangular solve_lower = partial(solve_triangular, lower=True) solve_upper = partial(solve_triangular, lower=False) diff --git a/pymc_experimental/tests/distributions/test_continuous.py b/pymc_experimental/tests/distributions/test_continuous.py index a43d05b9..f5aa137a 100644 --- a/pymc_experimental/tests/distributions/test_continuous.py +++ b/pymc_experimental/tests/distributions/test_continuous.py @@ -14,6 +14,7 @@ import platform import numpy as np +import numpy.testing as npt import pymc as pm # general imports @@ -35,7 +36,32 @@ ) # the distributions to be tested -from pymc_experimental.distributions import GenExtreme +from pymc_experimental.distributions import GenExtreme, PCPriorStudentT_dof + + +class TestPCPriorStudentT_dof: + """The test compares the result to what's implemented in INLA. Since it's a specialized + distribution the user shouldn't ever draw random samples from it, calculate the logcdf, or + any of that. The log-probability won't match up exactly to INLA. INLA uses a numeric + approximation and this implementation uses an exact solution for the log-probability. Some + numerical approximations are needed for drawing random samples though. + """ + + @pytest.mark.parametrize( + "test_case", + [ + {"U": 30, "alpha": 0.5, "dof": 5, "inla_result": -4.792407}, + {"U": 30, "alpha": 0.5, "dof": 500, "inla_result": -9.464625}, + {"U": 30, "alpha": 0.5, "dof": 1, "inla_result": -np.inf}, # actually INLA throws error + {"U": 30, "alpha": 0.1, "dof": 5, "inla_result": -15.25691}, + {"U": 30, "alpha": 0.9, "dof": 5, "inla_result": -2.416043}, + {"U": 5, "alpha": 0.99, "dof": 5, "inla_result": -5.992945}, + {"U": 5, "alpha": 0.01, "dof": 5, "inla_result": -4.460736}, + ], + ) + def test_logp(self, test_case): + d = PCPriorStudentT_dof.dist(U=test_case["U"], alpha=test_case["alpha"]) + npt.assert_allclose(pm.logp(d, test_case["dof"]).eval(), test_case["inla_result"], rtol=0.1) class TestGenExtremeClass: