Skewed Student-T distribution (#7252)

fonnesbeck · web-flow · commit 606d4ffc2382 · 2024-05-04T22:47:44.000+02:00
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
@@ -989,7 +989,7 @@ Thus, Thomas, Chris and I are pleased to announce that PyMC3 is now in Beta.
 * Benjamin Edwards <bedwards@cs.unm.edu>
 * Brian Naughton <briannaughton@gmail.com>
 * Chad Heyne <chadheyne@gmail.com>
-* Chris Fonnesbeck <chris.fonnesbeck@vanderbilt.edu>
+* Chris Fonnesbeck <fonnesbeck@gmail.com>
 * Corey Farwell <coreyf@rwell.org>
 * John Salvatier <jsalvatier@gmail.com>
 * Karlson Pfannschmidt <quietdeath@gmail.com>
diff --git a/docs/source/api/distributions/continuous.rst b/docs/source/api/distributions/continuous.rst
@@ -33,6 +33,7 @@ Continuous
    PolyaGamma
    Rice
    SkewNormal
+   SkewStudentT
    StudentT
    Triangular
    TruncatedNormal
diff --git a/pymc/distributions/__init__.py b/pymc/distributions/__init__.py
@@ -41,6 +41,7 @@
     PolyaGamma,
     Rice,
     SkewNormal,
+    SkewStudentT,
     StudentT,
     Triangular,
     TruncatedNormal,
@@ -202,4 +203,5 @@
     "HurdleLogNormal",
     "HurdleNegativeBinomial",
     "HurdlePoisson",
+    "SkewStudentT",
 ]
diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py
@@ -29,6 +29,7 @@
 from pytensor.graph.basic import Apply, Variable
 from pytensor.graph.op import Op
 from pytensor.raise_op import Assert
+from pytensor.tensor import gamma as gammafn
 from pytensor.tensor import gammaln
 from pytensor.tensor.extra_ops import broadcast_shape
 from pytensor.tensor.math import betaincinv, gammaincinv, tanh
@@ -130,6 +131,7 @@ def polyagamma_cdf(*args, **kwargs):
     "Moyal",
     "AsymmetricLaplace",
     "PolyaGamma",
+    "SkewStudentT",
 ]
 
 
@@ -1908,6 +1910,138 @@ def icdf(value, nu, mu, sigma):
         )
 
 
+class SkewStudentTRV(RandomVariable):
+    name = "skewstudentt"
+    ndim_supp = 0
+    ndims_params = [0, 0, 0, 0]
+    dtype = "floatX"
+    _print_name = ("SkewStudentT", "\\operatorname{SkewStudentT}")
+
+    @classmethod
+    def rng_fn(cls, rng, a, b, mu, sigma, size=None) -> np.ndarray:
+        return np.asarray(
+            stats.jf_skew_t.rvs(a=a, b=b, loc=mu, scale=sigma, size=size, random_state=rng)
+        )
+
+
+skewstudentt = SkewStudentTRV()
+
+
+class SkewStudentT(Continuous):
+    r"""
+    Skewed Student's T distribution log-likelihood.
+
+    This follows the parametrization of Jones and Faddy (2003).
+
+    The pdf of this distribution is
+
+    .. math::
+
+        f(t)=f(t ; a, b)=C_{a, b}^{-1}\left\{1+\frac{t}{\left(a+b+t^2\right)^{1 / 2}}\right\}^{a+1 / 2}\left\{1-\frac{t}{\left(a+b+t^2\right)^{1 / 2}}\right\}^{b+1 / 2}
+
+    where
+
+    .. math::
+
+        C_{a, b}=2^{a+b-1} B(a, b)(a+b)^{1 / 2}
+
+
+    ========  =============================================================
+    Support   :math:`x \in (-\infty, \infty)`
+    Mean      :math:`E(T)=\frac{(a-b) \sqrt{(a+b)}}{2} \frac{\Gamma\left(a-\frac{1}{2}\right) \Gamma\left(b-\frac{1}{2}\right)}{\Gamma(a) \Gamma(b)}`
+    ========  =============================================================
+
+    Parameters
+    ----------
+    a : tensor_like of float
+        First kurtosis parameter (a > 0).
+    b : tensor_like of float
+        Second kurtosis parameter (b > 0).
+    mu : tensor_like of float, optional
+        Location parameter. Defaults to 0.
+    sigma : tensor_like of float, optional
+        Scale parameter (sigma > 0). Converges to the standard deviation as a and b
+        become close (only required if lam is not specified). Defaults to 1.
+    lam : tensor_like of float, optional
+        Scale parameter (lam > 0). Converges to the precision as a and b
+        become close (only required if sigma is not specified). Defaults to 1.
+
+    """
+
+    rv_op = skewstudentt
+
+    @classmethod
+    def dist(cls, a, b, *, mu=0, sigma=None, lam=None, **kwargs):
+        a = pt.as_tensor_variable(a)
+        b = pt.as_tensor_variable(b)
+        lam, sigma = get_tau_sigma(tau=lam, sigma=sigma)
+        sigma = pt.as_tensor_variable(sigma)
+
+        return super().dist([a, b, mu, sigma], **kwargs)
+
+    def support_point(rv, size, a, b, mu, sigma):
+        a, b, mu, _ = pt.broadcast_arrays(a, b, mu, sigma)
+        Et = mu + (a - b) * pt.sqrt(a + b) * gammafn(a - 0.5) * gammafn(b - 0.5) / (
+            2 * gammafn(a) * gammafn(b)
+        )
+        if not rv_size_is_none(size):
+            Et = pt.full(size, Et)
+        return Et
+
+    def logp(value, a, b, mu, sigma):
+        _, sigma = get_tau_sigma(sigma=sigma)
+
+        x = (value - mu) / sigma
+
+        a_ = (a + 0.5) * pt.log(1 + x / pt.sqrt(a + b + x**2))
+        b_ = (b + 0.5) * pt.log(1 - x / pt.sqrt(a + b + x**2))
+        c = (a + b - 1) * pt.log(2) + pt.special.betaln(a, b) + 0.5 * pt.log(a + b)
+
+        res = a_ + b_ - c - pt.log(sigma)
+
+        return check_parameters(
+            res,
+            a > 0,
+            b > 0,
+            sigma > 0,
+            msg="a > 0, b > 0, sigma > 0",
+        )
+
+    def logcdf(value, a, b, mu, sigma):
+        _, sigma = get_tau_sigma(sigma=sigma)
+
+        x = (value - mu) / sigma
+
+        y = (1 + x / pt.sqrt(a + b + x**2)) * 0.5
+        res = pt.log(pt.betainc(a, b, y))
+
+        return check_parameters(
+            res,
+            a > 0,
+            b > 0,
+            sigma > 0,
+            msg="a > 0, b > 0, sigma > 0",
+        )
+
+    def icdf(value, a, b, mu, sigma):
+        _, sigma = get_tau_sigma(sigma=sigma)
+
+        bval = betaincinv(a, b, value)
+        num = (2 * bval - 1) * pt.sqrt(a + b)
+        denom = 2 * pt.sqrt(bval * (1 - bval))
+        res = num / denom
+
+        res = mu + res * sigma
+        res = check_icdf_value(res, value)
+        return check_icdf_parameters(
+            res,
+            a > 0,
+            b > 0,
+            sigma > 0,
+            msg="a > 0, b > 0, sigma > 0",
+        )
+
+
 class Pareto(BoundedContinuous):
     r"""
     Pareto log-likelihood.
diff --git a/tests/distributions/test_continuous.py b/tests/distributions/test_continuous.py
@@ -549,6 +549,14 @@ def test_studentt_logp(self):
             lambda value, nu, mu, sigma: st.t.logpdf(value, nu, mu, sigma),
         )
 
+    def test_skewstudentt_logp(self):
+        check_logp(
+            pm.SkewStudentT,
+            R,
+            {"a": Rplus, "b": Rplus, "mu": R, "sigma": Rplus},
+            lambda value, a, b, mu, sigma: st.jf_skew_t.logpdf(value, a, b, mu, sigma),
+        )
+
     @pytest.mark.skipif(
         condition=(pytensor.config.floatX == "float32"),
         reason="Fails on float32 due to numerical issues",
@@ -574,6 +582,25 @@ def test_studentt_icdf(self):
             lambda q, nu, mu, sigma: st.t.ppf(q, nu, mu, sigma),
         )
 
+    @pytest.mark.skipif(
+        condition=(pytensor.config.floatX == "float32"),
+        reason="Fails on float32 due to numerical issues",
+    )
+    def test_skewstudentt_logcdf(self):
+        check_logcdf(
+            pm.SkewStudentT,
+            R,
+            {"a": Rplus, "b": Rplus, "mu": R, "sigma": Rplus},
+            lambda value, a, b, mu, sigma: st.jf_skew_t.logcdf(value, a, b, mu, sigma),
+        )
+
+    def test_skewstudentt_icdf(self):
+        check_icdf(
+            pm.SkewStudentT,
+            {"a": Rplusbig, "b": Rplusbig, "mu": R, "sigma": Rplusbig},
+            lambda q, a, b, mu, sigma: st.jf_skew_t.ppf(q, a, b, mu, sigma),
+        )
+
     def test_cauchy(self):
         check_logp(
             pm.Cauchy,
@@ -1250,6 +1277,27 @@ def test_studentt_support_point(self, mu, nu, sigma, size, expected):
             pm.StudentT("x", mu=mu, nu=nu, sigma=sigma, size=size)
         assert_support_point_is_expected(model, expected)
 
+    @pytest.mark.parametrize(
+        "a, b, mu, sigma, size, expected",
+        [
+            (1, 1, 0, 1, None, 0),
+            (np.ones(5), np.ones(5), 0, 1, None, np.zeros(5)),
+            (10, 10, np.arange(5), np.arange(1, 6), None, np.arange(5)),
+            (
+                10,
+                10,
+                np.arange(5),
+                np.arange(1, 6),
+                (2, 5),
+                np.full((2, 5), np.arange(5)),
+            ),
+        ],
+    )
+    def test_skewstudentt_support_point(self, a, b, mu, sigma, size, expected):
+        with pm.Model() as model:
+            pm.SkewStudentT("x", a=a, b=b, mu=mu, sigma=sigma, size=size)
+        assert_support_point_is_expected(model, expected)
+
     @pytest.mark.parametrize(
         "alpha, beta, size, expected",
         [
@@ -1896,6 +1944,19 @@ def halfstudentt_rng_fn(self, df, loc, scale, size, rng):
     ]
 
 
+class TestSkewStudentT(BaseTestDistributionRandom):
+    pymc_dist = pm.SkewStudentT
+    pymc_dist_params = {"a": 5.0, "b": 5.0, "mu": -1.0, "sigma": 2.0}
+    expected_rv_op_params = {"a": 5.0, "b": 5.0, "mu": -1.0, "sigma": 2.0}
+    reference_dist_params = {"a": 5.0, "b": 5.0, "loc": -1.0, "scale": 2.0}
+    reference_dist = seeded_scipy_distribution_builder("jf_skew_t")
+    checks_to_run = [
+        "check_pymc_params_match_rv_op",
+        "check_pymc_draws_match_reference",
+        "check_rv_size",
+    ]
+
+
 class TestMoyal(BaseTestDistributionRandom):
     pymc_dist = pm.Moyal
     pymc_dist_params = {"mu": 0.0, "sigma": 1.0}

-Original file line number
+Diff line change
 * Benjamin Edwards <bedwards@cs.unm.edu>
 * Brian Naughton <briannaughton@gmail.com>
 * Chad Heyne <chadheyne@gmail.com>
 -* Chris Fonnesbeck <chris.fonnesbeck@vanderbilt.edu>
 +* Chris Fonnesbeck <fonnesbeck@gmail.com>
 * Corey Farwell <coreyf@rwell.org>
 * John Salvatier <jsalvatier@gmail.com>
 * Karlson Pfannschmidt <quietdeath@gmail.com>