Add Kumaraswamy distribution (#2994) (#2998)

paulkernfeld · Junpeng Lao · commit f361844744c4 · 2018-06-06T14:50:44.000+02:00
* Add Kumaraswamy
* I forgot that "as" is a keyword
* Test Kumaraswamy PDF against scipy
* Fix Kumaraswamy LaTeX
* Add Kumaraswamy to release notes
* Only test Kumaraswamy with nonnegative parameters
* Revert "Only test Kumaraswamy with nonnegative parameters"
  This reverts commit 7e8a121.
* Calculate Kumaraswamy moments in log space
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
@@ -12,6 +12,7 @@
 - Improve error message `Mass matrix contains zeros on the diagonal. Some derivatives might always be zero` during tuning of `pm.sample`
 - Improve error message `NaN occurred in optimization.` during ADVI
 - Save and load traces without `pickle` using `pm.save_trace` and `pm.load_trace`
+- Add `Kumaraswamy` distribution
 
 ### Fixes
 
diff --git a/docs/source/api/distributions/continuous.rst b/docs/source/api/distributions/continuous.rst
@@ -12,6 +12,7 @@ Continuous
    HalfNormal
    SkewNormal
    Beta
+   Kumaraswamy
    Exponential
    Laplace
    StudentT
diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py
@@ -6,6 +6,7 @@
 from .continuous import HalfFlat
 from .continuous import Normal
 from .continuous import Beta
+from .continuous import Kumaraswamy
 from .continuous import Exponential
 from .continuous import Laplace
 from .continuous import StudentT
@@ -89,6 +90,7 @@
            'HalfFlat',
            'Normal',
            'Beta',
+           'Kumaraswamy',
            'Exponential',
            'Laplace',
            'StudentT',
diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py
@@ -24,7 +24,7 @@
 from .dist_math import bound, logpow, gammaln, betaln, std_cdf, alltrue_elemwise, SplineWrapper, i0e
 from .distribution import Continuous, draw_values, generate_samples
 
-__all__ = ['Uniform', 'Flat', 'HalfFlat', 'Normal', 'Beta', 'Exponential',
+__all__ = ['Uniform', 'Flat', 'HalfFlat', 'Normal', 'Beta', 'Kumaraswamy', 'Exponential',
            'Laplace', 'StudentT', 'Cauchy', 'HalfCauchy', 'Gamma', 'Weibull',
            'HalfStudentT', 'Lognormal', 'ChiSquared', 'HalfNormal', 'Wald',
            'Pareto', 'InverseGamma', 'ExGaussian', 'VonMises', 'SkewNormal',
@@ -701,6 +701,92 @@ def _repr_latex_(self, name=None, dist=None):
                                                                 get_variable_name(alpha),
                                                                 get_variable_name(beta))
 
+class Kumaraswamy(UnitContinuous):
+    R"""
+    Kumaraswamy log-likelihood.
+
+    The pdf of this distribution is
+
+    .. math::
+
+       f(x \mid a, b) = abx^{a-1}(1-x^a)^{b-1}
+
+    .. plot::
+
+        import matplotlib.pyplot as plt
+        import numpy as np
+        plt.style.use('seaborn-darkgrid')
+        x = np.linspace(0, 1, 200)
+        a_s = [.5, 5., 1., 2., 2.]
+        b_s = [.5, 1., 3., 2., 5.]
+        for a, b in zip(a_s, b_s):
+            pdf = a * b * x ** (a - 1) * (1 - x ** a) ** (b - 1)
+            plt.plot(x, pdf, label=r'$a$ = {}, $b$ = {}'.format(a, b))
+        plt.xlabel('x', fontsize=12)
+        plt.ylabel('f(x)', fontsize=12)
+        plt.ylim(0, 3.)
+        plt.legend(loc=9)
+        plt.show()
+
+    ========  ==============================================================
+    Support   :math:`x \in (0, 1)`
+    Mean      :math:`b B(1 + \tfrac{1}{a}, b)`
+    Variance  :math:`b B(1 + \tfrac{2}{a}, b) - (b B(1 + \tfrac{1}{a}, b))^2`
+    ========  ==============================================================
+
+    Parameters
+    ----------
+    a : float
+        a > 0.
+    b : float
+        b > 0.
+    """
+
+    def __init__(self, a, b, *args, **kwargs):
+        super(Kumaraswamy, self).__init__(*args, **kwargs)
+
+        self.a = a = tt.as_tensor_variable(a)
+        self.b = b = tt.as_tensor_variable(b)
+
+        # Raw moments are E[X^n] = b * B(1 + n/a, b); the Beta function is kept
+        # in log space (betaln) and only exponentiated after adding log(b).
+        self.mean = tt.exp(tt.log(b) + betaln(1 + 1 / a, b))
+        second_raw_moment = tt.exp(tt.log(b) + betaln(1 + 2 / a, b))
+        self.variance = second_raw_moment - self.mean ** 2
+
+        assert_negative_support(a, 'a', 'Kumaraswamy')
+        assert_negative_support(b, 'b', 'Kumaraswamy')
+
+    def _random(self, a, b, size=None):
+        """Inverse-CDF sampler: solve u = 1 - (1 - x^a)^b for x with uniform u."""
+        u = np.random.uniform(size=size)
+        return (1 - (1 - u) ** (1 / b)) ** (1 / a)
+
+    def random(self, point=None, size=None):
+        """Evaluate `a`, `b` with `draw_values`, then sample through `generate_samples`."""
+        a, b = draw_values([self.a, self.b], point=point, size=size)
+        return generate_samples(self._random, a, b,
+                                dist_shape=self.shape, size=size)
+
+    def logp(self, value):
+        """Log of the pdf at `value`, passed through `bound` with the support/parameter checks."""
+        a = self.a
+        b = self.b
+        # log1p(-x^a) keeps precision when x^a is tiny, where 1 - x^a would
+        # round to exactly 1 and log(1 - x^a) would collapse to 0.
+        logp = (tt.log(a) + tt.log(b) + (a - 1) * tt.log(value)
+                + (b - 1) * tt.log1p(-value ** a))
+        return bound(logp, value >= 0, value <= 1, a > 0, b > 0)
+
+    def _repr_latex_(self, name=None, dist=None):
+        """Format `name ~ Kumaraswamy(a=..., b=...)` as LaTeX for `dist` (self if None)."""
+        if dist is None:
+            dist = self
+        a = dist.a
+        b = dist.b
+        name = r'\text{%s}' % name
+        return r'${} \sim \text{{Kumaraswamy}}(\mathit{{a}}={},~\mathit{{b}}={})$'.format(
+            name, get_variable_name(a), get_variable_name(b))
 
 class Exponential(PositiveContinuous):
     R"""
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
@@ -16,7 +16,8 @@
                              Flat, LKJCorr, Wald, ChiSquared, HalfNormal, DiscreteUniform,
                              Bound, Uniform, Triangular, Binomial, SkewNormal, DiscreteWeibull,
                              Gumbel, Logistic, OrderedLogistic, LogitNormal, Interpolated,
-                             ZeroInflatedBinomial, HalfFlat, AR1, KroneckerNormal, Rice)
+                             ZeroInflatedBinomial, HalfFlat, AR1, KroneckerNormal, Rice,
+                             Kumaraswamy)
 
 from ..distributions import continuous
 from pymc3.theanof import floatX
@@ -580,6 +581,12 @@ def test_beta(self):
                                  lambda value, alpha, beta: sp.beta.logpdf(value, alpha, beta))
         self.pymc3_matches_scipy(Beta, Unit, {'mu': Unit, 'sd': Rplus}, beta_mu_sd)
 
+    def test_kumaraswamy(self):
+        """Compare Kumaraswamy logp with a hand-written numpy log-pdf (scipy has no built-in one)."""
+        def reference_logp(value, a, b):
+            return np.log(a) + np.log(b) + (a - 1) * np.log(value) + (b - 1) * np.log(1 - value ** a)
+        self.pymc3_matches_scipy(Kumaraswamy, Unit, {'a': Rplus, 'b': Rplus}, reference_logp)
+
     def test_exponential(self):
         self.pymc3_matches_scipy(Exponential, Rplus, {'lam': Rplus},
                                  lambda value, lam: sp.expon.logpdf(value, 0, 1 / lam))
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
@@ -230,6 +230,11 @@ class TestBeta(BaseTestCases.BaseTestCase):
     params = {'alpha': 1., 'beta': 1.}
 
 
+class TestKumaraswamy(BaseTestCases.BaseTestCase):
+    distribution = pm.Kumaraswamy
+    params = dict(a=1., b=1.)  # a = b = 1: the pdf a*b*x^(a-1)*(1-x^a)^(b-1) is identically 1
+
+
 class TestExponential(BaseTestCases.BaseTestCase):
     distribution = pm.Exponential
     params = {'lam': 1.}