Refactor BetaBinomial to use the Aesara `betabinom` RandomVariable

ricardoV94 · ricardoV94 · commit d52ae504c19f · 2021-05-05T09:40:15.000+02:00
diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
@@ -19,6 +19,7 @@
 from aesara.tensor.random.basic import (
     RandomVariable,
     bernoulli,
+    betabinom,
     binomial,
     categorical,
     geometric,
@@ -41,7 +42,7 @@
     normal_lcdf,
 )
 from pymc3.distributions.distribution import Discrete
-from pymc3.math import log1mexp, logaddexp, logsumexp, sigmoid, tround
+from pymc3.math import log1mexp, logaddexp, logsumexp, sigmoid
 
 __all__ = [
     "Binomial",
@@ -227,58 +228,16 @@ def BetaBinom(a, b, n, x):
         beta > 0.
     """
 
-    def __init__(self, alpha, beta, n, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.alpha = alpha = at.as_tensor_variable(floatX(alpha))
-        self.beta = beta = at.as_tensor_variable(floatX(beta))
-        self.n = n = at.as_tensor_variable(intX(n))
-        self.mode = at.cast(tround(alpha / (alpha + beta)), "int8")
-
-    def _random(self, alpha, beta, n, size=None):
-        size = size or ()
-        p = stats.beta.rvs(a=alpha, b=beta, size=size).flatten()
-        # Sometimes scipy.beta returns nan. Ugh.
-        while np.any(np.isnan(p)):
-            i = np.isnan(p)
-            p[i] = stats.beta.rvs(a=alpha, b=beta, size=np.sum(i))
-        # Sigh...
-        _n, _p, _size = np.atleast_1d(n).flatten(), p.flatten(), p.shape[0]
-
-        quotient, remainder = divmod(_p.shape[0], _n.shape[0])
-        if remainder != 0:
-            raise TypeError(
-                "n has a bad size! Was cast to {}, must evenly divide {}".format(
-                    _n.shape[0], _p.shape[0]
-                )
-            )
-        if quotient != 1:
-            _n = np.tile(_n, quotient)
-        samples = np.reshape(stats.binom.rvs(n=_n, p=_p, size=_size), size)
-        return samples
-
-    def random(self, point=None, size=None):
-        r"""
-        Draw random values from BetaBinomial distribution.
+    rv_op = betabinom
 
-        Parameters
-        ----------
-        point: dict, optional
-            Dict of variable values on which random values are to be
-            conditioned (uses default point if not specified).
-        size: int, optional
-            Desired size of random sample (returns one sample if not
-            specified).
-
-        Returns
-        -------
-        array
-        """
-        # alpha, beta, n = draw_values([self.alpha, self.beta, self.n], point=point, size=size)
-        # return generate_samples(
-        #     self._random, alpha=alpha, beta=beta, n=n, dist_shape=self.shape, size=size
-        # )
+    @classmethod
+    def dist(cls, alpha, beta, n, *args, **kwargs):
+        alpha = at.as_tensor_variable(floatX(alpha))
+        beta = at.as_tensor_variable(floatX(beta))
+        n = at.as_tensor_variable(intX(n))
+        return super().dist([n, alpha, beta], **kwargs)
 
-    def logp(self, value):
+    def logp(value, n, alpha, beta):
         r"""
         Calculate log-probability of BetaBinomial distribution at specified value.
 
@@ -292,9 +251,6 @@ def logp(self, value):
         -------
         TensorVariable
         """
-        alpha = self.alpha
-        beta = self.beta
-        n = self.n
         return bound(
             binomln(n, value) + betaln(value + alpha, n - value + beta) - betaln(alpha, beta),
             value >= 0,
@@ -303,7 +259,7 @@ def logp(self, value):
             beta > 0,
         )
 
-    def logcdf(self, value):
+    def logcdf(value, n, alpha, beta):
         """
         Compute the log of the cumulative distribution function for BetaBinomial distribution
         at the specified value.
@@ -323,15 +279,15 @@ def logcdf(self, value):
                 f"BetaBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object."
             )
 
-        alpha = self.alpha
-        beta = self.beta
-        n = self.n
         safe_lower = at.switch(at.lt(value, 0), value, 0)
 
         return bound(
             at.switch(
                 at.lt(value, n),
-                logsumexp(self.logp(at.arange(safe_lower, value + 1)), keepdims=False),
+                logsumexp(
+                    BetaBinomial.logp(at.arange(safe_lower, value + 1), n, alpha, beta),
+                    keepdims=False,
+                ),
                 0,
             ),
             0 <= value,
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
@@ -1496,8 +1496,7 @@ def test_binomial(self):
             n_samples=10,
         )
 
-    # Too lazy to propagate decimal parameter through the whole chain of deps
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
+    @pytest.mark.xfail(reason="checkd tests has not been refactored")
     @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
     def test_beta_binomial_distribution(self):
         self.checkd(
@@ -1506,7 +1505,6 @@ def test_beta_binomial_distribution(self):
             {"alpha": Rplus, "beta": Rplus, "n": NatSmall},
         )
 
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
     @pytest.mark.skipif(
         condition=(SCIPY_VERSION < parse("1.4.0")), reason="betabinom is new in Scipy 1.4.0"
     )
@@ -1518,7 +1516,6 @@ def test_beta_binomial_logp(self):
             lambda value, alpha, beta, n: sp.betabinom.logpmf(value, a=alpha, b=beta, n=n),
         )
 
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
     @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
     @pytest.mark.skipif(
         condition=(SCIPY_VERSION < parse("1.4.0")), reason="betabinom is new in Scipy 1.4.0"
@@ -1531,7 +1528,6 @@ def test_beta_binomial_logcdf(self):
             lambda value, alpha, beta, n: sp.betabinom.logcdf(value, a=alpha, b=beta, n=n),
         )
 
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
     def test_beta_binomial_selfconsistency(self):
         self.check_selfconsistency_discrete_logcdf(
             BetaBinomial,
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
@@ -13,7 +13,6 @@
 #   limitations under the License.
 import functools
 import itertools
-import sys
 
 from contextlib import ExitStack as does_not_raise
 from typing import Callable, List, Optional
@@ -312,12 +311,6 @@ class TestLogitNormal(BaseTestCases.BaseTestCase):
     params = {"mu": 0.0, "sigma": 1.0}
 
 
-@pytest.mark.xfail(reason="This distribution has not been refactored for v4")
-class TestBetaBinomial(BaseTestCases.BaseTestCase):
-    distribution = pm.BetaBinomial
-    params = {"n": 5, "alpha": 1.0, "beta": 1.0}
-
-
 @pytest.mark.xfail(reason="This distribution has not been refactored for v4")
 class TestConstant(BaseTestCases.BaseTestCase):
     distribution = pm.Constant
@@ -893,6 +886,17 @@ def seeded_weibul_rng_fn(self):
     ]
 
 
+class TestBetaBinomial(BaseTestDistribution):
+    pymc_dist = pm.BetaBinomial
+    pymc_dist_params = {"alpha": 2.0, "beta": 1.0, "n": 5}
+    expected_rv_op_params = {"n": 5, "alpha": 2.0, "beta": 1.0}
+    reference_dist_params = {"n": 5, "a": 2.0, "b": 1.0}
+    tests_to_run = [
+        "check_pymc_params_match_rv_op",
+        "check_rv_size",
+    ]
+
+
 class TestScalarParameterSamples(SeededTest):
     @pytest.mark.xfail(reason="This distribution has not been refactored for v4")
     def test_bounded(self):
@@ -1002,19 +1006,6 @@ def test_half_flat(self):
             with pytest.raises(ValueError):
                 f.random(1)
 
-    @pytest.mark.xfail(reason="This distribution has not been refactored for v4")
-    @pytest.mark.xfail(
-        sys.platform.startswith("win"),
-        reason="Known issue: https://github.com/pymc-devs/pymc3/pull/4269",
-    )
-    def test_beta_binomial(self):
-        pymc3_random_discrete(
-            pm.BetaBinomial, {"n": Nat, "alpha": Rplus, "beta": Rplus}, ref_rand=self._beta_bin
-        )
-
-    def _beta_bin(self, n, alpha, beta, size=None):
-        return st.binom.rvs(n, st.beta.rvs(a=alpha, b=beta, size=size))
-
     @pytest.mark.xfail(reason="This distribution has not been refactored for v4")
     def test_discrete_uniform(self):
         def ref_rand(size, lower, upper):

Original file line number	Diff line number	Diff line change
`@@ -1496,8 +1496,7 @@ def test_binomial(self):`
`1496`	`1496`	`n_samples=10,`
`1497`	`1497`	`)`
`1498`	`1498`
`1499`		`- # Too lazy to propagate decimal parameter through the whole chain of deps`
`1500`		`- @pytest.mark.xfail(reason="Distribution not refactored yet")`
	`1499`	`+ @pytest.mark.xfail(reason="checkd tests has not been refactored")`
`1501`	`1500`	`@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")`
`1502`	`1501`	`def test_beta_binomial_distribution(self):`
`1503`	`1502`	`self.checkd(`
`@@ -1506,7 +1505,6 @@ def test_beta_binomial_distribution(self):`
`1506`	`1505`	`{"alpha": Rplus, "beta": Rplus, "n": NatSmall},`
`1507`	`1506`	`)`
`1508`	`1507`
`1509`		`- @pytest.mark.xfail(reason="Distribution not refactored yet")`
`1510`	`1508`	`@pytest.mark.skipif(`
`1511`	`1509`	`condition=(SCIPY_VERSION < parse("1.4.0")), reason="betabinom is new in Scipy 1.4.0"`
`1512`	`1510`	`)`
`@@ -1518,7 +1516,6 @@ def test_beta_binomial_logp(self):`
`1518`	`1516`	`lambda value, alpha, beta, n: sp.betabinom.logpmf(value, a=alpha, b=beta, n=n),`
`1519`	`1517`	`)`
`1520`	`1518`
`1521`		`- @pytest.mark.xfail(reason="Distribution not refactored yet")`
`1522`	`1519`	`@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")`
`1523`	`1520`	`@pytest.mark.skipif(`
`1524`	`1521`	`condition=(SCIPY_VERSION < parse("1.4.0")), reason="betabinom is new in Scipy 1.4.0"`
`@@ -1531,7 +1528,6 @@ def test_beta_binomial_logcdf(self):`
`1531`	`1528`	`lambda value, alpha, beta, n: sp.betabinom.logcdf(value, a=alpha, b=beta, n=n),`
`1532`	`1529`	`)`
`1533`	`1530`
`1534`		`- @pytest.mark.xfail(reason="Distribution not refactored yet")`
`1535`	`1531`	`def test_beta_binomial_selfconsistency(self):`
`1536`	`1532`	`self.check_selfconsistency_discrete_logcdf(`
`1537`	`1533`	`BetaBinomial,`