Use dispatching for default transform instead of overriding __new__

ricardoV94 · ricardoV94 · commit 21b289ad6e4a · 2022-03-28T11:18:27.000+02:00
diff --git a/docs/source/contributing/developer_guide_implementing_distribution.md b/docs/source/contributing/developer_guide_implementing_distribution.md
@@ -193,9 +193,8 @@ class Blah(PositiveContinuous):
 
 Some notes:
 
-1. A distribution should at the very least inherit from {class}`~pymc.distributions.Discrete` or {class}`~pymc.distributions.Continuous`. For the latter, more specific subclasses exist: `PositiveContinuous`, `UnitContinuous`, `BoundedContinuous`, `CircularContinuous`, which specify default transformations for the variables. If you need to specify a one-time custom transform you can also override the `__new__` method, as is done for the {class}`~pymc.distributions.multivariate.Dirichlet`.
-1. If a distribution does not have a corresponding `random` implementation, a `RandomVariable` should still be created that raises a `NotImplementedError`. This is the case for the {class}`~pymc.distributions.continuous.Flat`. In this case it will be necessary to provide a standard `initval` by
-   overriding `__new__`.
+1. A distribution should at the very least inherit from {class}`~pymc.distributions.Discrete` or {class}`~pymc.distributions.Continuous`. For the latter, more specific subclasses exist: `PositiveContinuous`, `UnitContinuous`, `BoundedContinuous`, `CircularContinuous`, `SimplexContinuous`, which specify default transformations for the variables. If you need to specify a one-time custom transform, you can also register a `_default_transform` dispatch function, as is done for {class}`~pymc.distributions.multivariate.LKJCholeskyCov`.
+1. If a distribution does not have a corresponding `random` implementation, a `RandomVariable` should still be created that raises a `NotImplementedError`. This is the case for the {class}`~pymc.distributions.continuous.Flat`. In this case it will be necessary to provide a `moment` method, so that a default initial value is available without drawing from the distribution.
 1. As mentioned above, `PyMC` v4.x works in a very {term}`functional <Functional Programming>` way, and all the information that is needed in the `logp` and `logcdf` methods is expected to be "carried" via the `RandomVariable` inputs. You may pass numerical arguments that are not strictly needed for the `rng_fn` method but are used in the `logp` and `logcdf` methods. Just keep in mind whether this affects the correct shape inference behavior of the `RandomVariable`. If specialized non-numeric information is needed you might need to define your custom`_logp` and `_logcdf` {term}`Dispatching` functions, but this should be done as a last resort.
 1. The `logcdf` method is not a requirement, but it's a nice plus!
 1. Currently only one moment is supported in the `moment` method, and probably the "higher-order" one is the most useful (that is `mean` > `median` > `mode`)... You might need to truncate the moment if you are dealing with a discrete distribution.
diff --git a/pymc/distributions/bound.py b/pymc/distributions/bound.py
@@ -20,11 +20,12 @@
 from aesara.tensor.var import TensorVariable
 
 from pymc.aesaraf import floatX, intX
-from pymc.distributions.continuous import BoundedContinuous
+from pymc.distributions.continuous import BoundedContinuous, bounded_cont_transform
 from pymc.distributions.dist_math import check_parameters
 from pymc.distributions.distribution import Continuous, Discrete
 from pymc.distributions.logprob import logp
 from pymc.distributions.shape_utils import to_tuple
+from pymc.distributions.transforms import _default_transform
 from pymc.model import modelcontext
 from pymc.util import check_dist_not_registered
 
@@ -82,6 +83,11 @@ def logp(value, distribution, lower, upper):
         )
 
 
+@_default_transform.register(BoundRV)
+def bound_default_transform(op, rv):
+    return bounded_cont_transform(op, rv, _ContinuousBounded.bound_args_indices)
+
+
 class DiscreteBoundRV(BoundRV):
     name = "discrete_bound"
     dtype = "int64"
@@ -94,8 +100,8 @@ class _DiscreteBounded(Discrete):
     rv_op = discrete_boundrv
 
     def __new__(cls, *args, **kwargs):
-        transform = kwargs.get("transform", None)
-        if transform is not None:
+        kwargs.setdefault("transform", None)
+        if kwargs.get("transform") is not None:
             raise ValueError("Cannot transform discrete variable.")
         return super().__new__(cls, *args, **kwargs)
 
diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py
@@ -89,7 +89,6 @@ def polyagamma_cdf(*args, **kwargs):
 from pymc.distributions.shape_utils import rv_size_is_none
 from pymc.distributions.transforms import _default_transform
 from pymc.math import invlogit, logdiffexp, logit
-from pymc.util import UNSET
 
 __all__ = [
     "Uniform",
@@ -140,6 +139,13 @@ class CircularContinuous(Continuous):
     """Base class for circular continuous distributions"""
 
 
+class BoundedContinuous(Continuous):
+    """Base class for bounded continuous distributions"""
+
+    # Indices of the arguments that define the lower and upper bounds of the distribution
+    bound_args_indices: Optional[List[int]] = None
+
+
 @_default_transform.register(PositiveContinuous)
 def pos_cont_transform(op, rv):
     return transforms.log
@@ -155,48 +161,34 @@ def circ_cont_transform(op, rv):
     return transforms.circular
 
 
-class BoundedContinuous(Continuous):
-    """Base class for bounded continuous distributions"""
-
-    # Indices of the arguments that define the lower and upper bounds of the distribution
-    bound_args_indices: Optional[List[int]] = None
-
-    def __new__(cls, *args, **kwargs):
-        transform = kwargs.get("transform", UNSET)
-        if transform is UNSET:
-            kwargs["transform"] = cls.default_transform()
-        return super().__new__(cls, *args, **kwargs)
-
-    @classmethod
-    def default_transform(cls):
-        if cls.bound_args_indices is None:
-            raise ValueError(
-                f"Must specify bound_args_indices for {cls.__name__} bounded distribution"
-            )
+@_default_transform.register(BoundedContinuous)
+def bounded_cont_transform(op, rv, bound_args_indices=None):
+    if bound_args_indices is None:
+        raise ValueError(f"Must specify bound_args_indices for {op} bounded distribution")
 
-        def transform_params(*args):
+    def transform_params(*args):
 
-            lower, upper = None, None
-            if cls.bound_args_indices[0] is not None:
-                lower = args[cls.bound_args_indices[0]]
-            if cls.bound_args_indices[1] is not None:
-                upper = args[cls.bound_args_indices[1]]
+        lower, upper = None, None
+        if bound_args_indices[0] is not None:
+            lower = args[bound_args_indices[0]]
+        if bound_args_indices[1] is not None:
+            upper = args[bound_args_indices[1]]
 
-            if lower is not None:
-                if isinstance(lower, TensorConstant) and np.all(lower.value == -np.inf):
-                    lower = None
-                else:
-                    lower = at.as_tensor_variable(lower)
+        if lower is not None:
+            if isinstance(lower, TensorConstant) and np.all(lower.value == -np.inf):
+                lower = None
+            else:
+                lower = at.as_tensor_variable(lower)
 
-            if upper is not None:
-                if isinstance(upper, TensorConstant) and np.all(upper.value == np.inf):
-                    upper = None
-                else:
-                    upper = at.as_tensor_variable(upper)
+        if upper is not None:
+            if isinstance(upper, TensorConstant) and np.all(upper.value == np.inf):
+                upper = None
+            else:
+                upper = at.as_tensor_variable(upper)
 
-            return lower, upper
+        return lower, upper
 
-        return transforms.Interval(bounds_fn=transform_params)
+    return transforms.Interval(bounds_fn=transform_params)
 
 
 def assert_negative_support(var, label, distname, value=-1e-6):
@@ -338,6 +330,11 @@ def logcdf(value, lower, upper):
         )
 
 
+@_default_transform.register(Uniform)
+def uniform_default_transform(op, rv):
+    return bounded_cont_transform(op, rv, Uniform.bound_args_indices)
+
+
 class FlatRV(RandomVariable):
     name = "flat"
     ndim_supp = 0
@@ -788,6 +785,11 @@ def logp(
         return check_parameters(logp, *bounds)
 
 
+@_default_transform.register(TruncatedNormal)
+def truncated_normal_default_transform(op, rv):
+    return bounded_cont_transform(op, rv, TruncatedNormal.bound_args_indices)
+
+
 class HalfNormal(PositiveContinuous):
     r"""
     Half-normal log-likelihood.
@@ -2065,6 +2067,11 @@ def logcdf(
         return check_parameters(res, 0 < alpha, 0 < m, msg="alpha > 0, m > 0")
 
 
+@_default_transform.register(Pareto)
+def pareto_default_transform(op, rv):
+    return bounded_cont_transform(op, rv, Pareto.bound_args_indices)
+
+
 class Cauchy(Continuous):
     r"""
     Cauchy log-likelihood.
@@ -3245,6 +3252,11 @@ def logcdf(value, lower, c, upper):
         )
 
 
+@_default_transform.register(Triangular)
+def triangular_default_transform(op, rv):
+    return bounded_cont_transform(op, rv, Triangular.bound_args_indices)
+
+
 class Gumbel(Continuous):
     r"""
     Univariate Gumbel log-likelihood.
@@ -3763,17 +3775,6 @@ class Interpolated(BoundedContinuous):
 
     rv_op = interpolated
 
-    def __new__(cls, *args, **kwargs):
-        transform = kwargs.get("transform", UNSET)
-        if transform is UNSET:
-
-            def transform_params(*params):
-                _, _, _, x_points, _, _ = params
-                return floatX(x_points[0]), floatX(x_points[-1])
-
-            kwargs["transform"] = transforms.Interval(bounds_fn=transform_params)
-        return super().__new__(cls, *args, **kwargs)
-
     @classmethod
     def dist(cls, x_points, pdf_points, *args, **kwargs):
 
@@ -3827,8 +3828,14 @@ def logp(value, x_points, pdf_points, cdf_points):
 
         return at.log(interp_op(value) / Z)
 
-    def _distr_parameters_for_repr(self):
-        return []
+
+@_default_transform.register(Interpolated)
+def interpolated_default_transform(op, rv):
+    def transform_params(*params):
+        _, _, _, x_points, _, _ = params
+        return floatX(x_points[0]), floatX(x_points[-1])
+
+    return transforms.Interval(bounds_fn=transform_params)
 
 
 class MoyalRV(RandomVariable):
diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py
@@ -62,9 +62,9 @@
     rv_size_is_none,
     to_tuple,
 )
-from pymc.distributions.transforms import Interval
+from pymc.distributions.transforms import Interval, _default_transform
 from pymc.math import kron_diag, kron_dot
-from pymc.util import UNSET, check_dist_not_registered
+from pymc.util import check_dist_not_registered
 
 __all__ = [
     "MvNormal",
@@ -83,6 +83,16 @@
     "StickBreakingWeights",
 ]
 
+
+class SimplexContinuous(Continuous):
+    """Base class for simplex continuous distributions"""
+
+
+@_default_transform.register(SimplexContinuous)
+def simplex_cont_transform(op, rv):
+    return transforms.simplex
+
+
 # Step methods and advi do not catch LinAlgErrors at the
 # moment. We work around that by using a cholesky op
 # that returns a nan as first entry instead of raising
@@ -408,7 +418,7 @@ def logp(value, nu, mu, cov):
         )
 
 
-class Dirichlet(Continuous):
+class Dirichlet(SimplexContinuous):
     r"""
     Dirichlet log-likelihood.
 
@@ -434,10 +444,6 @@ class Dirichlet(Continuous):
     """
     rv_op = dirichlet
 
-    def __new__(cls, name, *args, **kwargs):
-        kwargs.setdefault("transform", transforms.simplex)
-        return super().__new__(cls, name, *args, **kwargs)
-
     @classmethod
     def dist(cls, a, **kwargs):
         a = at.as_tensor_variable(a)
@@ -1169,12 +1175,7 @@ class _LKJCholeskyCov(Continuous):
     rv_op = _ljk_cholesky_cov
 
     def __new__(cls, name, eta, n, sd_dist, **kwargs):
-        transform = kwargs.get("transform", UNSET)
-        if transform is UNSET:
-            kwargs["transform"] = transforms.CholeskyCovPacked(n)
-
         check_dist_not_registered(sd_dist)
-
         return super().__new__(cls, name, eta, n, sd_dist, **kwargs)
 
     @classmethod
@@ -1269,6 +1270,12 @@ def logp(value, n, eta, sd_dist):
         return norm + logp_lkj + logp_sd + det_invjac
 
 
+@_default_transform.register(_LKJCholeskyCov)
+def lkjcholeskycov_default_transform(op, rv):
+    _, _, _, n, _, _ = rv.owner.inputs
+    return transforms.CholeskyCovPacked(n)
+
+
 class LKJCholeskyCov:
     r"""Wrapper class for covariance matrix with LKJ distributed correlations.
 
@@ -1551,12 +1558,6 @@ class LKJCorr(BoundedContinuous):
 
     rv_op = lkjcorr
 
-    def __new__(cls, *args, **kwargs):
-        transform = kwargs.get("transform", UNSET)
-        if transform is UNSET:
-            kwargs["transform"] = Interval(floatX(-1.0), floatX(1.0))
-        return super().__new__(cls, *args, **kwargs)
-
     @classmethod
     def dist(cls, n, eta, **kwargs):
         n = at.as_tensor_variable(intX(n))
@@ -1610,6 +1611,11 @@ def logp(value, n, eta):
         )
 
 
+@_default_transform.register(LKJCorr)
+def lkjcorr_default_transform(op, rv):
+    return Interval(floatX(-1.0), floatX(1.0))
+
+
 class MatrixNormalRV(RandomVariable):
     name = "matrixnormal"
     ndim_supp = 2
@@ -2261,7 +2267,7 @@ def rng_fn(cls, rng, alpha, K, size):
 stickbreakingweights = StickBreakingWeightsRV()
 
 
-class StickBreakingWeights(Continuous):
+class StickBreakingWeights(SimplexContinuous):
     r"""
     Likelihood of truncated stick-breaking weights. The weights are generated from a
     stick-breaking proceduce where :math:`x_k = v_k \prod_{\ell < k} (1 - v_\ell)` for
@@ -2298,10 +2304,6 @@ class StickBreakingWeights(Continuous):
     """
     rv_op = stickbreakingweights
 
-    def __new__(cls, name, *args, **kwargs):
-        kwargs.setdefault("transform", transforms.simplex)
-        return super().__new__(cls, name, *args, **kwargs)
-
     @classmethod
     def dist(cls, alpha, K, *args, **kwargs):
         alpha = at.as_tensor_variable(floatX(alpha))