From e30568d4b6df9942ef5df22bf5180f8f4e97f66b Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 17 Nov 2021 18:30:20 +0100 Subject: [PATCH 1/4] Make get_moment Op agnostic --- pymc/distributions/distribution.py | 17 +++++++---------- pymc/distributions/simulator.py | 6 +++--- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index fd849be49e..c4a68c6e02 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -115,8 +115,8 @@ def logcdf(op, value, *dist_params, **kwargs): if class_initval: @_get_moment.register(rv_type) - def get_moment(op, rv, size, *rv_inputs): - return class_initval(rv, size, *rv_inputs) + def get_moment(op, rv, rng, size, dtype, *dist_params): + return class_initval(rv, size, *dist_params) # Register the Aesara `RandomVariable` type as a subclass of this # `Distribution` type. @@ -355,10 +355,8 @@ def dist( @singledispatch -def _get_moment(op, rv, size, *rv_inputs) -> TensorVariable: - raise NotImplementedError( - f"Random variable {rv} of type {op} has no get_moment implementation." - ) +def _get_moment(op, rv, *rv_inputs) -> TensorVariable: + raise NotImplementedError(f"Variable {rv} of type {op} has no get_moment implementation.") def get_moment(rv: TensorVariable) -> TensorVariable: @@ -368,8 +366,7 @@ def get_moment(rv: TensorVariable) -> TensorVariable: The only parameter to this function is the RandomVariable for which the value is to be derived. """ - size = rv.owner.inputs[1] - return _get_moment(rv.owner.op, rv, size, *rv.owner.inputs[3:]).astype(rv.dtype) + return _get_moment(rv.owner.op, rv, *rv.owner.inputs).astype(rv.dtype) class Discrete(Distribution): @@ -591,8 +588,8 @@ def density_dist_logcdf(op, var, rvs_to_values, *dist_params, **kwargs): return logcdf(value_var, *dist_params, **kwargs) @_get_moment.register(rv_type) - def density_dist_get_moment(op, rv, size, *rv_inputs): - return get_moment(rv, size, *rv_inputs) + def density_dist_get_moment(op, rv, rng, size, dtype, *dist_params): + return get_moment(rv, size, *dist_params) cls.rv_op = rv_op return super().__new__(cls, name, *dist_params, **kwargs) diff --git a/pymc/distributions/simulator.py b/pymc/distributions/simulator.py index 424cf42df6..0affa2b6a2 100644 --- a/pymc/distributions/simulator.py +++ b/pymc/distributions/simulator.py @@ -224,8 +224,8 @@ def logp(op, value_var_list, *dist_params, **kwargs): return cls.logp(value_var, op, dist_params) @_get_moment.register(SimulatorRV) - def get_moment(op, rv, size, *rv_inputs): - return cls.get_moment(rv, size, *rv_inputs) + def get_moment(op, rv, rng, size, dtype, *rv_inputs): + return cls.get_moment(rv, *rv_inputs) cls.rv_op = sim_op return super().__new__(cls, name, *params, **kwargs) @@ -235,7 +235,7 @@ def dist(cls, *params, **kwargs): return super().dist(params, **kwargs) @classmethod - def get_moment(cls, rv, size, *sim_inputs): + def get_moment(cls, rv, *sim_inputs): # Take the mean of 10 draws multiple_sim = rv.owner.op(*sim_inputs, size=at.concatenate([[10], rv.shape])) return at.mean(multiple_sim, axis=0) From 46a25c084dc87dcf58f50c748effc19a3809c515 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 4 Jan 2022 12:15:28 +0100 Subject: [PATCH 2/4] Create base SymbolicDistribution --- docs/source/api/distributions/utilities.rst | 1 + pymc/distributions/distribution.py | 261 +++++++++++++++++++- pymc/distributions/shape_utils.py | 9 +- 3 files changed, 268 insertions(+), 3 deletions(-) diff --git a/docs/source/api/distributions/utilities.rst b/docs/source/api/distributions/utilities.rst index 441eca24a6..a6ba09b059 100644 --- a/docs/source/api/distributions/utilities.rst +++ b/docs/source/api/distributions/utilities.rst @@ -7,6 +7,7 @@ Distribution utilities :toctree: generated/ Distribution + SymbolicDistribution Discrete Continuous NoDistribution diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index c4a68c6e02..cf9d87eb0b 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -19,7 +19,7 @@ from abc import ABCMeta from functools import singledispatch -from typing import Callable, Optional, Sequence +from typing import Callable, Iterable, Optional, Sequence import aesara @@ -354,6 +354,265 @@ def dist( return rv_out +class SymbolicDistribution: + def __new__( + cls, + name: str, + *args, + rngs: Optional[Iterable] = None, + dims: Optional[Dims] = None, + initval=None, + observed=None, + total_size=None, + transform=UNSET, + **kwargs, + ) -> TensorVariable: + """Adds a TensorVariable corresponding to a PyMC symbolic distribution to the + current model. + + While traditional PyMC distributions are represented by a single RandomVariable + graph, Symbolic distributions correspond to a larger graph that contains one or + more RandomVariables and an arbitrary number of deterministic operations, which + represent their own kind of distribution. + + The graphs returned by symbolic distributions can be evaluated directly to + obtain valid draws and can further be parsed by Aeppl to derive the + corresponding logp at runtime. + + Check pymc.distributions.Censored for an example of a symbolic distribution. + + Symbolic distributions must implement the following classmethods: + cls.dist + Performs input validation and converts optional alternative parametrizations + to a canonical parametrization. It should call `super().dist()`, passing a + list with the default parameters as the first and only non keyword argument, + followed by other keyword arguments like size and rngs, and return the result + cls.rv_op + Returns a TensorVariable that represents the symbolic distribution + parametrized by a default set of parameters and a size and rngs arguments + cls.ndim_supp + Returns the support of the symbolic distribution, given the default + parameters. This may not always be constant, for instance if the symbolic + distribution can be defined based on an arbitrary base distribution. + cls.change_size + Returns an equivalent symbolic distribution with a different size. This is + analogous to `pymc.aesaraf.change_rv_size` for `RandomVariable`s. + cls.graph_rvs + Returns base RVs in a symbolic distribution. + + Parameters + ---------- + cls : type + A distribution class that inherits from SymbolicDistribution. + name : str + Name for the new model variable. + rngs : optional + Random number generator to use for the RandomVariable(s) in the graph. + dims : tuple, optional + A tuple of dimension names known to the model. + initval : optional + Numeric or symbolic untransformed initial value of matching shape, + or one of the following initial value strategies: "moment", "prior". + Depending on the sampler's settings, a random jitter may be added to numeric, + symbolic or moment-based initial values in the transformed space. + observed : optional + Observed data to be passed when registering the random variable in the model. + See ``Model.register_rv``. + total_size : float, optional + See ``Model.register_rv``. + transform : optional + See ``Model.register_rv``. + **kwargs + Keyword arguments that will be forwarded to ``.dist()``. + Most prominently: ``shape`` and ``size`` + + Returns + ------- + var : TensorVariable + The created variable, registered in the Model. + """ + + try: + from pymc.model import Model + + model = Model.get_context() + except TypeError: + raise TypeError( + "No model on context stack, which is needed to " + "instantiate distributions. Add variable inside " + "a 'with model:' block, or use the '.dist' syntax " + "for a standalone distribution." + ) + + if "testval" in kwargs: + initval = kwargs.pop("testval") + warnings.warn( + "The `testval` argument is deprecated; use `initval`.", + FutureWarning, + stacklevel=2, + ) + + if not isinstance(name, string_types): + raise TypeError(f"Name needs to be a string but got: {name}") + + if dims is not None and "shape" in kwargs: + raise ValueError( + f"Passing both `dims` ({dims}) and `shape` ({kwargs['shape']}) is not supported!" + ) + if dims is not None and "size" in kwargs: + raise ValueError( + f"Passing both `dims` ({dims}) and `size` ({kwargs['size']}) is not supported!" + ) + dims = convert_dims(dims) + + if rngs is None: + # Create a temporary rv to obtain number of rngs needed + temp_graph = cls.dist(*args, rngs=None, **kwargs) + rngs = [model.next_rng() for _ in cls.graph_rvs(temp_graph)] + elif not isinstance(rngs, (list, tuple)): + rngs = [rngs] + + # Create the RV without dims information, because that's not something tracked at the Aesara level. + # If necessary we'll later replicate to a different size implied by already known dims. + rv_out = cls.dist(*args, rngs=rngs, **kwargs) + ndim_actual = rv_out.ndim + resize_shape = None + + # # `dims` are only available with this API, because `.dist()` can be used + # # without a modelcontext and dims are not tracked at the Aesara level. + if dims is not None: + ndim_resize, resize_shape, dims = resize_from_dims(dims, ndim_actual, model) + elif observed is not None: + ndim_resize, resize_shape, observed = resize_from_observed(observed, ndim_actual) + + if resize_shape: + # A batch size was specified through `dims`, or implied by `observed`. + rv_out = cls.change_size( + rv=rv_out, + new_size=resize_shape, + ) + + rv_out = model.register_rv( + rv_out, + name, + observed, + total_size, + dims=dims, + transform=transform, + initval=initval, + ) + + # TODO: Refactor this + # add in pretty-printing support + rv_out.str_repr = lambda *args, **kwargs: name + rv_out._repr_latex_ = f"\\text{name}" + # rv_out.str_repr = types.MethodType(str_for_dist, rv_out) + # rv_out._repr_latex_ = types.MethodType( + # functools.partial(str_for_dist, formatting="latex"), rv_out + # ) + + return rv_out + + @classmethod + def dist( + cls, + dist_params, + *, + shape: Optional[Shape] = None, + size: Optional[Size] = None, + **kwargs, + ) -> TensorVariable: + """Creates a TensorVariable corresponding to the `cls` symbolic distribution. + + Parameters + ---------- + dist_params : array-like + The inputs to the `RandomVariable` `Op`. + shape : int, tuple, Variable, optional + A tuple of sizes for each dimension of the new RV. + + An Ellipsis (...) may be inserted in the last position to short-hand refer to + all the dimensions that the RV would get if no shape/size/dims were passed at all. + size : int, tuple, Variable, optional + For creating the RV like in Aesara/NumPy. + + Returns + ------- + var : TensorVariable + """ + + if "testval" in kwargs: + kwargs.pop("testval") + warnings.warn( + "The `.dist(testval=...)` argument is deprecated and has no effect. " + "Initial values for sampling/optimization can be specified with `initval` in a modelcontext. " + "For using Aesara's test value features, you must assign the `.tag.test_value` yourself.", + FutureWarning, + stacklevel=2, + ) + if "initval" in kwargs: + raise TypeError( + "Unexpected keyword argument `initval`. " + "This argument is not available for the `.dist()` API." + ) + + if "dims" in kwargs: + raise NotImplementedError("The use of a `.dist(dims=...)` API is not supported.") + if shape is not None and size is not None: + raise ValueError( + f"Passing both `shape` ({shape}) and `size` ({size}) is not supported!" + ) + + shape = convert_shape(shape) + size = convert_size(size) + + create_size, ndim_expected, ndim_batch, ndim_supp = find_size( + shape=shape, size=size, ndim_supp=cls.ndim_supp(*dist_params) + ) + # Create the RV with a `size` right away. + # This is not necessarily the final result. + graph = cls.rv_op(*dist_params, size=create_size, **kwargs) + graph = maybe_resize( + graph, + cls.rv_op, + dist_params, + ndim_expected, + ndim_batch, + ndim_supp, + shape, + size, + change_rv_size_fn=cls.change_size, + **kwargs, + ) + + rngs = kwargs.pop("rngs", None) + if rngs is not None: + graph_rvs = cls.graph_rvs(graph) + assert len(rngs) == len(graph_rvs) + for rng, rv_out in zip(rngs, graph_rvs): + if ( + rv_out.owner + and isinstance(rv_out.owner.op, RandomVariable) + and isinstance(rng, RandomStateSharedVariable) + and not getattr(rng, "default_update", None) + ): + # This tells `aesara.function` that the shared RNG variable + # is mutable, which--in turn--tells the `FunctionGraph` + # `Supervisor` feature to allow in-place updates on the variable. + # Without it, the `RandomVariable`s could not be optimized to allow + # in-place RNG updates, forcing all sample results from compiled + # functions to be the same on repeated evaluations. + new_rng = rv_out.owner.outputs[0] + rv_out.update = (rng, new_rng) + rng.default_update = new_rng + + # TODO: Create new attr error stating that these are not available for DerivedDistribution + # rv_out.logp = _make_nice_attr_error("rv.logp(x)", "pm.logp(rv, x)") + # rv_out.logcdf = _make_nice_attr_error("rv.logcdf(x)", "pm.logcdf(rv, x)") + # rv_out.random = _make_nice_attr_error("rv.random()", "rv.eval()") + return graph + + @singledispatch def _get_moment(op, rv, *rv_inputs) -> TensorVariable: raise NotImplementedError(f"Variable {rv} of type {op} has no get_moment implementation.") diff --git a/pymc/distributions/shape_utils.py b/pymc/distributions/shape_utils.py index f74b61e7a0..28117c3353 100644 --- a/pymc/distributions/shape_utils.py +++ b/pymc/distributions/shape_utils.py @@ -20,6 +20,7 @@ import warnings +from functools import partial from typing import Optional, Sequence, Tuple, Union import numpy as np @@ -612,6 +613,8 @@ def maybe_resize( ndim_supp, shape, size, + *, + change_rv_size_fn=partial(change_rv_size, expand=True), **kwargs, ): """Resize a distribution if necessary. @@ -635,6 +638,8 @@ def maybe_resize( A tuple specifying the final shape of a distribution size : tuple A tuple specifying the size of a distribution + change_rv_size_fn: callable + A function that returns an equivalent RV with a different size Returns ------- @@ -647,7 +652,7 @@ def maybe_resize( if shape is not None and ndims_unexpected: if Ellipsis in shape: # Resize and we're done! - rv_out = change_rv_size(rv_var=rv_out, new_size=shape[:-1], expand=True) + rv_out = change_rv_size_fn(rv_var=rv_out, new_size=shape[:-1]) else: # This is rare, but happens, for example, with MvNormal(np.ones((2, 3)), np.eye(3), shape=(2, 3)). # Recreate the RV without passing `size` to created it with just the implied dimensions. @@ -656,7 +661,7 @@ def maybe_resize( # Now resize by any remaining "extra" dimensions that were not implied from support and parameters if rv_out.ndim < ndim_expected: expand_shape = shape[: ndim_expected - rv_out.ndim] - rv_out = change_rv_size(rv_var=rv_out, new_size=expand_shape, expand=True) + rv_out = change_rv_size_fn(rv_var=rv_out, new_size=expand_shape) if not rv_out.ndim == ndim_expected: raise ShapeError( f"Failed to create the RV with the expected dimensionality. " From 2cdf282875ab81b841c6582986bfca29cf465128 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 4 Jan 2022 12:56:09 +0100 Subject: [PATCH 3/4] Add Censored distributions --- RELEASE-NOTES.md | 1 + docs/source/api/distributions.rst | 5 +- docs/source/api/distributions/censored.rst | 9 ++ pymc/distributions/__init__.py | 2 + pymc/distributions/censored.py | 146 +++++++++++++++++++++ pymc/distributions/distribution.py | 7 + pymc/tests/test_distributions.py | 70 ++++++++++ pymc/util.py | 17 +++ 8 files changed, 255 insertions(+), 2 deletions(-) create mode 100644 docs/source/api/distributions/censored.rst create mode 100644 pymc/distributions/censored.py diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 300729abbd..42872f27ba 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -117,6 +117,7 @@ This includes API changes we did not warn about since at least `3.11.0` (2021-01 - With `pm.Data(..., mutable=True/False)`, or by using `pm.MutableData` vs. `pm.ConstantData` one can now create `TensorConstant` data variables. They can be more performant and compatible in situtations where a variable doesn't need to be changed via `pm.set_data()`. See [#5295](https://github.com/pymc-devs/pymc/pull/5295). - New named dimensions can be introduced to the model via `pm.Data(..., dims=...)`. For mutable data variables (see above) the lengths of these dimensions are symbolic, so they can be re-sized via `pm.set_data()`. - `pm.Data` now passes additional kwargs to `aesara.shared`/`at.as_tensor`. [#5098](https://github.com/pymc-devs/pymc/pull/5098). +- Univariate censored distributions are now available via `pm.Censored`. [#5169](https://github.com/pymc-devs/pymc/pull/5169) - ... diff --git a/docs/source/api/distributions.rst b/docs/source/api/distributions.rst index 91d4ceee65..47f0fc1a4f 100644 --- a/docs/source/api/distributions.rst +++ b/docs/source/api/distributions.rst @@ -6,10 +6,11 @@ Distributions distributions/continuous distributions/discrete - distributions/logprob distributions/multivariate distributions/mixture - distributions/simulator distributions/timeseries + distributions/censored + distributions/simulator distributions/transforms + distributions/logprob distributions/utilities diff --git a/docs/source/api/distributions/censored.rst b/docs/source/api/distributions/censored.rst new file mode 100644 index 0000000000..73f1c9363c --- /dev/null +++ b/docs/source/api/distributions/censored.rst @@ -0,0 +1,9 @@ +******** +Censored +******** + +.. currentmodule:: pymc +.. autosummary:: + :toctree: generated + + Censored diff --git a/pymc/distributions/__init__.py b/pymc/distributions/__init__.py index bb91ea23c2..2aff0c18c0 100644 --- a/pymc/distributions/__init__.py +++ b/pymc/distributions/__init__.py @@ -21,6 +21,7 @@ ) from pymc.distributions.bound import Bound +from pymc.distributions.censored import Censored from pymc.distributions.continuous import ( AsymmetricLaplace, Beta, @@ -187,6 +188,7 @@ "Rice", "Moyal", "Simulator", + "Censored", "CAR", "PolyaGamma", "logpt", diff --git a/pymc/distributions/censored.py b/pymc/distributions/censored.py new file mode 100644 index 0000000000..384dcb4c22 --- /dev/null +++ b/pymc/distributions/censored.py @@ -0,0 +1,146 @@ +# Copyright 2020 The PyMC Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import aesara.tensor as at +import numpy as np + +from aesara.scalar import Clip +from aesara.tensor import TensorVariable +from aesara.tensor.random.op import RandomVariable + +from pymc.distributions.distribution import SymbolicDistribution, _get_moment +from pymc.util import check_dist_not_registered + + +class Censored(SymbolicDistribution): + r""" + Censored distribution + + The pdf of a censored distribution is + + .. math:: + + \begin{cases} + 0 & \text{for } x < lower, \\ + \text{CDF}(lower, dist) & \text{for } x = lower, \\ + \text{PDF}(x, dist) & \text{for } lower < x < upper, \\ + 1-\text{CDF}(upper, dist) & \text {for} x = upper, \\ + 0 & \text{for } x > upper, + \end{cases} + + + Parameters + ---------- + dist: PyMC unnamed distribution + PyMC distribution created via the `.dist()` API, which will be censored. This + distribution must be univariate and have a logcdf method implemented. + lower: float or None + Lower (left) censoring point. If `None` the distribution will not be left censored + upper: float or None + Upper (right) censoring point. If `None`, the distribution will not be right censored. + + + Examples + -------- + .. code-block:: python + + with pm.Model(): + normal_dist = pm.Normal.dist(mu=0.0, sigma=1.0) + censored_normal = pm.Censored("censored_normal", normal_dist, lower=-1, upper=1) + """ + + @classmethod + def dist(cls, dist, lower, upper, **kwargs): + if not isinstance(dist, TensorVariable) or not isinstance(dist.owner.op, RandomVariable): + raise ValueError( + f"Censoring dist must be a distribution created via the `.dist()` API, got {type(dist)}" + ) + if dist.owner.op.ndim_supp > 0: + raise NotImplementedError( + "Censoring of multivariate distributions has not been implemented yet" + ) + check_dist_not_registered(dist) + return super().dist([dist, lower, upper], **kwargs) + + @classmethod + def rv_op(cls, dist, lower=None, upper=None, size=None, rngs=None): + if lower is None: + lower = at.constant(-np.inf) + if upper is None: + upper = at.constant(np.inf) + + # Censoring is achieved by clipping the base distribution between lower and upper + rv_out = at.clip(dist, lower, upper) + + # Reference nodes to facilitate identification in other classmethods, without + # worring about possible dimshuffles + rv_out.tag.dist = dist + rv_out.tag.lower = lower + rv_out.tag.upper = upper + + if size is not None: + rv_out = cls.change_size(rv_out, size) + if rngs is not None: + rv_out = cls.change_rngs(rv_out, rngs) + + return rv_out + + @classmethod + def ndim_supp(cls, *dist_params): + return 0 + + @classmethod + def change_size(cls, rv, new_size): + dist_node = rv.tag.dist.owner + lower = rv.tag.lower + upper = rv.tag.upper + rng, old_size, dtype, *dist_params = dist_node.inputs + new_dist = dist_node.op.make_node(rng, new_size, dtype, *dist_params).default_output() + return cls.rv_op(new_dist, lower, upper) + + @classmethod + def change_rngs(cls, rv, new_rngs): + (new_rng,) = new_rngs + dist_node = rv.tag.dist.owner + lower = rv.tag.lower + upper = rv.tag.upper + olg_rng, size, dtype, *dist_params = dist_node.inputs + new_dist = dist_node.op.make_node(new_rng, size, dtype, *dist_params).default_output() + return cls.rv_op(new_dist, lower, upper) + + @classmethod + def graph_rvs(cls, rv): + return (rv.tag.dist,) + + +@_get_moment.register(Clip) +def get_moment_censored(op, rv, dist, lower, upper): + moment = at.switch( + at.eq(lower, -np.inf), + at.switch( + at.isinf(upper), + # lower = -inf, upper = inf + 0, + # lower = -inf, upper = x + upper - 1, + ), + at.switch( + at.eq(upper, np.inf), + # lower = x, upper = inf + lower + 1, + # lower = x, upper = x + (lower + upper) / 2, + ), + ) + moment = at.full_like(dist, moment) + return moment diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index cf9d87eb0b..86a1c1e7fb 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -26,6 +26,7 @@ from aeppl.logprob import _logcdf, _logprob from aesara import tensor as at from aesara.tensor.basic import as_tensor_variable +from aesara.tensor.elemwise import Elemwise from aesara.tensor.random.op import RandomVariable from aesara.tensor.random.var import RandomStateSharedVariable from aesara.tensor.var import TensorVariable @@ -628,6 +629,12 @@ def get_moment(rv: TensorVariable) -> TensorVariable: return _get_moment(rv.owner.op, rv, *rv.owner.inputs).astype(rv.dtype) +@_get_moment.register(Elemwise) +def _get_moment_elemwise(op, rv, *dist_params): + """For Elemwise Ops, dispatch on respective scalar_op""" + return _get_moment(op.scalar_op, rv, *dist_params) + + class Discrete(Distribution): """Base class for discrete distributions""" diff --git a/pymc/tests/test_distributions.py b/pymc/tests/test_distributions.py index 0095638e57..efe42006f4 100644 --- a/pymc/tests/test_distributions.py +++ b/pymc/tests/test_distributions.py @@ -3275,3 +3275,73 @@ def logp(value, mu): ).shape == to_tuple(size) ) + + +class TestCensored: + @pytest.mark.parametrize("censored", (False, True)) + def test_censored_workflow(self, censored): + # Based on pymc-examples/censored_data + rng = np.random.default_rng(1234) + size = 500 + true_mu = 13.0 + true_sigma = 5.0 + + # Set censoring limits + low = 3.0 + high = 16.0 + + # Draw censored samples + data = rng.normal(true_mu, true_sigma, size) + data[data <= low] = low + data[data >= high] = high + + with pm.Model(rng_seeder=17092021) as m: + mu = pm.Normal( + "mu", + mu=((high - low) / 2) + low, + sigma=(high - low) / 2.0, + initval="moment", + ) + sigma = pm.HalfNormal("sigma", sigma=(high - low) / 2.0, initval="moment") + observed = pm.Censored( + "observed", + pm.Normal.dist(mu=mu, sigma=sigma), + lower=low if censored else None, + upper=high if censored else None, + observed=data, + ) + + prior_pred = pm.sample_prior_predictive() + posterior = pm.sample(tune=500, draws=500) + posterior_pred = pm.sample_posterior_predictive(posterior) + + expected = True if censored else False + assert (9 < prior_pred.prior_predictive.mean() < 10) == expected + assert (13 < posterior.posterior["mu"].mean() < 14) == expected + assert (4.5 < posterior.posterior["sigma"].mean() < 5.5) == expected + assert (12 < posterior_pred.posterior_predictive.mean() < 13) == expected + + def test_censored_invalid_dist(self): + with pm.Model(): + invalid_dist = pm.Normal + with pytest.raises( + ValueError, + match=r"Censoring dist must be a distribution created via the", + ): + x = pm.Censored("x", invalid_dist, lower=None, upper=None) + + with pm.Model(): + mv_dist = pm.Dirichlet.dist(a=[1, 1, 1]) + with pytest.raises( + NotImplementedError, + match="Censoring of multivariate distributions has not been implemented yet", + ): + x = pm.Censored("x", mv_dist, lower=None, upper=None) + + with pm.Model(): + registered_dist = pm.Normal("dist") + with pytest.raises( + ValueError, + match="The dist dist was already registered in the current model", + ): + x = pm.Censored("x", registered_dist, lower=None, upper=None) diff --git a/pymc/util.py b/pymc/util.py index 92f2462e1f..f218368854 100644 --- a/pymc/util.py +++ b/pymc/util.py @@ -332,3 +332,20 @@ def cf(self): return cf return cachedmethod(self_cache_fn(f.__name__), key=hash_key)(f) + + +def check_dist_not_registered(dist, model=None): + """Check that a dist is not registered in the model already""" + from pymc.model import modelcontext + + try: + model = modelcontext(None) + except TypeError: + pass + else: + if dist in model.basic_RVs: + raise ValueError( + f"The dist {dist} was already registered in the current model.\n" + f"You should use an unregistered (unnamed) distribution created via " + f"the `.dist()` API instead, such as:\n`dist=pm.Normal.dist(0, 1)`" + ) From 4fc7dc9f1dcb40a8d6d82927e4840e52344183e9 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 4 Jan 2022 13:27:54 +0100 Subject: [PATCH 4/4] Small fixes to pm.Bound * Fix invalid code example in docstrings * Rename distribution parameter to dist * Use `check_dist_not_registered` helper --- pymc/distributions/bound.py | 60 +++++++++++++------------------- pymc/tests/test_distributions.py | 2 +- 2 files changed, 26 insertions(+), 36 deletions(-) diff --git a/pymc/distributions/bound.py b/pymc/distributions/bound.py index 076efebd60..6b0b548eb6 100644 --- a/pymc/distributions/bound.py +++ b/pymc/distributions/bound.py @@ -26,6 +26,7 @@ from pymc.distributions.logprob import logp from pymc.distributions.shape_utils import to_tuple from pymc.model import modelcontext +from pymc.util import check_dist_not_registered __all__ = ["Bound"] @@ -144,8 +145,9 @@ class Bound: Parameters ---------- - distribution: pymc distribution - Distribution to be transformed into a bounded distribution. + dist: PyMC unnamed distribution + Distribution to be transformed into a bounded distribution created via the + `.dist()` API. lower: float or array like, optional Lower bound of the distribution. upper: float or array like, optional @@ -156,15 +158,15 @@ class Bound: .. code-block:: python with pm.Model(): - normal_dist = Normal.dist(mu=0.0, sigma=1.0, initval=-0.5) - negative_normal = pm.Bound(normal_dist, upper=0.0) + normal_dist = pm.Normal.dist(mu=0.0, sigma=1.0) + negative_normal = pm.Bound("negative_normal", normal_dist, upper=0.0) """ def __new__( cls, name, - distribution, + dist, lower=None, upper=None, size=None, @@ -174,7 +176,7 @@ def __new__( **kwargs, ): - cls._argument_checks(distribution, **kwargs) + cls._argument_checks(dist, **kwargs) if dims is not None: model = modelcontext(None) @@ -185,12 +187,12 @@ def __new__( raise ValueError("Given dims do not exist in model coordinates.") lower, upper, initval = cls._set_values(lower, upper, size, shape, initval) - distribution.tag.ignore_logprob = True + dist.tag.ignore_logprob = True - if isinstance(distribution.owner.op, Continuous): + if isinstance(dist.owner.op, Continuous): res = _ContinuousBounded( name, - [distribution, lower, upper], + [dist, lower, upper], initval=floatX(initval), size=size, shape=shape, @@ -199,7 +201,7 @@ def __new__( else: res = _DiscreteBounded( name, - [distribution, lower, upper], + [dist, lower, upper], initval=intX(initval), size=size, shape=shape, @@ -210,7 +212,7 @@ def __new__( @classmethod def dist( cls, - distribution, + dist, lower=None, upper=None, size=None, @@ -218,12 +220,12 @@ def dist( **kwargs, ): - cls._argument_checks(distribution, **kwargs) + cls._argument_checks(dist, **kwargs) lower, upper, initval = cls._set_values(lower, upper, size, shape, initval=None) - distribution.tag.ignore_logprob = True - if isinstance(distribution.owner.op, Continuous): + dist.tag.ignore_logprob = True + if isinstance(dist.owner.op, Continuous): res = _ContinuousBounded.dist( - [distribution, lower, upper], + [dist, lower, upper], size=size, shape=shape, **kwargs, @@ -231,7 +233,7 @@ def dist( res.tag.test_value = floatX(initval) else: res = _DiscreteBounded.dist( - [distribution, lower, upper], + [dist, lower, upper], size=size, shape=shape, **kwargs, @@ -240,7 +242,7 @@ def dist( return res @classmethod - def _argument_checks(cls, distribution, **kwargs): + def _argument_checks(cls, dist, **kwargs): if "observed" in kwargs: raise ValueError( "Observed Bound distributions are not supported. " @@ -249,7 +251,7 @@ def _argument_checks(cls, distribution, **kwargs): "with the cumulative probability function." ) - if not isinstance(distribution, TensorVariable): + if not isinstance(dist, TensorVariable): raise ValueError( "Passing a distribution class to `Bound` is no longer supported.\n" "Please pass the output of a distribution instantiated via the " @@ -257,26 +259,14 @@ def _argument_checks(cls, distribution, **kwargs): '`pm.Bound("bound", pm.Normal.dist(0, 1), lower=0)`' ) - try: - model = modelcontext(None) - except TypeError: - pass - else: - if distribution in model.basic_RVs: - raise ValueError( - f"The distribution passed into `Bound` was already registered " - f"in the current model.\nYou should pass an unregistered " - f"(unnamed) distribution created via the `.dist()` API, such as:\n" - f'`pm.Bound("bound", pm.Normal.dist(0, 1), lower=0)`' - ) - - if distribution.owner.op.ndim_supp != 0: + check_dist_not_registered(dist) + + if dist.owner.op.ndim_supp != 0: raise NotImplementedError("Bounding of MultiVariate RVs is not yet supported.") - if not isinstance(distribution.owner.op, (Discrete, Continuous)): + if not isinstance(dist.owner.op, (Discrete, Continuous)): raise ValueError( - f"`distribution` {distribution} must be a Discrete or Continuous" - " distribution subclass" + f"`distribution` {dist} must be a Discrete or Continuous" " distribution subclass" ) @classmethod diff --git a/pymc/tests/test_distributions.py b/pymc/tests/test_distributions.py index efe42006f4..bff0b86dbf 100644 --- a/pymc/tests/test_distributions.py +++ b/pymc/tests/test_distributions.py @@ -2701,7 +2701,7 @@ def test_arguments_checks(self): with pytest.raises(ValueError, match=msg): pm.Bound("bound", x, dims="random_dims") - msg = "The distribution passed into `Bound` was already registered" + msg = "The dist x was already registered in the current model" with pm.Model() as m: x = pm.Normal("x", 0, 1) with pytest.raises(ValueError, match=msg):