From e983c9a689aa823713f763941b3afbb3e0d1a148 Mon Sep 17 00:00:00 2001 From: "Oriol (ZBook)" Date: Fri, 22 Jan 2021 23:30:42 +0200 Subject: [PATCH 1/4] first try to observed/givens split --- pymc3/distributions/distribution.py | 37 ++++++++++++++++++++++++----- pymc3/model.py | 6 +++-- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 8178ae0d22..ecb912bccf 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -92,10 +92,22 @@ def __new__(cls, name, *args, **kwargs): if not isinstance(name, string_types): raise TypeError(f"Name needs to be a string but got: {name}") - data = kwargs.pop("observed", None) - cls.data = data - if isinstance(data, ObservedRV) or isinstance(data, FreeRV): - raise TypeError("observed needs to be data but got: {}".format(type(data))) + observed_data = kwargs.pop("observed", None) + if isinstance(observed_data, ObservedRV) or isinstance(observed_data, FreeRV): + raise TypeError("observed needs to be data but got: {}".format(type(observed_data))) + given_data = kwargs.pop("givens", None) + if given_data is None: + cls.data = observed_data + elif observed_data is None: + cls.data = given_data + elif isinstance(observed_data, dict): + cls.data = {**observed_data, **given_data} + else: + raise ValueError( + "If both observed and givens argument are present, observed needs to " + f"be a dict but got: {type(observed_data)}" + ) + data = cls.data total_size = kwargs.pop("total_size", None) dims = kwargs.pop("dims", None) @@ -119,7 +131,7 @@ def __new__(cls, name, *args, **kwargs): dist = cls.dist(*args, **kwargs, shape=shape) else: dist = cls.dist(*args, **kwargs) - return model.Var(name, dist, data, total_size, dims=dims) + return model.Var(name, dist, data, total_size, dims=dims, givens=given_data) def __getnewargs__(self): return (_Unpickling,) @@ -358,6 +370,7 @@ def __init__( logp, shape=(), dtype=None, + givens=None, testval=0, random=None, wrap_random_with_dist_shape=True, @@ -379,6 +392,8 @@ def __init__( a value here. dtype: None, str (Optional) The dtype of the distribution. + givens : dict, optional + Model variables on which the DensityDist is conditioned. testval: number or array (Optional) The ``testval`` of the RV's tensor that follow the ``DensityDist`` distribution. @@ -525,9 +540,19 @@ def __init__( assert prior.shape == (10, 100, 3) """ + observed = kwargs.get("observed", None) + if not (isinstance(givens, dict) or givens is None): + raise TypeError(f"givens needs to be of type dict but got: {type(givens)}") + if isinstance(observed, dict) and isinstance(givens, dict): + intersection = givens.keys() & observed.keys() + if intersection: + raise ValueError( + f"{intersection} keys found in both givens and observed dicts but " + "they can not have repeated keys" + ) if dtype is None: dtype = theano.config.floatX - super().__init__(shape, dtype, testval, *args, **kwargs) + super().__init__(shape, dtype, testval, *args, **{**kwargs, "givens": givens}) self.logp = logp if type(self.logp) == types.MethodType: if PLATFORM != "linux": diff --git a/pymc3/model.py b/pymc3/model.py index 393c4d2f6a..b343074bde 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -1109,7 +1109,7 @@ def add_coords(self, coords): else: self.coords[name] = coords[name] - def Var(self, name, dist, data=None, total_size=None, dims=None): + def Var(self, name, dist, data=None, total_size=None, dims=None, givens=None): """Create and add (un)observed random variable to the model with an appropriate prior distribution. @@ -1161,6 +1161,7 @@ def Var(self, name, dist, data=None, total_size=None, dims=None): var = MultiObservedRV( name=name, data=data, + givens=givens, distribution=dist, total_size=total_size, model=self, @@ -1834,7 +1835,7 @@ class MultiObservedRV(Factor): Potentially partially observed. """ - def __init__(self, name, data, distribution, total_size=None, model=None): + def __init__(self, name, data, distribution, total_size=None, model=None, givens=None): """ Parameters ---------- @@ -1850,6 +1851,7 @@ def __init__(self, name, data, distribution, total_size=None, model=None): self.data = { name: as_tensor(data, name, model, distribution) for name, data in data.items() } + self.givens = givens self.missing_values = [ datum.missing_values for datum in self.data.values() if datum.missing_values is not None From 91d7bb908d506c73157065673b08479a5b92da41 Mon Sep 17 00:00:00 2001 From: "Oriol (ZBook)" Date: Sat, 23 Jan 2021 01:06:02 +0200 Subject: [PATCH 2/4] fix error messages and argument handling --- pymc3/distributions/distribution.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index ecb912bccf..f3d3f7cdb1 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -98,9 +98,17 @@ def __new__(cls, name, *args, **kwargs): given_data = kwargs.pop("givens", None) if given_data is None: cls.data = observed_data + elif not isinstance(given_data, dict): + raise TypeError(f"givens needs to be of type dict but got: {type(givens)}") elif observed_data is None: cls.data = given_data elif isinstance(observed_data, dict): + intersection = given_data.keys() & observed_data.keys() + if intersection: + raise ValueError( + f"{intersection} keys found in both givens and observed dicts but " + "they can not have repeated keys" + ) cls.data = {**observed_data, **given_data} else: raise ValueError( @@ -128,8 +136,12 @@ def __new__(cls, name, *args, **kwargs): # Some distributions do not accept shape=None if has_shape or shape is not None: + if "givens" in kwargs: + raise ValueError("givens found") dist = cls.dist(*args, **kwargs, shape=shape) else: + if "givens" in kwargs: + raise ValueError("givens found") dist = cls.dist(*args, **kwargs) return model.Var(name, dist, data, total_size, dims=dims, givens=given_data) @@ -540,19 +552,9 @@ def __init__( assert prior.shape == (10, 100, 3) """ - observed = kwargs.get("observed", None) - if not (isinstance(givens, dict) or givens is None): - raise TypeError(f"givens needs to be of type dict but got: {type(givens)}") - if isinstance(observed, dict) and isinstance(givens, dict): - intersection = givens.keys() & observed.keys() - if intersection: - raise ValueError( - f"{intersection} keys found in both givens and observed dicts but " - "they can not have repeated keys" - ) if dtype is None: dtype = theano.config.floatX - super().__init__(shape, dtype, testval, *args, **{**kwargs, "givens": givens}) + super().__init__(shape, dtype, testval, *args, **kwargs) self.logp = logp if type(self.logp) == types.MethodType: if PLATFORM != "linux": From 2705d9d6bfd6482ded3e773d347da3dc3cab7756 Mon Sep 17 00:00:00 2001 From: "Oriol (ZBook)" Date: Sat, 23 Jan 2021 03:43:43 +0200 Subject: [PATCH 3/4] working version of obs/given split --- pymc3/distributions/distribution.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index f3d3f7cdb1..3599fcd345 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -533,24 +533,6 @@ def __init__( the returned array of samples. It is the user's responsibility to wrap the callable to make it comply with PyMC3's interpretation of ``size``. - - - .. code-block:: python - - with pm.Model(): - mu = pm.Normal('mu', 0 , 1) - normal_dist = pm.Normal.dist(mu, 1, shape=3) - dens = pm.DensityDist( - 'density_dist', - normal_dist.logp, - observed=np.random.randn(100, 3), - shape=3, - random=stats.norm.rvs, - pymc3_size_interpretation=False, # Is True by default - ) - prior = pm.sample_prior_predictive(10)['density_dist'] - assert prior.shape == (10, 100, 3) - """ if dtype is None: dtype = theano.config.floatX From 0b79ef74b66ef8cd99d7da594ba6877c739923ad Mon Sep 17 00:00:00 2001 From: "Oriol (ZBook)" Date: Sun, 24 Jan 2021 04:15:04 +0200 Subject: [PATCH 4/4] add extra error message --- pymc3/distributions/distribution.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 3599fcd345..057f7b7904 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -103,6 +103,16 @@ def __new__(cls, name, *args, **kwargs): elif observed_data is None: cls.data = given_data elif isinstance(observed_data, dict): + non_data_obs = { + key: type(value) + for key, value in observed_data.items() + if isinstance(value, ObservedRV) or isinstance(value, FreeRV) + } + if non_data_obs: + raise TypeError( + f"All values in observed dict need to be data but got: {non_data_obs}. " + "You may want to use the givens argument in DensityDist." + ) intersection = given_data.keys() & observed_data.keys() if intersection: raise ValueError( @@ -136,12 +146,8 @@ def __new__(cls, name, *args, **kwargs): # Some distributions do not accept shape=None if has_shape or shape is not None: - if "givens" in kwargs: - raise ValueError("givens found") dist = cls.dist(*args, **kwargs, shape=shape) else: - if "givens" in kwargs: - raise ValueError("givens found") dist = cls.dist(*args, **kwargs) return model.Var(name, dist, data, total_size, dims=dims, givens=given_data) @@ -382,7 +388,6 @@ def __init__( logp, shape=(), dtype=None, - givens=None, testval=0, random=None, wrap_random_with_dist_shape=True, @@ -404,8 +409,6 @@ def __init__( a value here. dtype: None, str (Optional) The dtype of the distribution. - givens : dict, optional - Model variables on which the DensityDist is conditioned. testval: number or array (Optional) The ``testval`` of the RV's tensor that follow the ``DensityDist`` distribution. @@ -430,6 +433,8 @@ def __init__( If ``True``, the shape of the random samples generate in the ``random`` method is checked with the expected return shape. This test is only performed if ``wrap_random_with_dist_shape is False``. + givens : dict, optional + Model variables on which the DensityDist is conditioned. args, kwargs: (Optional) These are passed to the parent class' ``__init__``.