Skip to content

Commit 9e7495b

Browse files
author
Junpeng Lao
authored
Fix random sampling in Mixture (#3004)
* WIP fix mixture random
* Add test
* fix test
* remove shape of MultiObservedRV
* Fix test: revert back to original, i.e. not setting the shape of observedRV
* Fix test
* Fix test, add kwarg to NormalMixture (also updated dependent_density_regression notebook)
* improve naming
* fix test
1 parent dd7caab commit 9e7495b

File tree

4 files changed

+846
-703
lines changed

4 files changed

+846
-703
lines changed

docs/source/notebooks/dependent_density_regression.ipynb

Lines changed: 722 additions & 686 deletions
Large diffs are not rendered by default.

pymc3/distributions/mixture.py

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -158,18 +158,33 @@ def random_choice(*args, **kwargs):
158158
return np.random.choice(k, p=w, *args, **kwargs)
159159

160160
w = draw_values([self.w], point=point)[0]
161-
161+
comp_tmp = self._comp_samples(point=point, size=None)
162+
if self.shape.size == 0:
163+
distshape = np.asarray(np.broadcast(w, comp_tmp).shape)[..., :-1]
164+
else:
165+
distshape = self.shape
162166
w_samples = generate_samples(random_choice,
163167
w=w,
164168
broadcast_shape=w.shape[:-1] or (1,),
165-
dist_shape=self.shape,
169+
dist_shape=distshape,
166170
size=size).squeeze()
167-
comp_samples = self._comp_samples(point=point, size=size)
168-
169-
if comp_samples.ndim > 1:
170-
return np.squeeze(comp_samples[np.arange(w_samples.size), w_samples])
171+
if (size is None) or (distshape.size == 0):
172+
comp_samples = self._comp_samples(point=point, size=size)
173+
if comp_samples.ndim > 1:
174+
samples = np.squeeze(comp_samples[np.arange(w_samples.size), ..., w_samples])
175+
else:
176+
samples = np.squeeze(comp_samples[w_samples])
171177
else:
172-
return np.squeeze(comp_samples[w_samples])
178+
samples = np.zeros((size,)+tuple(distshape))
179+
for i in range(size):
180+
w_tmp = w_samples[i, :]
181+
comp_tmp = self._comp_samples(point=point, size=None)
182+
if comp_tmp.ndim > 1:
183+
samples[i, :] = np.squeeze(comp_tmp[np.arange(w_tmp.size), ..., w_tmp])
184+
else:
185+
samples[i, :] = np.squeeze(comp_tmp[w_tmp])
186+
187+
return samples
173188

174189

175190
class NormalMixture(Mixture):
@@ -197,22 +212,22 @@ class NormalMixture(Mixture):
197212
the component standard deviations
198213
tau : array of floats
199214
the component precisions
215+
comp_shape : shape of the Normal component
216+
notice that it should be different than the shape
217+
of the mixture distribution, with one axis being
218+
the number of components.
200219
201220
Note: You only have to pass in sd or tau, but not both.
202221
"""
203222

204-
def __init__(self, w, mu, *args, **kwargs):
223+
def __init__(self, w, mu, comp_shape=(), *args, **kwargs):
205224
_, sd = get_tau_sd(tau=kwargs.pop('tau', None),
206225
sd=kwargs.pop('sd', None))
207226

208-
distshape = np.broadcast(mu, sd).shape
209227
self.mu = mu = tt.as_tensor_variable(mu)
210228
self.sd = sd = tt.as_tensor_variable(sd)
211229

212-
if not distshape:
213-
distshape = np.broadcast(mu.tag.test_value, sd.tag.test_value).shape
214-
215-
super(NormalMixture, self).__init__(w, Normal.dist(mu, sd=sd, shape=distshape),
230+
super(NormalMixture, self).__init__(w, Normal.dist(mu, sd=sd, shape=comp_shape),
216231
*args, **kwargs)
217232

218233
def _repr_latex_(self, name=None, dist=None):

pymc3/tests/test_distributions_random.py

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -759,17 +759,79 @@ def ref_rand(size, w, mu, sd):
759759
pymc3_random(pm.NormalMixture, {'w': Simplex(2),
760760
'mu': Domain([[.05, 2.5], [-5., 1.]], edges=(None, None)),
761761
'sd': Domain([[1, 1], [1.5, 2.]], edges=(None, None))},
762+
extra_args={'comp_shape': 2},
762763
size=1000,
763764
ref_rand=ref_rand)
764765
pymc3_random(pm.NormalMixture, {'w': Simplex(3),
765766
'mu': Domain([[-5., 1., 2.5]], edges=(None, None)),
766767
'sd': Domain([[1.5, 2., 3.]], edges=(None, None))},
768+
extra_args={'comp_shape': 3},
767769
size=1000,
768770
ref_rand=ref_rand)
769771

772+
773+
def test_mixture_random_shape():
774+
# test the shape broadcasting in mixture random
775+
y = np.concatenate([nr.poisson(5, size=10),
776+
nr.poisson(9, size=10)])
777+
with pm.Model() as m:
778+
comp0 = pm.Poisson.dist(mu=np.ones(2))
779+
w0 = pm.Dirichlet('w0', a=np.ones(2))
780+
like0 = pm.Mixture('like0',
781+
w=w0,
782+
comp_dists=comp0,
783+
shape=y.shape,
784+
observed=y)
785+
786+
comp1 = pm.Poisson.dist(mu=np.ones((20, 2)),
787+
shape=(20, 2))
788+
w1 = pm.Dirichlet('w1', a=np.ones(2))
789+
like1 = pm.Mixture('like1',
790+
w=w1,
791+
comp_dists=comp1, observed=y)
792+
793+
comp2 = pm.Poisson.dist(mu=np.ones(2))
794+
w2 = pm.Dirichlet('w2',
795+
a=np.ones(2),
796+
shape=(20, 2))
797+
like2 = pm.Mixture('like2',
798+
w=w2,
799+
comp_dists=comp2,
800+
observed=y)
801+
802+
comp3 = pm.Poisson.dist(mu=np.ones(2),
803+
shape=(20, 2))
804+
w3 = pm.Dirichlet('w3',
805+
a=np.ones(2),
806+
shape=(20, 2))
807+
like3 = pm.Mixture('like3',
808+
w=w3,
809+
comp_dists=comp3,
810+
observed=y)
811+
812+
rand0 = like0.distribution.random(m.test_point, size=100)
813+
assert rand0.shape == (100, 20)
814+
815+
rand1 = like1.distribution.random(m.test_point, size=100)
816+
assert rand1.shape == (100, 20)
817+
818+
rand2 = like2.distribution.random(m.test_point, size=100)
819+
assert rand2.shape == (100, 20)
820+
821+
rand3 = like3.distribution.random(m.test_point, size=100)
822+
assert rand3.shape == (100, 20)
823+
824+
with m:
825+
ppc = pm.sample_ppc([m.test_point], samples=200)
826+
assert ppc['like0'].shape == (200, 20)
827+
assert ppc['like1'].shape == (200, 20)
828+
assert ppc['like2'].shape == (200, 20)
829+
assert ppc['like3'].shape == (200, 20)
830+
831+
770832
def test_density_dist_with_random_sampleable():
771833
with pm.Model() as model:
772-
mu = pm.Normal('mu',0,1)
834+
mu = pm.Normal('mu', 0, 1)
773835
normal_dist = pm.Normal.dist(mu, 1)
774836
pm.DensityDist('density_dist', normal_dist.logp, observed=np.random.randn(100), random=normal_dist.random)
775837
trace = pm.sample(100)
@@ -781,7 +843,7 @@ def test_density_dist_with_random_sampleable():
781843

782844
def test_density_dist_without_random_not_sampleable():
783845
with pm.Model() as model:
784-
mu = pm.Normal('mu',0,1)
846+
mu = pm.Normal('mu', 0, 1)
785847
normal_dist = pm.Normal.dist(mu, 1)
786848
pm.DensityDist('density_dist', normal_dist.logp, observed=np.random.randn(100))
787849
trace = pm.sample(100)

pymc3/tests/test_sampling.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,12 +140,14 @@ def test_empty_model():
140140
pm.sample()
141141
error.match('any free variables')
142142

143+
143144
def test_partial_trace_sample():
144145
with pm.Model() as model:
145146
a = pm.Normal('a', mu=0, sd=1)
146147
b = pm.Normal('b', mu=0, sd=1)
147148
trace = pm.sample(trace=[a])
148149

150+
149151
@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
150152
class TestNamedSampling(SeededTest):
151153
def test_shared_named(self):
@@ -235,7 +237,34 @@ def test_normal_vector(self):
235237
trace = pm.sample()
236238

237239
with model:
238-
# test list input
240+
# test list input
241+
ppc0 = pm.sample_ppc([model.test_point], samples=10)
242+
ppc = pm.sample_ppc(trace, samples=10, vars=[])
243+
assert len(ppc) == 0
244+
ppc = pm.sample_ppc(trace, samples=10, vars=[a])
245+
assert 'a' in ppc
246+
assert ppc['a'].shape == (10, 2)
247+
248+
ppc = pm.sample_ppc(trace, samples=10, vars=[a], size=4)
249+
assert 'a' in ppc
250+
assert ppc['a'].shape == (10, 4, 2)
251+
252+
def test_vector_observed(self):
253+
# This test was initially created to test whether observedRVs
254+
# can assert the shape automatically from the observed data.
255+
# It can make sample_ppc correct for RVs similar to below (i.e.,
256+
# some kind of broadcasting is involved). However, doing so makes
257+
# the application with `theano.shared` array as observed data
258+
# invalid (after the `.set_value` the RV shape could change).
259+
with pm.Model() as model:
260+
mu = pm.Normal('mu', mu=0, sd=1)
261+
a = pm.Normal('a', mu=mu, sd=1,
262+
shape=2, # necessary to make ppc sample correct
263+
observed=np.array([0., 1.]))
264+
trace = pm.sample()
265+
266+
with model:
267+
# test list input
239268
ppc0 = pm.sample_ppc([model.test_point], samples=10)
240269
ppc = pm.sample_ppc(trace, samples=10, vars=[])
241270
assert len(ppc) == 0
@@ -254,7 +283,7 @@ def test_sum_normal(self):
254283
trace = pm.sample()
255284

256285
with model:
257-
# test list input
286+
# test list input
258287
ppc0 = pm.sample_ppc([model.test_point], samples=10)
259288
ppc = pm.sample_ppc(trace, samples=1000, vars=[b])
260289
assert len(ppc) == 1
@@ -263,6 +292,7 @@ def test_sum_normal(self):
263292
_, pval = stats.kstest(ppc['b'], stats.norm(scale=scale).cdf)
264293
assert pval > 0.001
265294

295+
266296
class TestSamplePPCW(SeededTest):
267297
def test_sample_ppc_w(self):
268298
data0 = np.random.normal(0, 1, size=500)

0 commit comments

Comments (0)