Skip to content

Commit ce9e98d

Browse files
authored
Merge pull request #3285 from lucianopaz/iss3271
Fix for #3271
2 parents b400299 + da5b66e commit ce9e98d

File tree

3 files changed

+101
-28
lines changed

3 files changed

+101
-28
lines changed

RELEASE-NOTES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
- Refactor SMC and properly compute marginal likelihood (#3124)
2929
- Removed use of deprecated `ymin` keyword in matplotlib's `Axes.set_ylim` (#3279)
3030
- Fix for #3210. Now `distribution.draw_values(params)`, will draw the `params` values from their joint probability distribution and not from combinations of their marginals (Refer to PR #3273).
31+
- Rewrote `Multinomial._random` method to better handle shape broadcasting (#3271)
3132

3233
### Deprecations
3334

pymc3/distributions/multivariate.py

Lines changed: 84 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -535,37 +535,93 @@ def _random(self, n, p, size=None):
535535
# Set float type to float64 for numpy. This change is related to numpy issue #8317 (https://github.com/numpy/numpy/issues/8317)
536536
p = p.astype('float64')
537537
# Now, re-normalize all of the values in float64 precision. This is done inside the conditionals
538-
if size == p.shape:
539-
size = None
540-
elif size[-len(p.shape):] == p.shape:
541-
size = size[:len(size) - len(p.shape)]
542538

543-
n_dim = n.squeeze().ndim
544-
545-
if (n_dim == 0) and (p.ndim == 1):
546-
p = p / p.sum()
547-
randnum = np.random.multinomial(n, p.squeeze(), size=size)
548-
elif (n_dim == 0) and (p.ndim > 1):
549-
p = p / p.sum(axis=1, keepdims=True)
550-
randnum = np.asarray([
551-
np.random.multinomial(n.squeeze(), pp, size=size)
552-
for pp in p
553-
])
554-
randnum = np.moveaxis(randnum, 1, 0)
555-
elif (n_dim > 0) and (p.ndim == 1):
539+
# np.random.multinomial needs `n` to be a scalar int and `p` a
540+
# sequence
541+
if p.ndim == 1 and (n.ndim == 0 or (n.ndim == 1 and n.shape[0] == 1)):
542+
# If `n` is already a scalar and `p` is a sequence, then just
543+
# return np.multinomial with some size handling
556544
p = p / p.sum()
557-
randnum = np.asarray([
558-
np.random.multinomial(nn, p.squeeze(), size=size)
559-
for nn in n
560-
])
561-
randnum = np.moveaxis(randnum, 1, 0)
545+
if size is not None:
546+
if size == p.shape:
547+
size = None
548+
elif size[-len(p.shape):] == p.shape:
549+
size = size[:len(size) - len(p.shape)]
550+
randnum = np.random.multinomial(n, p, size=size)
551+
return randnum.astype(original_dtype)
552+
# The shapes of `p` and `n` must be broadcasted by hand depending on
553+
# their ndim. We will assume that the last axis of the `p` array will
554+
# be the sequence to feed into np.random.multinomial. The other axis
555+
# will only have to be iterated over.
556+
if n.ndim == p.ndim:
557+
# p and n have the same ndim, so n.shape[-1] must be 1
558+
if n.shape[-1] != 1:
559+
raise ValueError('If n and p have the same number of '
560+
'dimensions, the last axis of n must '
561+
'have len 1. Got {} instead.\n'
562+
'n.shape = {}\n'
563+
'p.shape = {}.'.format(n.shape[-1],
564+
n.shape,
565+
p.shape))
566+
n_p_shape = np.broadcast(np.empty(p.shape[:-1]),
567+
np.empty(n.shape[:-1])).shape
568+
p = np.broadcast_to(p, n_p_shape + (p.shape[-1],))
569+
n = np.broadcast_to(n, n_p_shape + (1,))
570+
elif n.ndim == p.ndim - 1:
571+
# n has one fewer dimension than p (the iteration dimensions); it must
572+
# broadcast together
573+
n_p_shape = np.broadcast(np.empty(p.shape[:-1]),
574+
n).shape
575+
p = np.broadcast_to(p, n_p_shape + (p.shape[-1],))
576+
n = np.broadcast_to(n, n_p_shape + (1,))
577+
elif p.ndim == 1:
578+
# p only has the sequence array. We extend it with the dimensions
579+
# of n
580+
n_p_shape = n.shape
581+
p = np.broadcast_to(p, n_p_shape + (p.shape[-1],))
582+
n = np.broadcast_to(n, n_p_shape + (1,))
583+
elif n.ndim == 0 or (n.ndim == 1 and n.shape[0] == 1):
584+
# n is a scalar. We extend it with the dimensions of p
585+
n_p_shape = p.shape[:-1]
586+
n = np.broadcast_to(n, n_p_shape + (1,))
587+
else:
588+
# There is no clear rule to broadcast p and n so we raise an error
589+
raise ValueError('Incompatible shapes of n and p.\n'
590+
'n.shape = {}\n'
591+
'p.shape = {}'.format(n.shape, p.shape))
592+
593+
# Check what happens with size
594+
if size is not None:
595+
if size == p.shape:
596+
size = None
597+
_size = 1
598+
elif size[-len(p.shape):] == p.shape:
599+
size = size[:len(size) - len(p.shape)]
600+
_size = np.prod(size)
601+
else:
602+
_size = np.prod(size)
603+
else:
604+
_size = 1
605+
606+
# We now flatten p and n up to the last dimension
607+
p_shape = p.shape
608+
p = np.reshape(p, (np.prod(n_p_shape), -1))
609+
n = np.reshape(n, (np.prod(n_p_shape), -1))
610+
# We renormalize p
611+
p = p / p.sum(axis=1, keepdims=True)
612+
# We iterate calls to np.random.multinomial
613+
randnum = np.asarray([
614+
np.random.multinomial(nn, pp, size=_size)
615+
for (nn, pp) in zip(n, p)
616+
])
617+
# We swap the iteration axis with the _size axis
618+
randnum = np.moveaxis(randnum, 1, 0)
619+
# We reshape the random numbers to the corresponding size + p_shape
620+
if size is None:
621+
randnum = np.reshape(randnum, p_shape)
562622
else:
563-
p = p / p.sum(axis=1, keepdims=True)
564-
randnum = np.asarray([
565-
np.random.multinomial(nn, pp, size=size)
566-
for (nn, pp) in zip(n, p)
567-
])
568-
randnum = np.moveaxis(randnum, 1, 0)
623+
randnum = np.reshape(randnum, size + p_shape)
624+
# We cast back to the original dtype
569625
return randnum.astype(original_dtype)
570626

571627
def random(self, point=None, size=None):

pymc3/tests/test_sampling.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,22 @@ def test_multivariate(self):
370370
assert m.random(size=10).shape == (10, 4)
371371
assert trace['m'].shape == (10, 4)
372372

373+
def test_multivariate2(self):
374+
# Added test for issue #3271
375+
mn_data = np.random.multinomial(n=100, pvals=[1/6.]*6, size=10)
376+
with pm.Model() as dm_model:
377+
probs = pm.Dirichlet('probs', a=np.ones(6), shape=6)
378+
obs = pm.Multinomial('obs', n=100, p=probs, observed=mn_data)
379+
burned_trace = pm.sample(20, tune=10, cores=1)
380+
sim_priors = pm.sample_prior_predictive(samples=20,
381+
model=dm_model)
382+
sim_ppc = pm.sample_posterior_predictive(burned_trace,
383+
samples=20,
384+
model=dm_model)
385+
assert sim_priors['probs'].shape == (20, 6)
386+
assert sim_priors['obs'].shape == (20, 6)
387+
assert sim_ppc['obs'].shape == (20,) + obs.distribution.shape
388+
373389
def test_layers(self):
374390
with pm.Model() as model:
375391
a = pm.Uniform('a', lower=0, upper=1, shape=10)

0 commit comments

Comments
 (0)