Merge branch 'master' into rename_sd_to_sigma

twiecki · web-flow · commit fcbdf9683154 · 2018-12-23T01:14:53.000+01:00
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
@@ -6,7 +6,10 @@
 
 ### Maintenance
 
-- All occurances of `sd` as a parameter has been replaced with `sigma` for consistency. `sd` will continue to be available for backwards compatibility.
+- All occurrences of `sd` as a parameter name have been renamed to `sigma`. `sd` will continue to function for backwards compatibility.
+- Made `BrokenPipeError` for parallel sampling more verbose on Windows.
+- Added the `broadcast_distribution_samples` function, which helps broadcast arrays of drawn samples, taking into account the requested `size` and the inferred distribution shape. This is sometimes needed by distributions that call several `rvs` separately within their `random` method, such as `ZeroInflatedPoisson` (fixes issue #3310).
+- The `random` methods of the `Wald`, `Kumaraswamy`, `LogNormal`, `Pareto`, `Cauchy`, `HalfCauchy`, `Weibull` and `ExGaussian` distributions used a hidden `_random` function that was written with scalars in mind. This could potentially lead to artificial correlations between random draws. Added shape guards and broadcasting of the distribution samples to prevent this (similar to issue #3310).
 
 ### Deprecations
 
diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py
@@ -19,7 +19,8 @@
     alltrue_elemwise, betaln, bound, gammaln, i0e, incomplete_beta, logpow,
     normal_lccdf, normal_lcdf, SplineWrapper, std_cdf, zvalue,
 )
-from .distribution import Continuous, draw_values, generate_samples
+from .distribution import (Continuous, draw_values, generate_samples,
+                           broadcast_distribution_samples)
 
 __all__ = ['Uniform', 'Flat', 'HalfFlat', 'Normal', 'TruncatedNormal', 'Beta',
            'Kumaraswamy', 'Exponential', 'Laplace', 'StudentT', 'Cauchy',
@@ -964,6 +965,8 @@ def random(self, point=None, size=None):
         """
         mu, lam, alpha = draw_values([self.mu, self.lam, self.alpha],
                                      point=point, size=size)
+        mu, lam, alpha = broadcast_distribution_samples([mu, lam, alpha],
+                                                        size=size)
         return generate_samples(self._random,
                                 mu, lam, alpha,
                                 dist_shape=self.shape,
@@ -1293,6 +1296,7 @@ def random(self, point=None, size=None):
         """
         a, b = draw_values([self.a, self.b],
                            point=point, size=size)
+        a, b = broadcast_distribution_samples([a, b], size=size)
         return generate_samples(self._random, a, b,
                                 dist_shape=self.shape,
                                 size=size)
@@ -1669,6 +1673,7 @@ def random(self, point=None, size=None):
         array
         """
         mu, tau = draw_values([self.mu, self.tau], point=point, size=size)
+        mu, tau = broadcast_distribution_samples([mu, tau], size=size)
         return generate_samples(self._random, mu, tau,
                                 dist_shape=self.shape,
                                 size=size)
@@ -1959,6 +1964,7 @@ def random(self, point=None, size=None):
         """
         alpha, m = draw_values([self.alpha, self.m],
                                point=point, size=size)
+        alpha, m = broadcast_distribution_samples([alpha, m], size=size)
         return generate_samples(self._random, alpha, m,
                                 dist_shape=self.shape,
                                 size=size)
@@ -2083,6 +2089,7 @@ def random(self, point=None, size=None):
         """
         alpha, beta = draw_values([self.alpha, self.beta],
                                   point=point, size=size)
+        alpha, beta = broadcast_distribution_samples([alpha, beta], size=size)
         return generate_samples(self._random, alpha, beta,
                                 dist_shape=self.shape,
                                 size=size)
@@ -2650,6 +2657,7 @@ def random(self, point=None, size=None):
         """
         alpha, beta = draw_values([self.alpha, self.beta],
                                   point=point, size=size)
+        alpha, beta = broadcast_distribution_samples([alpha, beta], size=size)
 
         def _random(a, b, size=None):
             return b * (-np.log(np.random.uniform(size=size)))**(1 / a)
@@ -2943,6 +2951,8 @@ def random(self, point=None, size=None):
         """
         mu, sigma, nu = draw_values([self.mu, self.sigma, self.nu],
                                     point=point, size=size)
+        mu, sigma, nu = broadcast_distribution_samples([mu, sigma, nu],
+                                                       size=size)
 
         def _random(mu, sigma, nu, size=None):
             return (np.random.normal(mu, sigma, size=size)
diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
@@ -6,7 +6,8 @@
 
 from pymc3.util import get_variable_name
 from .dist_math import bound, factln, binomln, betaln, logpow, random_choice
-from .distribution import Discrete, draw_values, generate_samples
+from .distribution import (Discrete, draw_values, generate_samples,
+                           broadcast_distribution_samples)
 from pymc3.math import tround, sigmoid, logaddexp, logit, log1pexp
 
 
@@ -345,6 +346,12 @@ def _ppf(self, p):
 
     def _random(self, q, beta, size=None):
+        """Draw samples by transforming uniform random variates.
+
+        Uniform draws ``p`` are broadcast against ``q`` and ``beta`` and
+        mapped through ``ceil((log(1 - p) / log(q)) ** (1 / beta)) - 1``.
+        """
         p = np.random.uniform(size=size)
+        p, q, beta = broadcast_distribution_samples([p, q, beta], size=size)
 
         return np.ceil(np.power(np.log(1 - p) / np.log(q), 1. / beta)) - 1
 
@@ -847,7 +849,8 @@ def random(self, point=None, size=None):
         g = generate_samples(stats.poisson.rvs, theta,
                              dist_shape=self.shape,
                              size=size)
-        return g * (np.random.random(np.squeeze(g.shape)) < psi)
+        g, psi = broadcast_distribution_samples([g, psi], size=size)
+        return g * (np.random.random(g.shape) < psi)
 
     def logp(self, value):
         psi = self.psi
@@ -939,7 +942,8 @@ def random(self, point=None, size=None):
         g = generate_samples(stats.binom.rvs, n, p,
                              dist_shape=self.shape,
                              size=size)
-        return g * (np.random.random(np.squeeze(g.shape)) < psi)
+        g, psi = broadcast_distribution_samples([g, psi], size=size)
+        return g * (np.random.random(g.shape) < psi)
 
     def logp(self, value):
         psi = self.psi
@@ -1057,7 +1061,8 @@ def random(self, point=None, size=None):
                              dist_shape=self.shape,
                              size=size)
         g[g == 0] = np.finfo(float).eps  # Just in case
-        return stats.poisson.rvs(g) * (np.random.random(np.squeeze(g.shape)) < psi)
+        g, psi = broadcast_distribution_samples([g, psi], size=size)
+        return stats.poisson.rvs(g) * (np.random.random(g.shape) < psi)
 
     def logp(self, value):
         alpha = self.alpha
diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
@@ -636,3 +636,64 @@ def generate_samples(generator, *args, **kwargs):
     if one_d and samples.shape[-1] == 1:
         samples = samples.reshape(samples.shape[:-1])
     return np.asarray(samples)
+
+
+def broadcast_distribution_samples(samples, size=None):
+    """Broadcast drawn samples against each other, honouring ``size``.
+
+    A plain ``np.broadcast_arrays`` is tried first. If it raises
+    ``ValueError`` and ``size`` was given, each sample's shape is split
+    into an optional leading ``size`` prefix and a trailing remainder,
+    new axes are inserted so that the samples line up, and the
+    broadcast is retried.
+
+    Parameters
+    ----------
+    samples : sequence of array-like
+        Drawn values to broadcast. Each entry is converted with
+        ``np.asarray``, so scalars and nested sequences are accepted.
+    size : optional
+        Requested sample size, normalised with ``to_tuple``. With
+        ``None`` only the plain broadcast is attempted.
+
+    Returns
+    -------
+    The arrays produced by ``np.broadcast_arrays``, one per entry of
+    ``samples`` and in the same order.
+
+    Raises
+    ------
+    ValueError
+        If the samples cannot be broadcast to a common shape.
+    """
+    # Convert up front: the fallback below needs ``.shape`` and indexing
+    # with ``np.newaxis``, which Python scalars and lists do not support.
+    samples = [np.asarray(p) for p in samples]
+    if size is None:
+        return np.broadcast_arrays(*samples)
+    _size = to_tuple(size)
+    try:
+        broadcasted_samples = np.broadcast_arrays(*samples)
+    except ValueError:
+        # Raw samples shapes
+        p_shapes = [p.shape for p in samples]
+        # samples shapes without the size prepend
+        sp_shapes = [s[len(_size):] if _size == s[:len(_size)] else s
+                     for s in p_shapes]
+        broadcast_shape = np.broadcast(*[np.empty(s) for s in sp_shapes]).shape
+        broadcasted_samples = []
+        for param, p_shape, sp_shape in zip(samples, p_shapes, sp_shapes):
+            if _size == p_shape[:len(_size)]:
+                # Sample already carries the size axes: keep them.
+                slicer_head = [slice(None)] * len(_size)
+            else:
+                # Sample lacks the size axes: add broadcastable ones.
+                slicer_head = [np.newaxis] * len(_size)
+            # Pad between the size axes and the sample's own axes so the
+            # trailing part has the rank of ``broadcast_shape``.
+            slicer_tail = ([np.newaxis] * (len(broadcast_shape) -
+                                           len(sp_shape)) +
+                           [slice(None)] * len(sp_shape))
+            broadcasted_samples.append(param[tuple(slicer_head + slicer_tail)])
+        broadcasted_samples = np.broadcast_arrays(*broadcasted_samples)
+    return broadcasted_samples
diff --git a/pymc3/parallel_sampling.py b/pymc3/parallel_sampling.py
@@ -6,6 +6,7 @@
 from collections import namedtuple
 import traceback
 from pymc3.exceptions import SamplingError
+import errno
 
 import numpy as np
 
@@ -14,6 +15,43 @@
 logger = logging.getLogger("pymc3")
 
 
+def _get_broken_pipe_exception():
+    """Build the error to raise when the pipe to a child process breaks.
+
+    Returns
+    -------
+    RuntimeError or None
+        On Windows (``sys.platform == 'win32'``) an unraised
+        ``RuntimeError`` explaining how to recover the child's lost
+        traceback; ``None`` on every other platform.
+    """
+    import sys
+    if sys.platform == 'win32':
+        return RuntimeError("The communication pipe between the main process "
+                            "and its spawned children is broken.\n"
+                            "In Windows OS, this usually means that the child "
+                            "process raised an exception while it was being "
+                            "spawned, before it was setup to communicate to "
+                            "the main process.\n"
+                            "The exceptions raised by the child process while "
+                            "spawning cannot be caught or handled from the "
+                            "main process, and when running from an IPython or "
+                            "jupyter notebook interactive kernel, the child's "
+                            "exception and traceback appears to be lost.\n"
+                            "A known way to see the child's error, and try to "
+                            "fix or handle it, is to run the problematic code "
+                            "as a batch script from a system's Command Prompt. "
+                            "The child's exception will be printed to the "
+                            "Command Prompt's stderr, and it should be visible "
+                            "above this error and traceback.\n"
+                            "Note that if running a jupyter notebook that was "
+                            "invoked from a Command Prompt, the child's "
+                            "exception should have been printed to the Command "
+                            "Prompt on which the notebook is running.")
+    else:
+        return None
+
+
 class ParallelSamplingError(Exception):
     def __init__(self, message, chain, warnings=None):
         super().__init__(message)
@@ -83,10 +112,22 @@ def run(self):
             pass
         except BaseException as e:
             e = ExceptionWithTraceback(e, e.__traceback__)
+            # Send is not blocking so we have to force a wait for the abort
+            # message
             self._msg_pipe.send(("error", None, e))
+            self._wait_for_abortion()
         finally:
             self._msg_pipe.close()
 
+    def _wait_for_abortion(self):
+        """Block until a message tagged ``"abort"`` is received.
+
+        Every message returned by ``self._recv_msg()`` whose first item
+        is not ``"abort"`` is discarded.
+        """
+        while self._recv_msg()[0] != "abort":
+            pass
+
     def _make_numpy_refs(self):
         shape_dtypes = self._step_method.vars_shape_dtype
         point = {}
@@ -200,7 +238,18 @@ def __init__(self, draws, tune, step_method, chain, seed, start):
             seed,
         )
         # We fork right away, so that the main process can start tqdm threads
-        self._process.start()
+        try:
+            self._process.start()
+        except IOError as e:
+            # Something may have gone wrong during the fork / spawn
+            if e.errno == errno.EPIPE:
+                exc = _get_broken_pipe_exception()
+                if exc is not None:
+                    # Sleep a little to give the child process time to flush
+                    # all its error message
+                    time.sleep(0.2)
+                    raise exc
+            raise
 
     @property
     def shared_point_view(self):
diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
@@ -467,3 +467,21 @@ def test_shape_edgecase(self):
             x = pm.Normal('x', mu=mu, sigma=sd, shape=5)
             prior = pm.sample_prior_predictive(10)
         assert prior['mu'].shape == (10, 5)
+
+    def test_zeroinflatedpoisson(self):
+        """Prior predictive draws of a ZeroInflatedPoisson keep its shape.
+
+        The scalar ``theta`` and ``psi`` priors must come back as
+        ``(samples,)`` and the 20-element ``suppliers`` variable as
+        ``(samples, 20)``.
+        """
+        with pm.Model():
+            theta = pm.Beta('theta', alpha=1, beta=1)
+            # ``sigma`` is the current keyword; ``sd`` is only kept for
+            # backwards compatibility.
+            psi = pm.HalfNormal('psi', sigma=1)
+            pm.ZeroInflatedPoisson('suppliers', psi=psi, theta=theta, shape=20)
+            gen_data = pm.sample_prior_predictive(samples=5000)
+            assert gen_data['theta'].shape == (5000,)
+            assert gen_data['psi'].shape == (5000,)
+            assert gen_data['suppliers'].shape == (5000, 20)