Skip to content

Commit fcbdf96

Browse files
authored
Merge branch 'master' into rename_sd_to_sigma
2 parents cc9f4ef + 98fd63e commit fcbdf96

File tree

6 files changed

+111
-7
lines changed

6 files changed

+111
-7
lines changed

RELEASE-NOTES.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66

77
### Maintenance
88

9-
- All occurances of `sd` as a parameter has been replaced with `sigma` for consistency. `sd` will continue to be available for backwards compatibility.
9+
- All occurances of `sd` as a parameter name have been renamed to `sigma`. `sd` will continue to function for backwards compatibility.
10+
- Made `BrokenPipeError` for parallel sampling more verbose on Windows.
11+
- Added the `broadcast_distribution_samples` function that helps broadcasting arrays of drawn samples, taking into account the requested `size` and the inferred distribution shape. This sometimes is needed by distributions that call several `rvs` separately within their `random` method, such as the `ZeroInflatedPoisson` (Fix issue #3310).
12+
- The `Wald`, `Kumaraswamy`, `LogNormal`, `Pareto`, `Cauchy`, `HalfCauchy`, `Weibull` and `ExGaussian` distributions `random` method used a hidden `_random` function that was written with scalars in mind. This could potentially lead to artificial correlations between random draws. Added shape guards and broadcasting of the distribution samples to prevent this (Similar to issue #3310).
1013

1114
### Deprecations
1215

pymc3/distributions/continuous.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
alltrue_elemwise, betaln, bound, gammaln, i0e, incomplete_beta, logpow,
2020
normal_lccdf, normal_lcdf, SplineWrapper, std_cdf, zvalue,
2121
)
22-
from .distribution import Continuous, draw_values, generate_samples
22+
from .distribution import (Continuous, draw_values, generate_samples,
23+
broadcast_distribution_samples)
2324

2425
__all__ = ['Uniform', 'Flat', 'HalfFlat', 'Normal', 'TruncatedNormal', 'Beta',
2526
'Kumaraswamy', 'Exponential', 'Laplace', 'StudentT', 'Cauchy',
@@ -964,6 +965,8 @@ def random(self, point=None, size=None):
964965
"""
965966
mu, lam, alpha = draw_values([self.mu, self.lam, self.alpha],
966967
point=point, size=size)
968+
mu, lam, alpha = broadcast_distribution_samples([mu, lam, alpha],
969+
size=size)
967970
return generate_samples(self._random,
968971
mu, lam, alpha,
969972
dist_shape=self.shape,
@@ -1293,6 +1296,7 @@ def random(self, point=None, size=None):
12931296
"""
12941297
a, b = draw_values([self.a, self.b],
12951298
point=point, size=size)
1299+
a, b = broadcast_distribution_samples([a, b], size=size)
12961300
return generate_samples(self._random, a, b,
12971301
dist_shape=self.shape,
12981302
size=size)
@@ -1669,6 +1673,7 @@ def random(self, point=None, size=None):
16691673
array
16701674
"""
16711675
mu, tau = draw_values([self.mu, self.tau], point=point, size=size)
1676+
mu, tau = broadcast_distribution_samples([mu, tau], size=size)
16721677
return generate_samples(self._random, mu, tau,
16731678
dist_shape=self.shape,
16741679
size=size)
@@ -1959,6 +1964,7 @@ def random(self, point=None, size=None):
19591964
"""
19601965
alpha, m = draw_values([self.alpha, self.m],
19611966
point=point, size=size)
1967+
alpha, m = broadcast_distribution_samples([alpha, m], size=size)
19621968
return generate_samples(self._random, alpha, m,
19631969
dist_shape=self.shape,
19641970
size=size)
@@ -2083,6 +2089,7 @@ def random(self, point=None, size=None):
20832089
"""
20842090
alpha, beta = draw_values([self.alpha, self.beta],
20852091
point=point, size=size)
2092+
alpha, beta = broadcast_distribution_samples([alpha, beta], size=size)
20862093
return generate_samples(self._random, alpha, beta,
20872094
dist_shape=self.shape,
20882095
size=size)
@@ -2650,6 +2657,7 @@ def random(self, point=None, size=None):
26502657
"""
26512658
alpha, beta = draw_values([self.alpha, self.beta],
26522659
point=point, size=size)
2660+
alpha, beta = broadcast_distribution_samples([alpha, beta], size=size)
26532661

26542662
def _random(a, b, size=None):
26552663
return b * (-np.log(np.random.uniform(size=size)))**(1 / a)
@@ -2943,6 +2951,8 @@ def random(self, point=None, size=None):
29432951
"""
29442952
mu, sigma, nu = draw_values([self.mu, self.sigma, self.nu],
29452953
point=point, size=size)
2954+
mu, sigma, nu = broadcast_distribution_samples([mu, sigma, nu],
2955+
size=size)
29462956

29472957
def _random(mu, sigma, nu, size=None):
29482958
return (np.random.normal(mu, sigma, size=size)

pymc3/distributions/discrete.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66

77
from pymc3.util import get_variable_name
88
from .dist_math import bound, factln, binomln, betaln, logpow, random_choice
9-
from .distribution import Discrete, draw_values, generate_samples
9+
from .distribution import (Discrete, draw_values, generate_samples,
10+
broadcast_distribution_samples)
1011
from pymc3.math import tround, sigmoid, logaddexp, logit, log1pexp
1112

1213

@@ -345,6 +346,7 @@ def _ppf(self, p):
345346

346347
def _random(self, q, beta, size=None):
347348
p = np.random.uniform(size=size)
349+
p, q, beta = broadcast_distribution_samples([p, q, beta], size=size)
348350

349351
return np.ceil(np.power(np.log(1 - p) / np.log(q), 1. / beta)) - 1
350352

@@ -847,7 +849,8 @@ def random(self, point=None, size=None):
847849
g = generate_samples(stats.poisson.rvs, theta,
848850
dist_shape=self.shape,
849851
size=size)
850-
return g * (np.random.random(np.squeeze(g.shape)) < psi)
852+
g, psi = broadcast_distribution_samples([g, psi], size=size)
853+
return g * (np.random.random(g.shape) < psi)
851854

852855
def logp(self, value):
853856
psi = self.psi
@@ -939,7 +942,8 @@ def random(self, point=None, size=None):
939942
g = generate_samples(stats.binom.rvs, n, p,
940943
dist_shape=self.shape,
941944
size=size)
942-
return g * (np.random.random(np.squeeze(g.shape)) < psi)
945+
g, psi = broadcast_distribution_samples([g, psi], size=size)
946+
return g * (np.random.random(g.shape) < psi)
943947

944948
def logp(self, value):
945949
psi = self.psi
@@ -1057,7 +1061,8 @@ def random(self, point=None, size=None):
10571061
dist_shape=self.shape,
10581062
size=size)
10591063
g[g == 0] = np.finfo(float).eps # Just in case
1060-
return stats.poisson.rvs(g) * (np.random.random(np.squeeze(g.shape)) < psi)
1064+
g, psi = broadcast_distribution_samples([g, psi], size=size)
1065+
return stats.poisson.rvs(g) * (np.random.random(g.shape) < psi)
10611066

10621067
def logp(self, value):
10631068
alpha = self.alpha

pymc3/distributions/distribution.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,3 +636,30 @@ def generate_samples(generator, *args, **kwargs):
636636
if one_d and samples.shape[-1] == 1:
637637
samples = samples.reshape(samples.shape[:-1])
638638
return np.asarray(samples)
639+
640+
641+
def broadcast_distribution_samples(samples, size=None):
642+
if size is None:
643+
return np.broadcast_arrays(*samples)
644+
_size = to_tuple(size)
645+
try:
646+
broadcasted_samples = np.broadcast_arrays(*samples)
647+
except ValueError:
648+
# Raw samples shapes
649+
p_shapes = [p.shape for p in samples]
650+
# samples shapes without the size prepend
651+
sp_shapes = [s[len(_size):] if _size == s[:len(_size)] else s
652+
for s in p_shapes]
653+
broadcast_shape = np.broadcast(*[np.empty(s) for s in sp_shapes]).shape
654+
broadcasted_samples = []
655+
for param, p_shape, sp_shape in zip(samples, p_shapes, sp_shapes):
656+
if _size == p_shape[:len(_size)]:
657+
slicer_head = [slice(None)] * len(_size)
658+
else:
659+
slicer_head = [np.newaxis] * len(_size)
660+
slicer_tail = ([np.newaxis] * (len(broadcast_shape) -
661+
len(sp_shape)) +
662+
[slice(None)] * len(sp_shape))
663+
broadcasted_samples.append(param[tuple(slicer_head + slicer_tail)])
664+
broadcasted_samples = np.broadcast_arrays(*broadcasted_samples)
665+
return broadcasted_samples

pymc3/parallel_sampling.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from collections import namedtuple
77
import traceback
88
from pymc3.exceptions import SamplingError
9+
import errno
910

1011
import numpy as np
1112

@@ -14,6 +15,34 @@
1415
logger = logging.getLogger("pymc3")
1516

1617

18+
def _get_broken_pipe_exception():
19+
import sys
20+
if sys.platform == 'win32':
21+
return RuntimeError("The communication pipe between the main process "
22+
"and its spawned children is broken.\n"
23+
"In Windows OS, this usually means that the child "
24+
"process raised an exception while it was being "
25+
"spawned, before it was setup to communicate to "
26+
"the main process.\n"
27+
"The exceptions raised by the child process while "
28+
"spawning cannot be caught or handled from the "
29+
"main process, and when running from an IPython or "
30+
"jupyter notebook interactive kernel, the child's "
31+
"exception and traceback appears to be lost.\n"
32+
"A known way to see the child's error, and try to "
33+
"fix or handle it, is to run the problematic code "
34+
"as a batch script from a system's Command Prompt. "
35+
"The child's exception will be printed to the "
36+
"Command Promt's stderr, and it should be visible "
37+
"above this error and traceback.\n"
38+
"Note that if running a jupyter notebook that was "
39+
"invoked from a Command Prompt, the child's "
40+
"exception should have been printed to the Command "
41+
"Prompt on which the notebook is running.")
42+
else:
43+
return None
44+
45+
1746
class ParallelSamplingError(Exception):
1847
def __init__(self, message, chain, warnings=None):
1948
super().__init__(message)
@@ -83,10 +112,19 @@ def run(self):
83112
pass
84113
except BaseException as e:
85114
e = ExceptionWithTraceback(e, e.__traceback__)
115+
# Send is not blocking so we have to force a wait for the abort
116+
# message
86117
self._msg_pipe.send(("error", None, e))
118+
self._wait_for_abortion()
87119
finally:
88120
self._msg_pipe.close()
89121

122+
def _wait_for_abortion(self):
123+
while True:
124+
msg = self._recv_msg()
125+
if msg[0] == "abort":
126+
break
127+
90128
def _make_numpy_refs(self):
91129
shape_dtypes = self._step_method.vars_shape_dtype
92130
point = {}
@@ -200,7 +238,18 @@ def __init__(self, draws, tune, step_method, chain, seed, start):
200238
seed,
201239
)
202240
# We fork right away, so that the main process can start tqdm threads
203-
self._process.start()
241+
try:
242+
self._process.start()
243+
except IOError as e:
244+
# Something may have gone wrong during the fork / spawn
245+
if e.errno == errno.EPIPE:
246+
exc = _get_broken_pipe_exception()
247+
if exc is not None:
248+
# Sleep a little to give the child process time to flush
249+
# all its error message
250+
time.sleep(0.2)
251+
raise exc
252+
raise
204253

205254
@property
206255
def shared_point_view(self):

pymc3/tests/test_sampling.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,3 +467,13 @@ def test_shape_edgecase(self):
467467
x = pm.Normal('x', mu=mu, sigma=sd, shape=5)
468468
prior = pm.sample_prior_predictive(10)
469469
assert prior['mu'].shape == (10, 5)
470+
471+
def test_zeroinflatedpoisson(self):
472+
with pm.Model():
473+
theta = pm.Beta('theta', alpha=1, beta=1)
474+
psi = pm.HalfNormal('psi', sd=1)
475+
pm.ZeroInflatedPoisson('suppliers', psi=psi, theta=theta, shape=20)
476+
gen_data = pm.sample_prior_predictive(samples=5000)
477+
assert gen_data['theta'].shape == (5000,)
478+
assert gen_data['psi'].shape == (5000,)
479+
assert gen_data['suppliers'].shape == (5000, 20)

0 commit comments

Comments
 (0)