Skip to content

Commit 2852edc

Browse files
committed
separated function for weighted ppc
1 parent 8ef4dc5 commit 2852edc

File tree

1 file changed

+92
-22
lines changed

1 file changed

+92
-22
lines changed

pymc3/sampling.py

Lines changed: 92 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import sys
1919
sys.setrecursionlimit(10000)
2020

21-
__all__ = ['sample', 'iter_sample', 'sample_ppc', 'init_nuts']
21+
__all__ = ['sample', 'iter_sample', 'sample_ppc', 'sample_ppc_w', 'init_nuts']
2222

2323
STEP_METHODS = (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis,
2424
BinaryGibbsMetropolis, Slice, CategoricalGibbsMetropolis)
@@ -486,14 +486,13 @@ def _update_start_vals(a, b, model):
486486

487487

488488
def sample_ppc(trace, samples=None, model=None, vars=None, size=None,
               random_seed=None, progressbar=True):
    """Generate posterior predictive samples from a model given a trace.

    Parameters
    ----------
    trace : backend, list, or MultiTrace
        Trace generated from MCMC sampling.
    samples : int
        Number of posterior predictive samples to generate. Defaults to the
        length of `trace`.
    model : Model (optional if in `with` context)
        Model used to generate the trace.
    vars : iterable
        Variables for which to compute the posterior predictive samples.
        Defaults to `model.observed_RVs`.
    size : int
        The number of random draws from the distribution specified by the
        parameters in each sample of the trace.
    random_seed : int
        Seed for the random number generator.
    progressbar : bool
        Whether or not to display a progress bar in the command line.

    Returns
    -------
    samples : dict
        Dictionary with the variables as keys. The values corresponding to the
        posterior predictive samples.
    """
    if samples is None:
        samples = len(trace)

    if model is None:
        model = modelcontext(model)

    # NOTE(review): reconstructed from diff context — original defaulted
    # `vars` to the model's observed RVs; confirm against full file.
    if vars is None:
        vars = model.observed_RVs

    seed(random_seed)

    # Each randomly chosen trace position supplies the parameter values for
    # one posterior predictive draw.
    indices = randint(0, len(trace), samples)
    if progressbar:
        indices = tqdm(indices, total=samples)

    ppc = defaultdict(list)
    for idx in indices:
        param = trace[idx]
        for var in vars:
            ppc[var.name].append(var.distribution.random(point=param,
                                                         size=size))

    return {k: np.asarray(v) for k, v in ppc.items()}
542545

543-
n = (min_tr * p).astype('int')
544-
# ensure n sum up to min_tr
545-
idx = np.argmax(n)
546-
n[idx] = n[idx] + min_tr - np.sum(n)
547546

548-
trace = np.concatenate([np.random.choice(trace[i], j)
549-
for i, j in enumerate(n)])
547+
def sample_ppc_w(traces, samples=None, models=None, size=None, weights=None,
                 random_seed=None, progressbar=True):
    """Generate weighted posterior predictive samples from a list of models and
    a list of traces according to a set of weights.

    Parameters
    ----------
    traces : list
        List of traces generated from MCMC sampling. The number of traces
        should be equal to the number of weights.
    samples : int
        Number of posterior predictive samples to generate. Defaults to the
        length of the shorter trace in traces.
    models : list
        List of models used to generate the list of traces. The number of
        models should be equal to the number of weights and the number of
        observed RVs should be the same for all models.
        By default a single model will be inferred from `with` context, in
        this case results will only be meaningful if all models share the same
        distributions for the observed RVs.
    size : int
        The number of random draws from the distributions specified by the
        parameters in each sample of the trace.
    weights : array-like
        Individual weights for each trace. Default, same weight for each
        model.
    random_seed : int
        Seed for the random number generator.
    progressbar : bool
        Whether or not to display a progress bar in the command line. The
        bar shows the percentage of completion, the sampling speed in
        samples per second (SPS), and the estimated remaining time until
        completion ("expected time of arrival"; ETA).

    Returns
    -------
    samples : dict
        Dictionary with the variables as keys. The values corresponding to the
        posterior predictive samples from the weighted models.

    Raises
    ------
    ValueError
        If the numbers of traces, models and weights do not match, or the
        models differ in their number of observed RVs.
    """
    seed(random_seed)

    if models is None:
        # modelcontext(None) falls back to the enclosing `with model:` block.
        models = [modelcontext(models)] * len(traces)

    if weights is None:
        weights = [1] * len(traces)

    if len(traces) != len(weights):
        raise ValueError('The number of traces and weights should be the same')

    if len(models) != len(weights):
        raise ValueError('The number of models and weights should be the same')

    # fixed typo: was `lenght_morv`
    length_morv = len(models[0].observed_RVs)
    if not all(len(i.observed_RVs) == length_morv for i in models):
        raise ValueError(
            'The number of observed RVs should be the same for all models')

    # Normalize weights to probabilities and split `min_tr` draws among the
    # traces proportionally.
    weights = np.asarray(weights)
    p = weights / np.sum(weights)

    min_tr = min([len(i) for i in traces])

    n = (min_tr * p).astype('int')
    # ensure n sums up to min_tr (integer truncation may drop a few draws)
    idx = np.argmax(n)
    n[idx] = n[idx] + min_tr - np.sum(n)

    # Concatenate n[i] randomly chosen points from each trace, in model order.
    trace = np.concatenate([np.random.choice(traces[i], j)
                            for i, j in enumerate(n)])

    # One observed-RV entry per drawn point, in the same order as `trace`.
    # NOTE(review): when a model has more than one observed RV,
    # `len(variables)` exceeds `len(trace)`, so `variables[idx]` below can
    # pair a point with the wrong RV — confirm intended behavior for
    # multi-observed-RV models.
    variables = []
    for i, m in enumerate(models):
        variables.extend(m.observed_RVs * n[i])

    len_trace = len(trace)

    # NOTE(review): reconstructed from diff context (elided unchanged lines);
    # mirrors the sampling-index logic of `sample_ppc`.
    if samples is None:
        samples = len_trace

    indices = randint(0, len_trace, samples)

    if progressbar:
        indices = tqdm(indices, total=samples)

    ppc = defaultdict(list)
    for idx in indices:
        param = trace[idx]
        var = variables[idx]
        ppc[var.name].append(var.distribution.random(point=param, size=size))

    return {k: np.asarray(v) for k, v in ppc.items()}
569639

0 commit comments

Comments
 (0)