import sys
sys.setrecursionlimit(10000)

- __all__ = ['sample', 'iter_sample', 'sample_ppc', 'init_nuts']
+ __all__ = ['sample', 'iter_sample', 'sample_ppc', 'sample_ppc_w', 'init_nuts']

STEP_METHODS = (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis,
                BinaryGibbsMetropolis, Slice, CategoricalGibbsMetropolis)
@@ -486,14 +486,13 @@ def _update_start_vals(a, b, model):


def sample_ppc(trace, samples=None, model=None, vars=None, size=None,
-               weights=None, random_seed=None, progressbar=True):
+               random_seed=None, progressbar=True):
    """Generate posterior predictive samples from a model given a trace.

    Parameters
    ----------
    trace : backend, list, or MultiTrace
-        Trace generated from MCMC sampling. If a set of weights is also passed
-        this can be a list of traces, useful for model averaging.
+        Trace generated from MCMC sampling.
    samples : int
        Number of posterior predictive samples to generate. Defaults to the
        length of `trace`
@@ -505,8 +504,6 @@ def sample_ppc(trace, samples=None, model=None, vars=None, size=None,
    size : int
        The number of random draws from the distribution specified by the
        parameters in each sample of the trace.
-    weights: array-like
-        Individuals weights for each trace, useful for model averaging
    random_seed : int
        Seed for the random number generator.
    progressbar : bool
@@ -522,6 +519,9 @@ def sample_ppc(trace, samples=None, model=None, vars=None, size=None,
        posterior predictive samples. If a set of weights and a matching number
        of traces are provided, then the samples will be weighted.
    """
+    if samples is None:
+        samples = len(trace)
+
    if model is None:
        model = modelcontext(model)

@@ -530,23 +530,94 @@ def sample_ppc(trace, samples=None, model=None, vars=None, size=None,

    seed(random_seed)

-    if weights is not None:
-        if len(trace) != len(weights):
-            raise ValueError(
-                'The number of traces and weights should be the same')
-
+    indices = randint(0, len(trace), samples)
+    if progressbar:
+        indices = tqdm(indices, total=samples)
+
-        weights = np.asarray(weights)
-        p = weights / np.sum(weights)
-
+    ppc = defaultdict(list)
+    for idx in indices:
+        param = trace[idx]
+        for var in vars:
+            ppc[var.name].append(var.distribution.random(point=param,
+                                                         size=size))
+
-        min_tr = min([len(i) for i in trace])
-
+    return {k: np.asarray(v) for k, v in ppc.items()}
-        n = (min_tr * p).astype('int')
-        # ensure n sum up to min_tr
-        idx = np.argmax(n)
-        n[idx] = n[idx] + min_tr - np.sum(n)
-
-        trace = np.concatenate([np.random.choice(trace[i], j)
-                                for i, j in enumerate(n)])
+
+
+def sample_ppc_w(traces, samples=None, models=None, size=None, weights=None,
+                 random_seed=None, progressbar=True):
+    """Generate weighted posterior predictive samples from a list of models and
+    a list of traces according to a set of weights.
+
+    Parameters
+    ----------
+    traces : list
+        List of traces generated from MCMC sampling. The number of traces should
+        be equal to the number of weights.
+    samples : int
+        Number of posterior predictive samples to generate. Defaults to the
+        length of the shortest trace in traces.
+    models : list
+        List of models used to generate the list of traces. The number of models
+        should be equal to the number of weights, and the number of observed RVs
+        should be the same for all models.
+        By default a single model will be inferred from the `with` context, in
+        which case results will only be meaningful if all models share the same
+        distributions for the observed RVs.
+    size : int
+        The number of random draws from the distributions specified by the
+        parameters in each sample of the trace.
+    weights : array-like
+        Individual weights for each trace. Defaults to equal weights for each
+        model.
+    random_seed : int
+        Seed for the random number generator.
+    progressbar : bool
+        Whether or not to display a progress bar in the command line. The
+        bar shows the percentage of completion, the sampling speed in
+        samples per second (SPS), and the estimated remaining time until
+        completion ("expected time of arrival"; ETA).
+
+    Returns
+    -------
+    samples : dict
+        Dictionary with the variables as keys. The values are the posterior
+        predictive samples from the weighted models.
+    """
+    seed(random_seed)
+
+    if models is None:
+        models = [modelcontext(models)] * len(traces)
+
+    if weights is None:
+        weights = [1] * len(traces)
+
+    if len(traces) != len(weights):
+        raise ValueError('The number of traces and weights should be the same')
+
+    if len(models) != len(weights):
+        raise ValueError('The number of models and weights should be the same')
+
+    length_morv = len(models[0].observed_RVs)
+    if not all(len(i.observed_RVs) == length_morv for i in models):
+        raise ValueError(
+            'The number of observed RVs should be the same for all models')
+
+    weights = np.asarray(weights)
+    p = weights / np.sum(weights)
+
+    min_tr = min([len(i) for i in traces])
+
+    n = (min_tr * p).astype('int')
+    # ensure n sums up to min_tr
+    idx = np.argmax(n)
+    n[idx] = n[idx] + min_tr - np.sum(n)
+
+    trace = np.concatenate([np.random.choice(traces[i], j)
+                            for i, j in enumerate(n)])
+
+    variables = []
+    for i, m in enumerate(models):
+        variables.extend(m.observed_RVs * n[i])

    len_trace = len(trace)
@@ -561,9 +632,8 @@ def sample_ppc(trace, samples=None, model=None, vars=None, size=None,
    ppc = defaultdict(list)
    for idx in indices:
        param = trace[idx]
-        for var in vars:
-            ppc[var.name].append(var.distribution.random(point=param,
-                                                         size=size))
+        var = variables[idx]
+        ppc[var.name].append(var.distribution.random(point=param, size=size))

    return {k: np.asarray(v) for k, v in ppc.items()}
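The weight handling that this diff moves from `sample_ppc` into the new `sample_ppc_w` converts normalised weights into integer draw counts per trace, then tops up the largest count so the total equals the shortest trace length. A minimal standalone sketch of that arithmetic is below; the concrete numbers are made up for illustration and are not part of the diff.

import numpy as np

# Hypothetical inputs: three traces weighted 1:1:2, shortest trace has 1000 draws.
weights = np.asarray([1, 1, 2])
p = weights / np.sum(weights)        # -> [0.25, 0.25, 0.5]
min_tr = 1000

n = (min_tr * p).astype('int')       # integer draws per trace -> [250, 250, 500]
# integer truncation can drop a few draws; give the remainder to the largest bucket
idx = np.argmax(n)
n[idx] = n[idx] + min_tr - np.sum(n)

assert n.sum() == min_tr
print(n)                             # [250 250 500]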
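For orientation, a hypothetical end-to-end use of the new `sample_ppc_w` could look like the following sketch. The two toy models, the synthetic data, and the weights are invented for illustration; only the `sample_ppc_w` call signature comes from this diff.

import numpy as np
import pymc3 as pm

y = np.random.normal(0.0, 1.0, size=100)        # synthetic observations

with pm.Model() as model_a:                      # normal likelihood
    mu = pm.Normal('mu', mu=0.0, sd=10.0)
    pm.Normal('y_obs', mu=mu, sd=1.0, observed=y)
    trace_a = pm.sample(1000)

with pm.Model() as model_b:                      # heavier-tailed likelihood
    mu = pm.Normal('mu', mu=0.0, sd=10.0)
    pm.StudentT('y_obs', nu=4, mu=mu, sd=1.0, observed=y)
    trace_b = pm.sample(1000)

# Weights would typically come from a model-comparison step; they are made up here.
ppc_w = pm.sample_ppc_w(traces=[trace_a, trace_b],
                        models=[model_a, model_b],
                        weights=[0.6, 0.4])

# Dict keyed by observed-RV name; values are weighted posterior predictive draws.
print({k: v.shape for k, v in ppc_w.items()})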