support xarray.Dataset in sample_posterior_predictive

michaelosthege · michaelosthege · commit 796c9bbad156 · 2020-03-20T17:21:48.000+01:00
+ closes pymc-devs#3828
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
@@ -8,6 +8,7 @@
 - `DEMetropolisZ`, an improved variant of `DEMetropolis` brings better parallelization and higher efficiency with fewer chains with a slower initial convergence. This implementation is experimental. See [#3784](https://github.com/pymc-devs/pymc3/pull/3784) for more info.
 - Notebooks that give insight into `DEMetropolis`, `DEMetropolisZ` and the `DifferentialEquation` interface are now located in the [Tutorials/Deep Dive](https://docs.pymc.io/nb_tutorials/index.html) section.
 - Add `fast_sample_posterior_predictive`, a vectorized alternative to `sample_posterior_predictive`.  This alternative is substantially faster for large models.
+- `sample_posterior_predictive` now accepts an `xarray.Dataset` as its `trace` argument, e.g. `InferenceData.posterior` (see [#3846](https://github.com/pymc-devs/pymc3/pull/3846))
 - `SamplerReport` (`MultiTrace.report`) now has properties `n_tune`, `n_draws`, `t_sampling` for increased convenience (see [#3827](https://github.com/pymc-devs/pymc3/pull/3827))
 
 ### Maintenance
diff --git a/pymc3/sampling.py b/pymc3/sampling.py
@@ -30,6 +30,7 @@
 import numpy as np
 import theano.gradient as tg
 from theano.tensor import Tensor
+import xarray
 
 from .backends.base import BaseTrace, MultiTrace
 from .backends.ndarray import NDArray
@@ -1520,9 +1521,9 @@ def sample_posterior_predictive(
 
     Parameters
     ----------
-    trace: backend, list, or MultiTrace
-        Trace generated from MCMC sampling. Or a list containing dicts from
-        find_MAP() or points
+    trace: backend, list, xarray.Dataset, or MultiTrace
+        Trace generated from MCMC sampling, or a list of dicts (e.g. points or from find_MAP()),
+        or an xarray.Dataset (e.g. InferenceData.posterior or InferenceData.prior)
     samples: int
         Number of posterior predictive samples to generate. Defaults to one posterior predictive
         sample per posterior sample, that is, the number of draws times the number of chains. It
@@ -1556,6 +1557,23 @@ def sample_posterior_predictive(
         Dictionary with the variable names as keys, and values numpy arrays containing
         posterior predictive samples.
     """
+    if isinstance(trace, xarray.Dataset):
+        # grab posterior samples for each variable
+        _samples = {
+            vn : trace[vn].values
+            for vn in trace.keys()
+        }
+        # make dicts
+        points = []
+        for c in trace.chain:
+            for d in trace.draw:
+                points.append({
+                    vn : s[c, d]
+                    for vn, s in _samples.items()
+                })
+        # use the list of points
+        trace = points
+
     len_trace = len(trace)
     try:
         nchain = trace.nchains