Skip to content

Return posterior predictive samples from all chains in ModelBuilder #140

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 20, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 17 additions & 45 deletions pymc_experimental/model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import numpy as np
import pandas as pd
import pymc as pm
import xarray as xr


class ModelBuilder:
Expand Down Expand Up @@ -248,7 +249,7 @@ def predict(
self,
data_prediction: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None,
extend_idata: bool = True,
) -> dict:
) -> xr.Dataset:
"""
Uses the model to predict on unseen data and returns point predictions averaged over all posterior predictive samples.

Expand All @@ -261,7 +262,7 @@ def predict(

Returns
-------
returns dictionary of sample's mean of posterior predict.
returns posterior mean of predictive samples

Examples
--------
Expand All @@ -270,42 +271,33 @@ def predict(
>>> idata = model.fit(data)
>>> x_pred = []
>>> prediction_data = pd.DataFrame({'input':x_pred})
# point predict
>>> pred_mean = model.predict(prediction_data)
"""

if data_prediction is not None: # set new input data
self._data_setter(data_prediction)

with self.model: # sample with new input data
post_pred = pm.sample_posterior_predictive(self.idata)
if extend_idata:
self.idata.extend(post_pred)
# reshape output
post_pred = self._extract_samples(post_pred)
for key in post_pred:
post_pred[key] = post_pred[key].mean(axis=0)

return post_pred
posterior_predictive_samples = self.predict_posterior(data_prediction, extend_idata)
posterior_means = posterior_predictive_samples.mean(dim=["chain", "draw"], keep_attrs=True)
return posterior_means

def predict_posterior(
self,
data_prediction: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None,
extend_idata: bool = True,
) -> Dict[str, np.array]:
combined: bool = False,
) -> xr.Dataset:
"""
Uses model to predict samples on unseen data.
Generate posterior predictive samples on unseen data.

Parameters
----------
data_prediction : Dictionary mapping strings to numpy arrays, pandas DataFrames or pandas Series
The data on which the model should make predictions.
extend_idata : Boolean determining whether the predictions should be added to inference data object.
Defaults to True.
combined : Boolean determining whether the chain and draw dims are combined into a single sample dim. Won't work if a dim named sample already exists.
Defaults to False.

Returns
-------
returns dictionary of sample's posterior predict.
returns posterior predictive samples

Examples
--------
Expand All @@ -314,8 +306,7 @@ def predict_posterior(
>>> idata = model.fit(data)
>>> x_pred = []
>>> prediction_data = pd.DataFrame({'input': x_pred})
# samples
>>> pred_mean = model.predict_posterior(prediction_data)
>>> pred_samples = model.predict_posterior(prediction_data)
"""

if data_prediction is not None: # set new input data
Expand All @@ -326,30 +317,11 @@ def predict_posterior(
if extend_idata:
self.idata.extend(post_pred)

# reshape output
post_pred = self._extract_samples(post_pred)

return post_pred

@staticmethod
def _extract_samples(post_pred: az.data.inference_data.InferenceData) -> Dict[str, np.array]:
"""
This method can be used to extract samples from posterior predict.

Parameters
----------
post_pred: arviz InferenceData object

Returns
-------
Dictionary of numpy arrays from InferenceData object
"""

post_pred_dict = dict()
for key in post_pred.posterior_predictive:
post_pred_dict[key] = post_pred.posterior_predictive[key].to_numpy()[0]
posterior_predictive_samples = az.extract(
post_pred, "posterior_predictive", combined=combined
)

return post_pred_dict
return posterior_predictive_samples

@property
def id(self) -> str:
Expand Down
41 changes: 11 additions & 30 deletions pymc_experimental/tests/test_model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ class test_ModelBuilder(ModelBuilder):
version = "0.1"

def build(self):

with pm.Model() as self.model:
if self.data is not None:
x = pm.MutableData("x", self.data["input"].values)
Expand Down Expand Up @@ -148,40 +147,22 @@ def test_predict():
prediction_data = pd.DataFrame({"input": x_pred})
pred = model.predict(prediction_data)
assert "y_model" in pred
assert isinstance(pred, dict)
assert len(prediction_data.input.values) == len(pred["y_model"])
assert isinstance(pred["y_model"][0], (np.float32, np.float64))
assert np.issubdtype(pred["y_model"].dtype, np.floating)


def test_predict_posterior():
@pytest.mark.parametrize("combined", [True, False])
def test_predict_posterior(combined):
model = test_ModelBuilder.initial_build_and_fit()
x_pred = np.random.uniform(low=0, high=1, size=100)
n_pred = 100
x_pred = np.random.uniform(low=0, high=1, size=n_pred)
prediction_data = pd.DataFrame({"input": x_pred})
pred = model.predict_posterior(prediction_data)
assert "y_model" in pred
assert isinstance(pred, dict)
assert len(prediction_data.input.values) == len(pred["y_model"][0])
assert isinstance(pred["y_model"][0], np.ndarray)


def test_extract_samples():
# create a fake InferenceData object
with pm.Model() as model:
x = pm.Normal("x", mu=0, sigma=1)
intercept = pm.Normal("intercept", mu=0, sigma=1)
y_model = pm.Normal("y_model", mu=x * intercept, sigma=1, observed=[0, 1, 2])

idata = pm.sample(1000, tune=1000)
post_pred = pm.sample_posterior_predictive(idata)

# call the function and get the output
samples_dict = test_ModelBuilder._extract_samples(post_pred)

# assert that the keys and values are correct
assert len(samples_dict) == len(post_pred.posterior_predictive)
for key in post_pred.posterior_predictive:
expected_value = post_pred.posterior_predictive[key].to_numpy()[0]
assert np.array_equal(samples_dict[key], expected_value)
pred = model.predict_posterior(prediction_data, combined=combined)
chains = model.idata.sample_stats.dims["chain"]
draws = model.idata.sample_stats.dims["draw"]
expected_shape = (n_pred, chains * draws) if combined else (chains, draws, n_pred)
assert pred["y_model"].shape == expected_shape
assert np.issubdtype(pred["y_model"].dtype, np.floating)


def test_id():
Expand Down