diff --git a/pymc_experimental/model_builder.py b/pymc_experimental/model_builder.py index 79562f9e..62d31e82 100644 --- a/pymc_experimental/model_builder.py +++ b/pymc_experimental/model_builder.py @@ -23,6 +23,7 @@ import numpy as np import pandas as pd import pymc as pm +import xarray as xr from pymc.util import RandomState @@ -286,7 +287,7 @@ def predict( self, data_prediction: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None, extend_idata: bool = True, - ) -> dict: + ) -> xr.Dataset: """ Uses model to predict on unseen data and return point prediction of all the samples @@ -299,7 +300,7 @@ def predict( Returns ------- - returns dictionary of sample's mean of posterior predict. + returns posterior mean of predictive samples Examples -------- @@ -308,31 +309,20 @@ def predict( >>> idata = model.fit(data) >>> x_pred = [] >>> prediction_data = pd.DataFrame({'input':x_pred}) - # point predict >>> pred_mean = model.predict(prediction_data) """ - - if data_prediction is not None: # set new input data - self._data_setter(data_prediction) - - with self.model: # sample with new input data - post_pred = pm.sample_posterior_predictive(self.idata) - if extend_idata: - self.idata.extend(post_pred) - # reshape output - post_pred = self._extract_samples(post_pred) - for key in post_pred: - post_pred[key] = post_pred[key].mean(axis=0) - - return post_pred + posterior_predictive_samples = self.predict_posterior(data_prediction, extend_idata) + posterior_means = posterior_predictive_samples.mean(dim=["chain", "draw"], keep_attrs=True) + return posterior_means def predict_posterior( self, data_prediction: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None, extend_idata: bool = True, - ) -> Dict[str, np.array]: + combined: bool = False, + ) -> xr.Dataset: """ - Uses model to predict samples on unseen data. + Generate posterior predictive samples on unseen data. Parameters --------- @@ -340,10 +330,12 @@ def predict_posterior( It is the data we need to make prediction on using the model. extend_idata : Boolean determining whether the predictions should be added to inference data object. Defaults to True. + combined: Combine chain and draw dims into sample. Won’t work if a dim named sample already exists. + Defaults to False. Returns ------- - returns dictionary of sample's posterior predict. + returns posterior predictive samples Examples -------- @@ -352,8 +344,7 @@ def predict_posterior( >>> idata = model.fit(data) >>> x_pred = [] >>> prediction_data = pd.DataFrame({'input': x_pred}) - # samples - >>> pred_mean = model.predict_posterior(prediction_data) + >>> pred_samples = model.predict_posterior(prediction_data) """ if data_prediction is not None: # set new input data @@ -364,30 +355,11 @@ def predict_posterior( if extend_idata: self.idata.extend(post_pred) - # reshape output - post_pred = self._extract_samples(post_pred) - - return post_pred - - @staticmethod - def _extract_samples(post_pred: az.data.inference_data.InferenceData) -> Dict[str, np.array]: - """ - This method can be used to extract samples from posterior predict. - - Parameters - ---------- - post_pred: arviz InferenceData object - - Returns - ------- - Dictionary of numpy arrays from InferenceData object - """ - - post_pred_dict = dict() - for key in post_pred.posterior_predictive: - post_pred_dict[key] = post_pred.posterior_predictive[key].to_numpy()[0] + posterior_predictive_samples = az.extract( + post_pred, "posterior_predictive", combined=combined + ) - return post_pred_dict + return posterior_predictive_samples @property @abstractmethod diff --git a/pymc_experimental/tests/test_model_builder.py b/pymc_experimental/tests/test_model_builder.py index 5ff32846..37eb0dab 100644 --- a/pymc_experimental/tests/test_model_builder.py +++ b/pymc_experimental/tests/test_model_builder.py @@ -158,40 +158,22 @@ def test_predict(): prediction_data = pd.DataFrame({"input": x_pred}) pred = model.predict(prediction_data) assert "y_model" in pred - assert isinstance(pred, dict) assert len(prediction_data.input.values) == len(pred["y_model"]) - assert isinstance(pred["y_model"][0], (np.float32, np.float64)) + assert np.issubdtype(pred["y_model"].dtype, np.floating) -def test_predict_posterior(): +@pytest.mark.parametrize("combined", [True, False]) +def test_predict_posterior(combined): model = test_ModelBuilder.initial_build_and_fit() - x_pred = np.random.uniform(low=0, high=1, size=100) + n_pred = 100 + x_pred = np.random.uniform(low=0, high=1, size=n_pred) prediction_data = pd.DataFrame({"input": x_pred}) - pred = model.predict_posterior(prediction_data) - assert "y_model" in pred - assert isinstance(pred, dict) - assert len(prediction_data.input.values) == len(pred["y_model"][0]) - assert isinstance(pred["y_model"][0], np.ndarray) - - -def test_extract_samples(): - # create a fake InferenceData object - with pm.Model() as model: - x = pm.Normal("x", mu=0, sigma=1) - intercept = pm.Normal("intercept", mu=0, sigma=1) - y_model = pm.Normal("y_model", mu=x * intercept, sigma=1, observed=[0, 1, 2]) - - idata = pm.sample(1000, tune=1000) - post_pred = pm.sample_posterior_predictive(idata) - - # call the function and get the output - samples_dict = test_ModelBuilder._extract_samples(post_pred) - - # assert that the keys and values are correct - assert len(samples_dict) == len(post_pred.posterior_predictive) - for key in post_pred.posterior_predictive: - expected_value = post_pred.posterior_predictive[key].to_numpy()[0] - assert np.array_equal(samples_dict[key], expected_value) + pred = model.predict_posterior(prediction_data, combined=combined) + chains = model.idata.sample_stats.dims["chain"] + draws = model.idata.sample_stats.dims["draw"] + expected_shape = (n_pred, chains * draws) if combined else (chains, draws, n_pred) + assert pred["y_model"].shape == expected_shape + assert np.issubdtype(pred["y_model"].dtype, np.floating) def test_id():