pymc-devs · twiecki · Oct 22, 2022 · Oct 14, 2022 · Oct 21, 2022 · Oct 22, 2022
diff --git a/pymc_experimental/model_builder.py b/pymc_experimental/model_builder.py
@@ -245,10 +245,49 @@ def fit(self, data: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None
     def predict(
         self,
         data_prediction: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None,
-        point_estimate: bool = True,
     ):
         """
-        Uses model to predict on unseen data.
+        Uses model to predict on unseen data and returns a point prediction (the mean of the posterior predictive samples).
+
+        Parameters
+        ----------
+        data_prediction : dict of str to numpy array, pandas DataFrame or pandas Series, optional
+            New input data to predict on. If None, the data currently set on the model is left unchanged.
+
+        Returns
+        -------
+        Dictionary holding, for each key, the mean of the posterior predictive samples.
+
+        Examples
+        --------
+        >>> data, model_config, sampler_config = LinearModel.create_sample_input()
+        >>> model = LinearModel(model_config, sampler_config)
+        >>> idata = model.fit(data)
+        >>> x_pred = []
+        >>> prediction_data = pd.DataFrame({'input':x_pred})
+        # point predict
+        >>> pred_mean = model.predict(prediction_data)
+        """
+
+        if data_prediction is not None:  # set new input data
+            self._data_setter(data_prediction)
+
+        with self.model:  # sample with new input data
+            post_pred = pm.sample_posterior_predictive(self.idata)
+
+        # reshape output
+        post_pred = self._extract_samples(post_pred)
+        for key in post_pred:
+            post_pred[key] = post_pred[key].mean(axis=0)
+
+        return post_pred
+
+    def predict_posterior(
+        self,
+        data_prediction: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None,
+    ):
+        """
+        Uses model to predict samples on unseen data.
 
         Parameters
         ---------
@@ -268,10 +307,8 @@ def predict(
         >>> idata = model.fit(data)
         >>> x_pred = []
         >>> prediction_data = pd.DataFrame({'input':x_pred})
-        # only point estimate
-        >>> pred_mean = model.predict(prediction_data)
         # samples
-        >>> pred_samples = model.predict(prediction_data, point_estimate=False)
+        >>> pred_samples = model.predict_posterior(prediction_data)
         """
 
         if data_prediction is not None:  # set new input data
@@ -282,9 +319,6 @@ def predict(
 
         # reshape output
         post_pred = self._extract_samples(post_pred)
-        if point_estimate:  # average, if point-like estimate desired
-            for key in post_pred:
-                post_pred[key] = post_pred[key].mean(axis=0)
 
         return post_pred
 

diff --git a/pymc_experimental/tests/test_model_builder.py b/pymc_experimental/tests/test_model_builder.py
@@ -135,3 +135,35 @@ def test_predict():
         y_test = pm.sample_posterior_predictive(idata)
 
         assert str(model_2.idata.groups) == str(idata.groups)
+
+
+def test_predict_posterior():
+    x_pred = np.random.uniform(low=0, high=1, size=100)
+    prediction_data = pd.DataFrame({"input": x_pred})
+    data, model_config, sampler_config = test_ModelBuilder.create_sample_input()
+    model_2 = test_ModelBuilder(model_config, sampler_config, data)
+    model_2.idata = model_2.fit()
+    model_2.predict_posterior(prediction_data)
+    with pm.Model() as model:
+        x = np.linspace(start=0, stop=1, num=100)
+        y = 5 * x + 3
+        x = pm.MutableData("x", x)
+        y_data = pm.MutableData("y_data", y)
+        a_loc = 7
+        a_scale = 3
+        b_loc = 5
+        b_scale = 3
+        obs_error = 2
+
+        a = pm.Normal("a", a_loc, sigma=a_scale)
+        b = pm.Normal("b", b_loc, sigma=b_scale)
+        obs_error = pm.HalfNormal("σ_model_fmc", obs_error)
+
+        y_model = pm.Normal("y_model", a + b * x, obs_error, observed=y_data)
+
+        idata = pm.sample(tune=10, draws=20, chains=3, cores=1)
+        idata.extend(pm.sample_prior_predictive())
+        idata.extend(pm.sample_posterior_predictive(idata))
+        y_test = pm.sample_posterior_predictive(idata)
+
+        assert str(model_2.idata.groups) == str(idata.groups)