From 455de0586742a72bc95c078ff4eb69a198cc86ed Mon Sep 17 00:00:00 2001
From: Michal Raczycki <michalr265@gmail.com>
Date: Sun, 16 Apr 2023 17:54:19 +0100
Subject: [PATCH 1/6] adaptations to integrate with mmm

---
 pymc_experimental/model_builder.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/pymc_experimental/model_builder.py b/pymc_experimental/model_builder.py
index 3f48f3f9..c5ba5bcd 100644
--- a/pymc_experimental/model_builder.py
+++ b/pymc_experimental/model_builder.py
@@ -23,6 +23,8 @@
 import numpy as np
 import pandas as pd
 import pymc as pm
+from pymc.util import RandomState
+from traitlets import Any
 
 
 class ModelBuilder:
@@ -191,7 +193,7 @@ def load(cls, fname: str):
             data=idata.fit_data.to_dataframe(),
         )
         model_builder.idata = idata
-        model_builder.build()
+        model_builder.build_model()
         if model_builder.id != idata.attrs["id"]:
             raise ValueError(
                 f"The file '{fname}' does not contain an inference data of the same model or configuration as '{cls._model_type}'"
@@ -200,7 +202,12 @@ def load(cls, fname: str):
         return model_builder
 
     def fit(
-        self, data: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None
+        self,
+        progressbar: bool = True,
+        random_seed: RandomState = None,
+        data: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None,
+        *args: Any,
+        **kwargs: Any,
     ) -> az.InferenceData:
         """
         Fit a model using the data passed as a parameter.
@@ -227,9 +234,9 @@ def fit(
         # If a new data was provided, assign it to the model
         if data is not None:
             self.data = data
-
-        self.build()
-        self._data_setter(data)
+        self.model_data, self.model_config = self.create_sample_input(data=self.data)
+        self.build_model(self.model_data, self.model_config)
+        self._data_setter(self.data)
 
         with self.model:
             self.idata = pm.sample(**self.sampler_config)
@@ -240,7 +247,7 @@ def fit(
         self.idata.attrs["model_type"] = self._model_type
         self.idata.attrs["version"] = self.version
         self.idata.attrs["sampler_config"] = json.dumps(self.sampler_config)
-        self.idata.attrs["model_config"] = json.dumps(self.model_config)
+        self.idata.attrs["model_config"] = json.dumps(self.serializable_model_config)
         self.idata.add_groups(fit_data=self.data.to_xarray())
         return self.idata
 

From e62f9240c034b1261a009c5d1a1f298b3ea9d60a Mon Sep 17 00:00:00 2001
From: Michal Raczycki <michalr265@gmail.com>
Date: Mon, 17 Apr 2023 08:48:54 +0100
Subject: [PATCH 2/6] adapted model_config and descriptions

---
 pymc_experimental/model_builder.py            | 52 +++++++++++++++----
 pymc_experimental/tests/test_model_builder.py | 33 ++++++------
 2 files changed, 58 insertions(+), 27 deletions(-)

diff --git a/pymc_experimental/model_builder.py b/pymc_experimental/model_builder.py
index c5ba5bcd..84a9ea8e 100644
--- a/pymc_experimental/model_builder.py
+++ b/pymc_experimental/model_builder.py
@@ -102,7 +102,7 @@ def _data_setter(
     @abstractmethod
     def create_sample_input():
         """
-        Needs to be implemented by the user in the inherited class.
+        Needs to be implemented by the user in the child class.
         Returns examples for data, model_config, sampler_config.
         This is useful for understanding the required
         data structures for the user model.
@@ -116,12 +116,15 @@ def create_sample_input():
         >>>    data = pd.DataFrame({'input': x, 'output': y})
 
         >>>    model_config = {
-        >>>       'a_loc': 7,
-        >>>       'a_scale': 3,
-        >>>       'b_loc': 5,
-        >>>       'b_scale': 3,
-        >>>       'obs_error': 2,
-        >>>    }
+        >>>          'a' : {
+        >>>              'a_loc': 7,
+        >>>              'a_scale' : 3
+        >>>           },
+        >>>          'b' : {
+        >>>              'b_loc': 3,
+        >>>              'b_scale': 5
+        >>>          }
+        >>>          'obs_error': 2
 
         >>>    sampler_config = {
         >>>       'draws': 1_000,
@@ -134,6 +137,31 @@ def create_sample_input():
 
         raise NotImplementedError
 
+    @abstractmethod
+    def build_model(
+        self,
+        model_data: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]],
+        model_config: Dict[str, Union[int, float, Dict]],
+    ) -> None:
+        """
+        Needs to be implemented by the user in the child class.
+        Creates an instance of pm.Model based on provided model_data and model_config, and
+        attaches it to self.
+
+        Parameters
+        ----------
+        model_data - preformatted data that is going to be used in the model.
+        For efficiency reasons it should contain only the necessary data columns, not entire available
+        dataset since it's going to be encoded into data used to recreate the model.
+        model_config - dictionary where keys are strings representing names of parameters of the model, values are
+        dictionaries of parameters needed for creating model parameters (see example in create_sample_input)
+
+        Returns
+        -------
+        None
+        """
+        raise NotImplementedError
+
     def save(self, fname: str) -> None:
         """
         Saves inference data of the model.
@@ -193,7 +221,7 @@ def load(cls, fname: str):
             data=idata.fit_data.to_dataframe(),
         )
         model_builder.idata = idata
-        model_builder.build_model()
+        model_builder.idata = model_builder.fit()
         if model_builder.id != idata.attrs["id"]:
             raise ValueError(
                 f"The file '{fname}' does not contain an inference data of the same model or configuration as '{cls._model_type}'"
@@ -234,9 +262,11 @@ def fit(
         # If a new data was provided, assign it to the model
         if data is not None:
             self.data = data
-        self.model_data, self.model_config = self.create_sample_input(data=self.data)
+        self.model_data, self.model_config, self.sampler_config = self.create_sample_input(
+            data=self.data
+        )
         self.build_model(self.model_data, self.model_config)
-        self._data_setter(self.data)
+        self._data_setter(self.model_data)
 
         with self.model:
             self.idata = pm.sample(**self.sampler_config)
@@ -248,7 +278,7 @@ def fit(
         self.idata.attrs["version"] = self.version
         self.idata.attrs["sampler_config"] = json.dumps(self.sampler_config)
         self.idata.attrs["model_config"] = json.dumps(self.serializable_model_config)
-        self.idata.add_groups(fit_data=self.data.to_xarray())
+        self.idata.add_groups(fit_data=self.model_data.to_xarray())
         return self.idata
 
     def predict(
diff --git a/pymc_experimental/tests/test_model_builder.py b/pymc_experimental/tests/test_model_builder.py
index 20845fee..e17e9d1c 100644
--- a/pymc_experimental/tests/test_model_builder.py
+++ b/pymc_experimental/tests/test_model_builder.py
@@ -28,19 +28,18 @@ class test_ModelBuilder(ModelBuilder):
     _model_type = "LinearModel"
     version = "0.1"
 
-    def build(self):
-
+    def build_model(self, model_data, model_config):
         with pm.Model() as self.model:
-            if self.data is not None:
-                x = pm.MutableData("x", self.data["input"].values)
-                y_data = pm.MutableData("y_data", self.data["output"].values)
+            if model_data is not None:
+                x = pm.MutableData("x", model_data["input"].values)
+                y_data = pm.MutableData("y_data", model_data["output"].values)
 
             # prior parameters
-            a_loc = self.model_config["a_loc"]
-            a_scale = self.model_config["a_scale"]
-            b_loc = self.model_config["b_loc"]
-            b_scale = self.model_config["b_scale"]
-            obs_error = self.model_config["obs_error"]
+            a_loc = model_config["a"]["loc"]
+            a_scale = model_config["a"]["scale"]
+            b_loc = model_config["b"]["loc"]
+            b_scale = model_config["b"]["scale"]
+            obs_error = model_config["obs_error"]
 
             # priors
             a = pm.Normal("a", a_loc, sigma=a_scale)
@@ -48,7 +47,7 @@ def build(self):
             obs_error = pm.HalfNormal("σ_model_fmc", obs_error)
 
             # observed data
-            if self.data is not None:
+            if model_data is not None:
                 y_model = pm.Normal("y_model", a + b * x, obs_error, shape=x.shape, observed=y_data)
 
     def _data_setter(self, data: pd.DataFrame):
@@ -57,18 +56,20 @@ def _data_setter(self, data: pd.DataFrame):
             if "output" in data.columns:
                 pm.set_data({"y_data": data["output"].values})
 
+    @property
+    def serializable_model_config(self):
+        return self.model_config
+
     @classmethod
-    def create_sample_input(self):
+    def create_sample_input(self, data=None):
         x = np.linspace(start=0, stop=1, num=100)
         y = 5 * x + 3
         y = y + np.random.normal(0, 1, len(x))
         data = pd.DataFrame({"input": x, "output": y})
 
         model_config = {
-            "a_loc": 0,
-            "a_scale": 10,
-            "b_loc": 0,
-            "b_scale": 10,
+            "a": {"loc": 0, "scale": 10},
+            "b": {"loc": 0, "scale": 10},
             "obs_error": 2,
         }
 

From 6f4bb259dafa258c715cd17b4e3367615621326f Mon Sep 17 00:00:00 2001
From: Michal Raczycki <michalr265@gmail.com>
Date: Mon, 17 Apr 2023 08:59:24 +0100
Subject: [PATCH 3/6] fixed ModuleNotFoundError from build

---
 pymc_experimental/model_builder.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pymc_experimental/model_builder.py b/pymc_experimental/model_builder.py
index 84a9ea8e..e6ca59bb 100644
--- a/pymc_experimental/model_builder.py
+++ b/pymc_experimental/model_builder.py
@@ -17,14 +17,13 @@
 import json
 from abc import abstractmethod
 from pathlib import Path
-from typing import Dict, Union
+from typing import Any, Dict, Union
 
 import arviz as az
 import numpy as np
 import pandas as pd
 import pymc as pm
 from pymc.util import RandomState
-from traitlets import Any
 
 
 class ModelBuilder:

From 4c9987755a4c8b48d1428752f388437376d1f206 Mon Sep 17 00:00:00 2001
From: Michal Raczycki <michalr265@gmail.com>
Date: Mon, 17 Apr 2023 11:41:00 +0100
Subject: [PATCH 4/6] small tweaks to make mmm tests work smoother

---
 pymc_experimental/model_builder.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pymc_experimental/model_builder.py b/pymc_experimental/model_builder.py
index e6ca59bb..6021443a 100644
--- a/pymc_experimental/model_builder.py
+++ b/pymc_experimental/model_builder.py
@@ -266,9 +266,8 @@ def fit(
         )
         self.build_model(self.model_data, self.model_config)
         self._data_setter(self.model_data)
-
         with self.model:
-            self.idata = pm.sample(**self.sampler_config)
+            self.idata = pm.sample(**self.sampler_config, **kwargs)
             self.idata.extend(pm.sample_prior_predictive())
             self.idata.extend(pm.sample_posterior_predictive(self.idata))
 
@@ -277,7 +276,7 @@ def fit(
         self.idata.attrs["version"] = self.version
         self.idata.attrs["sampler_config"] = json.dumps(self.sampler_config)
         self.idata.attrs["model_config"] = json.dumps(self.serializable_model_config)
-        self.idata.add_groups(fit_data=self.model_data.to_xarray())
+        self.idata.add_groups(fit_data=self.data.to_xarray())
         return self.idata
 
     def predict(

From e89724c0ea2ae1617971388fffe25ad0742b06fd Mon Sep 17 00:00:00 2001
From: Michal Raczycki <michalr265@gmail.com>
Date: Wed, 19 Apr 2023 12:17:24 +0100
Subject: [PATCH 5/6] new test for save, fit allows custom configs

---
 pymc_experimental/model_builder.py            | 34 ++++++++++++++-----
 pymc_experimental/tests/test_model_builder.py | 15 ++++++--
 2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/pymc_experimental/model_builder.py b/pymc_experimental/model_builder.py
index 6021443a..da28cc70 100644
--- a/pymc_experimental/model_builder.py
+++ b/pymc_experimental/model_builder.py
@@ -180,9 +180,11 @@ def save(self, fname: str) -> None:
         >>> name = './mymodel.nc'
         >>> model.save(name)
         """
-
-        file = Path(str(fname))
-        self.idata.to_netcdf(file)
+        if self.idata is not None and "fit_data" in self.idata:
+            file = Path(str(fname))
+            self.idata.to_netcdf(file)
+        else:
+            raise RuntimeError("The model hasn't been fit yet, call .fit() first")
 
     @classmethod
     def load(cls, fname: str):
@@ -220,7 +222,7 @@ def load(cls, fname: str):
             data=idata.fit_data.to_dataframe(),
         )
         model_builder.idata = idata
-        model_builder.idata = model_builder.fit()
+        model_builder.build_model(model_builder.data, model_builder.model_config)
         if model_builder.id != idata.attrs["id"]:
             raise ValueError(
                 f"The file '{fname}' does not contain an inference data of the same model or configuration as '{cls._model_type}'"
@@ -261,11 +263,12 @@ def fit(
         # If a new data was provided, assign it to the model
         if data is not None:
             self.data = data
-        self.model_data, self.model_config, self.sampler_config = self.create_sample_input(
-            data=self.data
-        )
+        self.model_data, model_config, sampler_config = self.create_sample_input(data=self.data)
+        if self.model_config is None:
+            self.model_config = model_config
+        if self.sampler_config is None:
+            self.sampler_config = sampler_config
         self.build_model(self.model_data, self.model_config)
-        self._data_setter(self.model_data)
         with self.model:
             self.idata = pm.sample(**self.sampler_config, **kwargs)
             self.idata.extend(pm.sample_prior_predictive())
@@ -275,7 +278,7 @@ def fit(
         self.idata.attrs["model_type"] = self._model_type
         self.idata.attrs["version"] = self.version
         self.idata.attrs["sampler_config"] = json.dumps(self.sampler_config)
-        self.idata.attrs["model_config"] = json.dumps(self.serializable_model_config)
+        self.idata.attrs["model_config"] = json.dumps(self._serializable_model_config)
         self.idata.add_groups(fit_data=self.data.to_xarray())
         return self.idata
 
@@ -386,6 +389,19 @@ def _extract_samples(post_pred: az.data.inference_data.InferenceData) -> Dict[st
 
         return post_pred_dict
 
+    @property
+    @abstractmethod
+    def _serializable_model_config(self) -> Dict[str, Union[int, float, Dict]]:
+        """
+        Converts non-serializable values from model_config to their serializable, reversible equivalent.
+        Data types like pandas DataFrame, Series or datetime aren't JSON serializable,
+        so in order to save the model they need to be formatted.
+
+        Returns
+        -------
+        model_config: dict
+        """
+
     @property
     def id(self) -> str:
         """
diff --git a/pymc_experimental/tests/test_model_builder.py b/pymc_experimental/tests/test_model_builder.py
index e17e9d1c..5ff32846 100644
--- a/pymc_experimental/tests/test_model_builder.py
+++ b/pymc_experimental/tests/test_model_builder.py
@@ -57,7 +57,7 @@ def _data_setter(self, data: pd.DataFrame):
                 pm.set_data({"y_data": data["output"].values})
 
     @property
-    def serializable_model_config(self):
+    def _serializable_model_config(self):
         return self.model_config
 
     @classmethod
@@ -95,7 +95,16 @@ def initial_build_and_fit(check_idata=True) -> ModelBuilder:
         return model_builder
 
 
-def test_empty_model_config():
+def test_save_without_fit_raises_runtime_error():
+    data, model_config, sampler_config = test_ModelBuilder.create_sample_input()
+    model_builder = test_ModelBuilder(
+        model_config=model_config, sampler_config=sampler_config, data=data
+    )
+    with pytest.raises(RuntimeError):
+        model_builder.save("saved_model")
+
+
+def test_empty_sampler_config_fit():
     data, model_config, sampler_config = test_ModelBuilder.create_sample_input()
     sampler_config = {}
     model_builder = test_ModelBuilder(
@@ -106,7 +115,7 @@ def test_empty_model_config():
     assert "posterior" in model_builder.idata.groups()
 
 
-def test_empty_model_config():
+def test_empty_model_config_fit():
     data, model_config, sampler_config = test_ModelBuilder.create_sample_input()
     model_config = {}
     model_builder = test_ModelBuilder(

From 947a67de5a1bfb0d92419be8d0e6087bd3d0617b Mon Sep 17 00:00:00 2001
From: Michal Raczycki <michalr265@gmail.com>
Date: Wed, 19 Apr 2023 13:01:16 +0100
Subject: [PATCH 6/6] updating create_sample_input example

---
 pymc_experimental/model_builder.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pymc_experimental/model_builder.py b/pymc_experimental/model_builder.py
index da28cc70..79562f9e 100644
--- a/pymc_experimental/model_builder.py
+++ b/pymc_experimental/model_builder.py
@@ -116,12 +116,12 @@ def create_sample_input():
 
         >>>    model_config = {
         >>>          'a' : {
-        >>>              'a_loc': 7,
-        >>>              'a_scale' : 3
+        >>>              'loc': 7,
+        >>>              'scale' : 3
         >>>           },
         >>>          'b' : {
-        >>>              'b_loc': 3,
-        >>>              'b_scale': 5
+        >>>              'loc': 3,
+        >>>              'scale': 5
         >>>          }
         >>>          'obs_error': 2