Skip to content

Model Builder refactoring #119

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 13 additions & 12 deletions pymc_experimental/model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@


import hashlib
from abc import abstractmethod
from pathlib import Path
from typing import Dict, Union

Expand All @@ -23,7 +24,7 @@
import pymc as pm


class ModelBuilder(pm.Model):
class ModelBuilder:
"""
ModelBuilder can be used to provide an easy-to-use API (similar to scikit-learn) for models
and help with deployment.
Expand Down Expand Up @@ -63,16 +64,15 @@ def __init__(
self.sample_config = sampler_config # parameters for sampling
self.idata = None # inference data object
self.data = data
self.build()

def build(self):
"""
Builds the defined model.
"""

with self:
self.build_model(self.model_config, self.data)
self.build_model(self, self.model_config, self.data)

@abstractmethod
def _data_setter(
self, data: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]], x_only: bool = True
):
Expand All @@ -99,8 +99,10 @@ def _data_setter(

raise NotImplementedError

@classmethod
def create_sample_input(cls):
# TODO: discuss whether this method is really needed.
@staticmethod
@abstractmethod
def create_sample_input():
"""
Needs to be implemented by the user in the inherited class.
Returns examples for data, model_config, sampler_config.
Expand Down Expand Up @@ -169,7 +171,7 @@ def load(cls, fname):

Returns
-------
Returns the inference data that is loaded from local system.
Returns an instance of pm.Model that is loaded from local data.

Raises
------
Expand All @@ -192,12 +194,13 @@ def load(cls, fname):
idata.fit_data.to_dataframe(),
)
self.idata = idata
self.build()
if self.id != idata.attrs["id"]:
raise ValueError(
f"The file '{fname}' does not contain an inference data of the same model or configuration as '{self._model_type}'"
)

return self
return self.model

def fit(self, data: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None):
"""
Expand All @@ -224,12 +227,10 @@ def fit(self, data: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None

if data is not None:
self.data = data
self._data_setter(data)

if self.basic_RVs == []:
self.build()
self._data_setter(data)

with self:
with self.model:
self.idata = pm.sample(**self.sample_config)
self.idata.extend(pm.sample_prior_predictive())
self.idata.extend(pm.sample_posterior_predictive(self.idata))
Expand Down
72 changes: 39 additions & 33 deletions pymc_experimental/tests/test_model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import hashlib
import sys
import tempfile
Expand All @@ -29,26 +28,32 @@ class test_ModelBuilder(ModelBuilder):
_model_type = "LinearModel"
version = "0.1"

def build_model(self, model_config, data=None):
if data is not None:
x = pm.MutableData("x", data["input"].values)
y_data = pm.MutableData("y_data", data["output"].values)

# prior parameters
a_loc = model_config["a_loc"]
a_scale = model_config["a_scale"]
b_loc = model_config["b_loc"]
b_scale = model_config["b_scale"]
obs_error = model_config["obs_error"]

# priors
a = pm.Normal("a", a_loc, sigma=a_scale)
b = pm.Normal("b", b_loc, sigma=b_scale)
obs_error = pm.HalfNormal("σ_model_fmc", obs_error)

# observed data
if data is not None:
y_model = pm.Normal("y_model", a + b * x, obs_error, shape=x.shape, observed=y_data)
def build_model(self, model_instance, model_config, data=None):
model_instance.model_config = model_config
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can just keep `model_config` and `data` on the base-class instance (`self`). The `__init__()` method should also take care of that.

model_instance.data = data
self.model_config = model_config
self.data = data

with pm.Model() as model_instance.model:
if data is not None:
x = pm.MutableData("x", data["input"].values)
y_data = pm.MutableData("y_data", data["output"].values)

# prior parameters
a_loc = model_config["a_loc"]
a_scale = model_config["a_scale"]
b_loc = model_config["b_loc"]
b_scale = model_config["b_scale"]
obs_error = model_config["obs_error"]

# priors
a = pm.Normal("a", a_loc, sigma=a_scale)
b = pm.Normal("b", b_loc, sigma=b_scale)
obs_error = pm.HalfNormal("σ_model_fmc", obs_error)

# observed data
if data is not None:
y_model = pm.Normal("y_model", a + b * x, obs_error, shape=x.shape, observed=y_data)

def _data_setter(self, data: pd.DataFrame):
with self.model:
Expand All @@ -57,7 +62,7 @@ def _data_setter(self, data: pd.DataFrame):
pm.set_data({"y_data": data["output"].values})

@classmethod
def create_sample_input(cls):
def create_sample_input(self):
x = np.linspace(start=0, stop=1, num=100)
y = 5 * x + 3
y = y + np.random.normal(0, 1, len(x))
Expand All @@ -83,12 +88,12 @@ def create_sample_input(cls):
@staticmethod
def initial_build_and_fit(check_idata=True):
data, model_config, sampler_config = test_ModelBuilder.create_sample_input()
model = test_ModelBuilder(model_config, sampler_config, data)
model.fit()
model_builder = test_ModelBuilder(model_config, sampler_config, data)
model_builder.idata = model_builder.fit(data=data)
if check_idata:
assert model.idata is not None
assert "posterior" in model.idata.groups()
return model
assert model_builder.idata is not None
assert "posterior" in model_builder.idata.groups()
return model_builder


def test_fit():
Expand All @@ -105,16 +110,17 @@ def test_fit():
sys.platform == "win32", reason="Permissions for temp files not granted on windows CI."
)
def test_save_load():
model = test_ModelBuilder.initial_build_and_fit(False)
test_builder = test_ModelBuilder.initial_build_and_fit()
temp = tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", delete=False)
model.save(temp.name)
model2 = test_ModelBuilder.load(temp.name)
assert model.idata.groups() == model2.idata.groups()
test_builder.save(temp.name)
test_builder2 = test_ModelBuilder.initial_build_and_fit()
test_builder2.model = test_ModelBuilder.load(temp.name)
assert test_builder.idata.groups() == test_builder2.idata.groups()

x_pred = np.random.uniform(low=0, high=1, size=100)
prediction_data = pd.DataFrame({"input": x_pred})
pred1 = model.predict(prediction_data)
pred2 = model2.predict(prediction_data)
pred1 = test_builder.predict(prediction_data)
pred2 = test_builder2.predict(prediction_data)
assert pred1["y_model"].shape == pred2["y_model"].shape
temp.close()

Expand Down