Skip to content

Merging BayesianEstimator into ModelBuilder #165

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
68 changes: 50 additions & 18 deletions pymc_experimental/bayesian_estimator_linearmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class BayesianEstimator(ModelBuilder):

def __init__(
self,
data: Union[np.ndarray, pd.DataFrame, pd.Series] = None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are you adding this back in?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed, I want to experiment with adding a no-parameter initialization that will act as a sandbox for new users. Keeping the data in the constructor, however, helps reduce the number of adaptations needed before implementing ModelBuilder in other classes. It also has low impact and should be easy to remove later if we don't like it.

model_config: Dict = None,
sampler_config: Dict = None,
):
Expand All @@ -75,16 +76,9 @@ def __init__(
"""
if model_config is None:
model_config = self.default_model_config
self.model_config = model_config

if sampler_config is None:
sampler_config = self.default_sampler_config
self.sampler_config = sampler_config

self.model = None # Set by build_model
self.output_var = None # Set by build_model
self.idata = None # idata is generated during fitting
self.is_fitted_ = False
super().__init__(data=data, model_config=model_config, sampler_config=sampler_config)

@property
@abstractmethod
Expand All @@ -103,16 +97,11 @@ def _validate_data(self, X, y=None):
return check_array(X, accept_sparse=False)

@abstractmethod
def build_model(self) -> None:
"""
Build the PYMC model. The model is built with placeholder data.
Actual data will be set by _data_setter when fitting or evaluating the model.
Data array size can change but number of dimensions must stay the same.
def build_model(
model_data: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None,
model_config: Dict[str, Union[int, float, Dict]] = None,
) -> None:

Returns:
----------
None
"""
raise NotImplementedError

@abstractmethod
Expand Down Expand Up @@ -425,6 +414,25 @@ def default_sampler_config(self):
}

def build_model(self):
"""
Build the PyMC model.

Returns
-------
None

Examples
--------
>>> self.build_model()
>>> assert self.model is not None
>>> assert isinstance(self.model, pm.Model)
>>> assert "intercept" in self.model.named_vars
>>> assert "slope" in self.model.named_vars
>>> assert "σ_model_fmc" in self.model.named_vars
>>> assert "y_model" in self.model.named_vars
>>> assert "y_hat" in self.model.named_vars
>>> assert self.output_var == "y_hat"
"""
cfg = self.model_config

# The model is built with placeholder data.
Expand Down Expand Up @@ -462,7 +470,31 @@ def _data_setter(self, X, y=None):
pm.set_data({"y_data": y.squeeze()})

@classmethod
def create_sample_input(cls, nsamples=100):
def generate_model_data(cls, nsamples=100, data=None):
"""
Generate model data for linear regression.

Parameters
----------
nsamples : int, optional
The number of samples to generate. Default is 100.
data : np.ndarray, optional
An optional data array to add noise to.

Returns
-------
tuple
A tuple of two np.ndarrays representing the feature matrix and target vector, respectively.

Examples
--------
>>> import numpy as np
>>> x, y = cls.generate_model_data()
>>> assert isinstance(x, np.ndarray)
>>> assert isinstance(y, np.ndarray)
>>> assert x.shape == (100, 1)
>>> assert y.shape == (100,)
"""
x = np.linspace(start=0, stop=1, num=nsamples)
y = 5 * x + 3
y = y + np.random.normal(0, 1, len(x))
Expand Down
Loading