From b5f7d86088f2afb78261e7207712e5b954a8bdd8 Mon Sep 17 00:00:00 2001
From: Will Dean <wd60622@gmail.com>
Date: Fri, 15 Nov 2024 18:46:19 -0500
Subject: [PATCH 1/6] add notebook runner

---
 environment.yml                   |  24 ++++
 scripts/run_notebooks/injected.py |  76 +++++++++++++
 scripts/run_notebooks/runner.py   | 181 ++++++++++++++++++++++++++++++
 3 files changed, 281 insertions(+)
 create mode 100644 environment.yml
 create mode 100644 scripts/run_notebooks/injected.py
 create mode 100644 scripts/run_notebooks/runner.py

diff --git a/environment.yml b/environment.yml
new file mode 100644
index 000000000..4d57dac77
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,24 @@
+name: pymc-examples
+channels:
+- conda-forge
+dependencies:
+- python=3.11
+- pymc
+- pymc-bart
+- nutpie
+# spatial notebooks
+- geopandas
+- folium
+- libpysal
+- rasterio
+- pip:
+  - pymc-experimental
+  - preliz
+  - bambi
+  - jax
+  - papermill
+  - joblib
+  - jupyter
+  - seaborn
+  - watermark
+  - lifelines
diff --git a/scripts/run_notebooks/injected.py b/scripts/run_notebooks/injected.py
new file mode 100644
index 000000000..c8174343b
--- /dev/null
+++ b/scripts/run_notebooks/injected.py
@@ -0,0 +1,76 @@
+"""Injected code to the top of each notebook to mock long running code."""
+
+import os
+import numpy as np
+import pymc as pm
+import xarray as xr
+
+
+def mock_sample(*args, **kwargs):
+    if len(args) > 0:
+        draws = args[0]
+    else:
+        draws = kwargs.get("draws", 1000)
+    random_seed = kwargs.get("random_seed", None)
+    rng = np.random.default_rng(random_seed)
+    model = kwargs.get("model", None)
+    chains = kwargs.get("chains", os.cpu_count())
+    idata = pm.sample_prior_predictive(
+        model=model,
+        random_seed=random_seed,
+        samples=draws,
+    )
+    n_chains = chains
+    expanded_chains = xr.DataArray(
+        np.ones(n_chains),
+        coords={"chain": np.arange(n_chains)},
+    )
+    idata.add_groups(
+        posterior=(idata.prior.mean("chain") * expanded_chains).transpose(
+            "chain", "draw", ...
+        )
+    )
+    if "prior" in idata:
+        del idata.prior
+    if "prior_predictive" in idata:
+        del idata.prior_predictive
+
+    # Create mock sample stats with diverging data
+    if "sample_stats" not in idata:
+        n_chains = chains
+        n_draws = draws
+        sample_stats = xr.Dataset(
+            {
+                "diverging": xr.DataArray(
+                    np.zeros((n_chains, n_draws), dtype=int),
+                    dims=("chain", "draw"),
+                ),
+                "energy": xr.DataArray(
+                    rng.normal(loc=150, scale=2.5, size=(n_chains, n_draws)),
+                    dims=("chain", "draw"),
+                ),
+                "tree_depth": xr.DataArray(
+                    rng.choice(
+                        [1, 2, 3], p=[0.01, 0.86, 0.13], size=(n_chains, n_draws)
+                    ),
+                    dims=("chain", "draw"),
+                ),
+                "acceptance_rate": xr.DataArray(
+                    rng.beta(0.5, 0.5, size=(n_chains, n_draws)),
+                    dims=("chain", "draw"),
+                ),
+                # Different sampler
+                "accept": xr.DataArray(
+                    rng.choice([0, 1], size=(n_chains, n_draws)),
+                    dims=("chain", "draw"),
+                ),
+            }
+        )
+        idata.add_groups(sample_stats=sample_stats)
+
+    return idata
+
+
+pm.sample = mock_sample
+pm.HalfFlat = pm.HalfNormal
+pm.Flat = pm.Normal
diff --git a/scripts/run_notebooks/runner.py b/scripts/run_notebooks/runner.py
new file mode 100644
index 000000000..d1291754d
--- /dev/null
+++ b/scripts/run_notebooks/runner.py
@@ -0,0 +1,181 @@
+"""Script to run all notebooks in the docs/source/notebooks directory."""
+
+from argparse import ArgumentParser
+
+from rich.console import Console
+import logging
+from pathlib import Path
+from tempfile import NamedTemporaryFile
+from typing import TypedDict
+from uuid import uuid4
+
+import papermill
+from joblib import Parallel, delayed
+from nbformat.notebooknode import NotebookNode
+from papermill.iorw import load_notebook_node, write_ipynb
+
+KERNEL_NAME: str = "python3"
+
+HERE = Path(__file__).parent
+INJECTED_CODE_FILE = HERE / "injected.py"
+INJECTED_CODE = INJECTED_CODE_FILE.read_text()
+
+
+def setup_logging() -> None:
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    )
+
+
+def generate_random_id() -> str:
+    return str(uuid4())
+
+
+def inject_pymc_sample_mock_code(cells: list) -> None:
+    cells.insert(
+        0,
+        NotebookNode(
+            id=f"code-injection-{generate_random_id()}",
+            execution_count=sum(map(ord, "Mock pm.sample")),
+            cell_type="code",
+            metadata={"tags": []},
+            outputs=[],
+            source=INJECTED_CODE,
+        ),
+    )
+
+
+def mock_run(notebook_path: Path, i: int, total: int) -> None:
+    nb = load_notebook_node(str(notebook_path))
+    inject_pymc_sample_mock_code(nb.cells)
+    with NamedTemporaryFile(suffix=".ipynb") as f:
+        write_ipynb(nb, f.name)
+        desc = f"({i} / {total}) Mocked {notebook_path.name}"
+        papermill.execute_notebook(
+            input_path=f.name,
+            output_path=None,
+            progress_bar=dict(desc=desc),
+            kernel_name=KERNEL_NAME,
+            cwd=notebook_path.parent,
+        )
+
+
+def actual_run(notebook_path: Path, i: int, total: int) -> None:
+    papermill.execute_notebook(
+        input_path=notebook_path,
+        output_path=None,
+        kernel_name=KERNEL_NAME,
+        progress_bar={"desc": f"({i} / {total}) Running {notebook_path.name}"},
+        cwd=notebook_path.parent,
+    )
+
+
+class NotebookFailure(TypedDict):
+    notebook_path: Path
+    error: str
+
+
+def run_notebook(
+    notebook_path: Path,
+    i: int,
+    total: int,
+    mock: bool = True,
+) -> NotebookFailure | None:
+    logging.info(f"Running notebook: {notebook_path.name}")
+    run = mock_run if mock else actual_run
+
+    try:
+        run(notebook_path, i=i, total=total)
+    except Exception as e:
+        logging.error(
+            f"{e.__class__.__name__} encountered running notebook: {str(notebook_path)}"
+        )
+        return NotebookFailure(notebook_path=notebook_path, error=str(e))
+    else:
+        return
+
+
+class RunParams(TypedDict):
+    notebook_path: Path
+    mock: bool
+    i: int
+    total: int
+
+
+def run_parameters(notebook_paths: list[Path], mock: bool = True) -> list[RunParams]:
+    def to_mock(notebook_path: Path, i: int) -> RunParams:
+        return RunParams(
+            notebook_path=notebook_path, mock=mock, i=i, total=len(notebook_paths)
+        )
+
+    return [
+        to_mock(notebook_path, i=i)
+        for i, notebook_path in enumerate(notebook_paths, start=1)
+    ]
+
+
+def main(notebooks_to_run: list[Path], mock: bool = True) -> None:
+    console = Console()
+    errors: list[NotebookFailure]
+    setup_logging()
+    logging.info("Starting notebook runner")
+    logging.info(f"Running {len(notebooks_to_run)} notebook(s).")
+    results = Parallel(n_jobs=-1)(
+        delayed(run_notebook)(**run_params)
+        for run_params in run_parameters(notebooks_to_run, mock=mock)
+    )
+    errors = [result for result in results if result is not None]
+
+    if not errors:
+        logging.info("Notebooks run successfully!")
+        return
+
+    for error in errors:
+        console.rule(f"[bold red]Error running {error['notebook_path']}[/bold red]")
+        console.print(error["error"])
+
+    logging.error(f"{len(errors)} / {len(notebooks_to_run)} notebooks failed")
+
+
+def parse_args():
+    """Parse the CLI arguments, expanding directories into the notebooks they contain."""
+    parser = ArgumentParser()
+    parser.add_argument(
+        "--notebooks",
+        nargs="+",
+        required=True,
+        help="Notebooks or directories of notebooks to run.",
+    )
+    mock_group = parser.add_mutually_exclusive_group()
+    mock_group.add_argument(
+        "--mock",
+        action="store_true",
+        help="Run notebooks with mock code",
+        dest="mock",
+    )
+    mock_group.add_argument(
+        "--no-mock",
+        action="store_false",
+        help="Run notebooks without mock code",
+        dest="mock",
+    )
+    parser.set_defaults(mock=True)
+    args = parser.parse_args()
+
+    notebooks_to_run = []
+    for notebook in map(Path, args.notebooks):
+        if notebook.is_dir():
+            # Directories are searched one level deep: the directory and its direct children.
+            notebooks_to_run.extend(notebook.glob("*.ipynb"))
+            notebooks_to_run.extend(notebook.glob("*/*.ipynb"))
+        else:
+            notebooks_to_run.append(notebook)
+
+    args.notebooks = notebooks_to_run
+    return args
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args.notebooks, mock=args.mock)

From ff441e03df7f4c806ede8b3422cb36075a300b1a Mon Sep 17 00:00:00 2001
From: Will Dean <wd60622@gmail.com>
Date: Fri, 15 Nov 2024 18:49:10 -0500
Subject: [PATCH 2/6] add a docstring

---
 scripts/run_notebooks/runner.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/scripts/run_notebooks/runner.py b/scripts/run_notebooks/runner.py
index d1291754d..06e5c741f 100644
--- a/scripts/run_notebooks/runner.py
+++ b/scripts/run_notebooks/runner.py
@@ -1,4 +1,22 @@
-"""Script to run all notebooks in the docs/source/notebooks directory."""
+"""CLI to run a notebook or directory of notebooks.
+
+Arguments
+---------
+--notebooks: Specific notebook or directory of notebooks to run.
+--mock: Run notebooks with mock code. Default is True. If --no-mock is provided,
+    notebooks will run without mock code.
+
+Examples
+--------
+Run all notebooks in a directory with mock code:
+
+    $ python scripts/run_notebooks/runner.py --notebooks notebooks/ --mock
+
+Run a single notebook without mocked code:
+
+    $ python scripts/run_notebooks/runner.py --notebooks notebooks/notebook.ipynb --no-mock
+
+"""
 
 from argparse import ArgumentParser
 

From cdfb6ab7c3053b240fcbb865a2f2247085bc2389 Mon Sep 17 00:00:00 2001
From: Will Dean <wd60622@gmail.com>
Date: Fri, 15 Nov 2024 18:54:01 -0500
Subject: [PATCH 3/6] add a docstring

---
 scripts/run_notebooks/runner.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/scripts/run_notebooks/runner.py b/scripts/run_notebooks/runner.py
index 06e5c741f..c9d6d799a 100644
--- a/scripts/run_notebooks/runner.py
+++ b/scripts/run_notebooks/runner.py
@@ -10,11 +10,21 @@
 --------
 Run all notebooks in a directory with mock code:
 
-    $ python scripts/run_notebooks/runner.py --notebooks notebooks/ --mock
+.. code-block:: bash
+
+    python scripts/run_notebooks/runner.py --notebooks notebooks/ --mock
 
 Run a single notebook without mocked code:
 
-    $ python scripts/run_notebooks/runner.py --notebooks notebooks/notebook.ipynb --no-mock
+.. code-block:: bash
+
+    python scripts/run_notebooks/runner.py --notebooks notebooks/notebook.ipynb --no-mock
+
+Run all the notebooks in two different directories with mocked code (default):
+
+.. code-block:: bash
+
+    python scripts/run_notebooks/runner.py --notebooks notebooks/ notebooks2/
 
 """
 

From 059d628e3fc43754ffb5ba2a5877f9093afa3e81 Mon Sep 17 00:00:00 2001
From: Will Dean <wd60622@gmail.com>
Date: Fri, 15 Nov 2024 19:17:28 -0500
Subject: [PATCH 4/6] print out the successes as well

---
 scripts/run_notebooks/runner.py | 41 ++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/scripts/run_notebooks/runner.py b/scripts/run_notebooks/runner.py
index c9d6d799a..957ecd38d 100644
--- a/scripts/run_notebooks/runner.py
+++ b/scripts/run_notebooks/runner.py
@@ -31,6 +31,7 @@
 from argparse import ArgumentParser
 
 from rich.console import Console
+from dataclasses import dataclass
 import logging
 from pathlib import Path
 from tempfile import NamedTemporaryFile
@@ -99,7 +100,13 @@ def actual_run(notebook_path: Path, i: int, total: int) -> None:
     )
 
 
-class NotebookFailure(TypedDict):
+@dataclass
+class NotebookSuccess:
+    notebook_path: Path
+
+
+@dataclass
+class NotebookFailure:
     notebook_path: Path
     error: str
 
@@ -109,19 +116,17 @@ def run_notebook(
     i: int,
     total: int,
     mock: bool = True,
-) -> NotebookFailure | None:
+) -> NotebookFailure | NotebookSuccess:
     logging.info(f"Running notebook: {notebook_path.name}")
     run = mock_run if mock else actual_run
 
     try:
         run(notebook_path, i=i, total=total)
     except Exception as e:
-        logging.error(
-            f"{e.__class__.__name__} encountered running notebook: {str(notebook_path)}"
-        )
+        logging.error(f"{e.__class__.__name__} encountered running notebook: {str(notebook_path)}")
         return NotebookFailure(notebook_path=notebook_path, error=str(e))
     else:
-        return
+        return NotebookSuccess(notebook_path=notebook_path)
 
 
 class RunParams(TypedDict):
@@ -133,19 +138,13 @@ class RunParams(TypedDict):
 
 def run_parameters(notebook_paths: list[Path], mock: bool = True) -> list[RunParams]:
     def to_mock(notebook_path: Path, i: int) -> RunParams:
-        return RunParams(
-            notebook_path=notebook_path, mock=mock, i=i, total=len(notebook_paths)
-        )
+        return RunParams(notebook_path=notebook_path, mock=mock, i=i, total=len(notebook_paths))
 
-    return [
-        to_mock(notebook_path, i=i)
-        for i, notebook_path in enumerate(notebook_paths, start=1)
-    ]
+    return [to_mock(notebook_path, i=i) for i, notebook_path in enumerate(notebook_paths, start=1)]
 
 
 def main(notebooks_to_run: list[Path], mock: bool = True) -> None:
     console = Console()
-    errors: list[NotebookFailure]
     setup_logging()
     logging.info("Starting notebook runner")
     logging.info(f"Running {len(notebooks_to_run)} notebook(s).")
@@ -153,15 +152,21 @@ def main(notebooks_to_run: list[Path], mock: bool = True) -> None:
         delayed(run_notebook)(**run_params)
         for run_params in run_parameters(notebooks_to_run, mock=mock)
     )
-    errors = [result for result in results if result is not None]
+    errors: list[NotebookFailure] = list(filter(lambda x: isinstance(x, NotebookFailure), results))
+    successes: list[NotebookSuccess] = list(
+        filter(lambda x: isinstance(x, NotebookSuccess), results)
+    )
 
     if not errors:
-        logging.info("Notebooks run successfully!")
+        logging.info("All notebooks ran successfully!")
         return
 
     for error in errors:
-        console.rule(f"[bold red]Error running {error['notebook_path']}[/bold red]")
-        console.print(error["error"])
+        console.rule(f"[bold red]Error running {error.notebook_path}[/bold red]")
+        console.print(error.error)
+
+    for success in successes:
+        console.print(f"[bold green]Success running {success.notebook_path}[/bold green]")
 
     logging.error(f"{len(errors)} / {len(notebooks_to_run)} notebooks failed")
 

From dfd3321f3b1473cb02274ed5b0f0bfba8d44fb08 Mon Sep 17 00:00:00 2001
From: Will Dean <wd60622@gmail.com>
Date: Fri, 15 Nov 2024 20:24:24 -0500
Subject: [PATCH 5/6] add a dummy sampling_time

---
 scripts/run_notebooks/injected.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/scripts/run_notebooks/injected.py b/scripts/run_notebooks/injected.py
index c8174343b..ffb8d8f6c 100644
--- a/scripts/run_notebooks/injected.py
+++ b/scripts/run_notebooks/injected.py
@@ -26,10 +26,10 @@ def mock_sample(*args, **kwargs):
         coords={"chain": np.arange(n_chains)},
     )
     idata.add_groups(
-        posterior=(idata.prior.mean("chain") * expanded_chains).transpose(
-            "chain", "draw", ...
-        )
+        posterior=(idata.prior.mean("chain") * expanded_chains).transpose("chain", "draw", ...)
     )
+    idata.posterior.attrs["sampling_time"] = 1.0
+
     if "prior" in idata:
         del idata.prior
     if "prior_predictive" in idata:
@@ -50,9 +50,7 @@ def mock_sample(*args, **kwargs):
                     dims=("chain", "draw"),
                 ),
                 "tree_depth": xr.DataArray(
-                    rng.choice(
-                        [1, 2, 3], p=[0.01, 0.86, 0.13], size=(n_chains, n_draws)
-                    ),
+                    rng.choice([1, 2, 3], p=[0.01, 0.86, 0.13], size=(n_chains, n_draws)),
                     dims=("chain", "draw"),
                 ),
                 "acceptance_rate": xr.DataArray(

From 69563da49ceef589127140d3675a05b2ba4c3ae0 Mon Sep 17 00:00:00 2001
From: Will Dean <wd60622@gmail.com>
Date: Sat, 16 Nov 2024 07:35:59 -0500
Subject: [PATCH 6/6] code format

---
 scripts/run_notebooks/runner.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/scripts/run_notebooks/runner.py b/scripts/run_notebooks/runner.py
index 957ecd38d..45ab6e620 100644
--- a/scripts/run_notebooks/runner.py
+++ b/scripts/run_notebooks/runner.py
@@ -138,7 +138,12 @@ class RunParams(TypedDict):
 
 def run_parameters(notebook_paths: list[Path], mock: bool = True) -> list[RunParams]:
     def to_mock(notebook_path: Path, i: int) -> RunParams:
-        return RunParams(notebook_path=notebook_path, mock=mock, i=i, total=len(notebook_paths))
+        return RunParams(
+            notebook_path=notebook_path,
+            mock=mock,
+            i=i,
+            total=len(notebook_paths),
+        )
 
     return [to_mock(notebook_path, i=i) for i, notebook_path in enumerate(notebook_paths, start=1)]