From b5f7d86088f2afb78261e7207712e5b954a8bdd8 Mon Sep 17 00:00:00 2001 From: Will Dean Date: Fri, 15 Nov 2024 18:46:19 -0500 Subject: [PATCH 1/6] add notebook runner --- environment.yml | 24 ++++ scripts/run_notebooks/injected.py | 76 +++++++++++++ scripts/run_notebooks/runner.py | 181 ++++++++++++++++++++++++++++++ 3 files changed, 281 insertions(+) create mode 100644 environment.yml create mode 100644 scripts/run_notebooks/injected.py create mode 100644 scripts/run_notebooks/runner.py diff --git a/environment.yml b/environment.yml new file mode 100644 index 000000000..4d57dac77 --- /dev/null +++ b/environment.yml @@ -0,0 +1,24 @@ +name: pymc-examples +channels: +- conda-forge +dependencies: +- python=3.11 +- pymc +- pymc-bart +- nutpie +# spatial notebooks +- geopandas +- folium +- libpysal +- rasterio +- pip: + - pymc-experimental + - preliz + - bambi + - jax + - papermill + - joblib + - jupyter + - seaborn + - watermark + - lifelines diff --git a/scripts/run_notebooks/injected.py b/scripts/run_notebooks/injected.py new file mode 100644 index 000000000..c8174343b --- /dev/null +++ b/scripts/run_notebooks/injected.py @@ -0,0 +1,76 @@ +"""Injected code to the top of each notebook to mock long running code.""" + +import os +import numpy as np +import pymc as pm +import xarray as xr + + +def mock_sample(*args, **kwargs): + if len(args) > 0: + draws = args[0] + else: + draws = kwargs.get("draws", 1000) + random_seed = kwargs.get("random_seed", None) + rng = np.random.default_rng(random_seed) + model = kwargs.get("model", None) + chains = kwargs.get("chains", os.cpu_count()) + idata = pm.sample_prior_predictive( + model=model, + random_seed=random_seed, + samples=draws, + ) + n_chains = chains + expanded_chains = xr.DataArray( + np.ones(n_chains), + coords={"chain": np.arange(n_chains)}, + ) + idata.add_groups( + posterior=(idata.prior.mean("chain") * expanded_chains).transpose( + "chain", "draw", ... 
+ ) + ) + if "prior" in idata: + del idata.prior + if "prior_predictive" in idata: + del idata.prior_predictive + + # Create mock sample stats with diverging data + if "sample_stats" not in idata: + n_chains = chains + n_draws = draws + sample_stats = xr.Dataset( + { + "diverging": xr.DataArray( + np.zeros((n_chains, n_draws), dtype=int), + dims=("chain", "draw"), + ), + "energy": xr.DataArray( + rng.normal(loc=150, scale=2.5, size=(n_chains, n_draws)), + dims=("chain", "draw"), + ), + "tree_depth": xr.DataArray( + rng.choice( + [1, 2, 3], p=[0.01, 0.86, 0.13], size=(n_chains, n_draws) + ), + dims=("chain", "draw"), + ), + "acceptance_rate": xr.DataArray( + rng.beta(0.5, 0.5, size=(n_chains, n_draws)), + dims=("chain", "draw"), + ), + # Different sampler + "accept": xr.DataArray( + rng.choice([0, 1], size=(n_chains, n_draws)), + dims=("chain", "draw"), + ), + } + ) + idata.add_groups(sample_stats=sample_stats) + + return idata + + +pm.sample = mock_sample +pm.HalfFlat = pm.HalfNormal +pm.Flat = pm.Normal diff --git a/scripts/run_notebooks/runner.py b/scripts/run_notebooks/runner.py new file mode 100644 index 000000000..d1291754d --- /dev/null +++ b/scripts/run_notebooks/runner.py @@ -0,0 +1,181 @@ +"""Script to run all notebooks in the docs/source/notebooks directory.""" + +from argparse import ArgumentParser + +from rich.console import Console +import logging +from pathlib import Path +from tempfile import NamedTemporaryFile +from typing import TypedDict +from uuid import uuid4 + +import papermill +from joblib import Parallel, delayed +from nbformat.notebooknode import NotebookNode +from papermill.iorw import load_notebook_node, write_ipynb + +KERNEL_NAME: str = "python3" + +HERE = Path(__file__).parent +INJECTED_CODE_FILE = HERE / "injected.py" +INJECTED_CODE = INJECTED_CODE_FILE.read_text() + + +def setup_logging() -> None: + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + + +def 
generate_random_id() -> str: + return str(uuid4()) + + +def inject_pymc_sample_mock_code(cells: list) -> None: + cells.insert( + 0, + NotebookNode( + id=f"code-injection-{generate_random_id()}", + execution_count=sum(map(ord, "Mock pm.sample")), + cell_type="code", + metadata={"tags": []}, + outputs=[], + source=INJECTED_CODE, + ), + ) + + +def mock_run(notebook_path: Path, i: int, total: int) -> None: + nb = load_notebook_node(str(notebook_path)) + inject_pymc_sample_mock_code(nb.cells) + with NamedTemporaryFile(suffix=".ipynb") as f: + write_ipynb(nb, f.name) + desc = f"({i} / {total}) Mocked {notebook_path.name}" + papermill.execute_notebook( + input_path=f.name, + output_path=None, + progress_bar=dict(desc=desc), + kernel_name=KERNEL_NAME, + cwd=notebook_path.parent, + ) + + +def actual_run(notebook_path: Path, i: int, total: int) -> None: + papermill.execute_notebook( + input_path=notebook_path, + output_path=None, + kernel_name=KERNEL_NAME, + progress_bar={"desc": f"({i} / {total}) Running {notebook_path.name}"}, + cwd=notebook_path.parent, + ) + + +class NotebookFailure(TypedDict): + notebook_path: Path + error: str + + +def run_notebook( + notebook_path: Path, + i: int, + total: int, + mock: bool = True, +) -> NotebookFailure | None: + logging.info(f"Running notebook: {notebook_path.name}") + run = mock_run if mock else actual_run + + try: + run(notebook_path, i=i, total=total) + except Exception as e: + logging.error( + f"{e.__class__.__name__} encountered running notebook: {str(notebook_path)}" + ) + return NotebookFailure(notebook_path=notebook_path, error=str(e)) + else: + return + + +class RunParams(TypedDict): + notebook_path: Path + mock: bool + i: int + total: int + + +def run_parameters(notebook_paths: list[Path], mock: bool = True) -> list[RunParams]: + def to_mock(notebook_path: Path, i: int) -> RunParams: + return RunParams( + notebook_path=notebook_path, mock=mock, i=i, total=len(notebook_paths) + ) + + return [ + to_mock(notebook_path, i=i) + 
for i, notebook_path in enumerate(notebook_paths, start=1) + ] + + +def main(notebooks_to_run: list[Path], mock: bool = True) -> None: + console = Console() + errors: list[NotebookFailure] + setup_logging() + logging.info("Starting notebook runner") + logging.info(f"Running {len(notebooks_to_run)} notebook(s).") + results = Parallel(n_jobs=-1)( + delayed(run_notebook)(**run_params) + for run_params in run_parameters(notebooks_to_run, mock=mock) + ) + errors = [result for result in results if result is not None] + + if not errors: + logging.info("Notebooks run successfully!") + return + + for error in errors: + console.rule(f"[bold red]Error running {error['notebook_path']}[/bold red]") + console.print(error["error"]) + + logging.error(f"{len(errors)} / {len(notebooks_to_run)} notebooks failed") + + +def parse_args(): + parser = ArgumentParser() + parser.add_argument( + "--notebooks", + nargs="+", + help="List of notebooks to run. If not provided, all notebooks will be run.", + ) + mock_group = parser.add_mutually_exclusive_group() + mock_group.add_argument( + "--mock", + action="store_true", + help="Run notebooks with mock code", + dest="mock", + ) + mock_group.add_argument( + "--no-mock", + action="store_false", + help="Run notebooks without mock code", + dest="mock", + ) + parser.set_defaults(mock=True) + args = parser.parse_args() + + notebooks_to_run = [] + notebooks = args.notebooks + notebooks = [Path(notebook) for notebook in notebooks] + for notebook in notebooks: + if notebook.is_dir(): + notebooks_to_run.extend(notebook.glob("*.ipynb")) + notebooks_to_run.extend(notebook.glob("*/*.ipynb")) + else: + notebooks_to_run.append(notebook) + + args.notebooks = notebooks_to_run + + return args + + +if __name__ == "__main__": + args = parse_args() + main(args.notebooks, mock=args.mock) From ff441e03df7f4c806ede8b3422cb36075a300b1a Mon Sep 17 00:00:00 2001 From: Will Dean Date: Fri, 15 Nov 2024 18:49:10 -0500 Subject: [PATCH 2/6] add a docstring --- 
scripts/run_notebooks/runner.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/scripts/run_notebooks/runner.py b/scripts/run_notebooks/runner.py index d1291754d..06e5c741f 100644 --- a/scripts/run_notebooks/runner.py +++ b/scripts/run_notebooks/runner.py @@ -1,4 +1,22 @@ -"""Script to run all notebooks in the docs/source/notebooks directory.""" +"""CLI to run a notebook or a directory of notebooks. + +Arguments +--------- +--notebooks: Specific notebook or directory of notebooks to run. +--mock: Run notebooks with mock code. Default is True. If --no-mock is provided, + notebooks will run without mock code. + +Examples +-------- +Run all notebooks in a directory with mock code: + + $ python scripts/run_notebooks/runner.py --notebooks notebooks/ --mock + +Run a single notebook without mocked code: + + $ python scripts/run_notebooks/runner.py --notebooks notebooks/notebook.ipynb --no-mock + +""" from argparse import ArgumentParser From cdfb6ab7c3053b240fcbb865a2f2247085bc2389 Mon Sep 17 00:00:00 2001 From: Will Dean Date: Fri, 15 Nov 2024 18:54:01 -0500 Subject: [PATCH 3/6] add a docstring --- scripts/run_notebooks/runner.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/scripts/run_notebooks/runner.py b/scripts/run_notebooks/runner.py index 06e5c741f..c9d6d799a 100644 --- a/scripts/run_notebooks/runner.py +++ b/scripts/run_notebooks/runner.py @@ -10,11 +10,21 @@ -------- Run all notebooks in a directory with mock code: - $ python scripts/run_notebooks/runner.py --notebooks notebooks/ --mock +.. code-block:: bash + + python scripts/run_notebooks/runner.py --notebooks notebooks/ --mock Run a single notebook without mocked code: - $ python scripts/run_notebooks/runner.py --notebooks notebooks/notebook.ipynb --no-mock +..
code-block:: bash + + python scripts/run_notebooks/runner.py --notebooks notebooks/notebook.ipynb --no-mock + +Run all the notebooks in two different directories with mocked code (default): + +.. code-block:: bash + + python scripts/run_notebooks/runner.py --notebooks notebooks/ notebooks2/ """ From 059d628e3fc43754ffb5ba2a5877f9093afa3e81 Mon Sep 17 00:00:00 2001 From: Will Dean Date: Fri, 15 Nov 2024 19:17:28 -0500 Subject: [PATCH 4/6] print out the successes as well --- scripts/run_notebooks/runner.py | 41 ++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/scripts/run_notebooks/runner.py b/scripts/run_notebooks/runner.py index c9d6d799a..957ecd38d 100644 --- a/scripts/run_notebooks/runner.py +++ b/scripts/run_notebooks/runner.py @@ -31,6 +31,7 @@ from argparse import ArgumentParser from rich.console import Console +from dataclasses import dataclass import logging from pathlib import Path from tempfile import NamedTemporaryFile @@ -99,7 +100,13 @@ def actual_run(notebook_path: Path, i: int, total: int) -> None: ) -class NotebookFailure(TypedDict): +@dataclass +class NotebookSuccess: + notebook_path: Path + + +@dataclass +class NotebookFailure: notebook_path: Path error: str @@ -109,19 +116,17 @@ def run_notebook( i: int, total: int, mock: bool = True, -) -> NotebookFailure | None: +) -> NotebookFailure | NotebookSuccess: logging.info(f"Running notebook: {notebook_path.name}") run = mock_run if mock else actual_run try: run(notebook_path, i=i, total=total) except Exception as e: - logging.error( - f"{e.__class__.__name__} encountered running notebook: {str(notebook_path)}" - ) + logging.error(f"{e.__class__.__name__} encountered running notebook: {str(notebook_path)}") return NotebookFailure(notebook_path=notebook_path, error=str(e)) else: - return + return NotebookSuccess(notebook_path=notebook_path) class RunParams(TypedDict): @@ -133,19 +138,13 @@ class RunParams(TypedDict): def run_parameters(notebook_paths:
list[Path], mock: bool = True) -> list[RunParams]: def to_mock(notebook_path: Path, i: int) -> RunParams: - return RunParams( - notebook_path=notebook_path, mock=mock, i=i, total=len(notebook_paths) - ) + return RunParams(notebook_path=notebook_path, mock=mock, i=i, total=len(notebook_paths)) - return [ - to_mock(notebook_path, i=i) - for i, notebook_path in enumerate(notebook_paths, start=1) - ] + return [to_mock(notebook_path, i=i) for i, notebook_path in enumerate(notebook_paths, start=1)] def main(notebooks_to_run: list[Path], mock: bool = True) -> None: console = Console() - errors: list[NotebookFailure] setup_logging() logging.info("Starting notebook runner") logging.info(f"Running {len(notebooks_to_run)} notebook(s).") @@ -153,15 +152,21 @@ def main(notebooks_to_run: list[Path], mock: bool = True) -> None: delayed(run_notebook)(**run_params) for run_params in run_parameters(notebooks_to_run, mock=mock) ) - errors = [result for result in results if result is not None] + errors: list[NotebookFailure] = list(filter(lambda x: isinstance(x, NotebookFailure), results)) + successes: list[NotebookSuccess] = list( + filter(lambda x: isinstance(x, NotebookSuccess), results) + ) if not errors: - logging.info("Notebooks run successfully!") + logging.info("All notebooks ran successfully!") return for error in errors: - console.rule(f"[bold red]Error running {error['notebook_path']}[/bold red]") - console.print(error["error"]) + console.rule(f"[bold red]Error running {error.notebook_path}[/bold red]") + console.print(error.error) + + for success in successes: + console.print(f"[bold green]Success running {success.notebook_path}[/bold green]") logging.error(f"{len(errors)} / {len(notebooks_to_run)} notebooks failed") From dfd3321f3b1473cb02274ed5b0f0bfba8d44fb08 Mon Sep 17 00:00:00 2001 From: Will Dean Date: Fri, 15 Nov 2024 20:24:24 -0500 Subject: [PATCH 5/6] add a dummy sampling_time --- scripts/run_notebooks/injected.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 
deletions(-) diff --git a/scripts/run_notebooks/injected.py b/scripts/run_notebooks/injected.py index c8174343b..ffb8d8f6c 100644 --- a/scripts/run_notebooks/injected.py +++ b/scripts/run_notebooks/injected.py @@ -26,10 +26,10 @@ def mock_sample(*args, **kwargs): coords={"chain": np.arange(n_chains)}, ) idata.add_groups( - posterior=(idata.prior.mean("chain") * expanded_chains).transpose( - "chain", "draw", ... - ) + posterior=(idata.prior.mean("chain") * expanded_chains).transpose("chain", "draw", ...) ) + idata.posterior.attrs["sampling_time"] = 1.0 + if "prior" in idata: del idata.prior if "prior_predictive" in idata: @@ -50,9 +50,7 @@ def mock_sample(*args, **kwargs): dims=("chain", "draw"), ), "tree_depth": xr.DataArray( - rng.choice( - [1, 2, 3], p=[0.01, 0.86, 0.13], size=(n_chains, n_draws) - ), + rng.choice([1, 2, 3], p=[0.01, 0.86, 0.13], size=(n_chains, n_draws)), dims=("chain", "draw"), ), "acceptance_rate": xr.DataArray( From 69563da49ceef589127140d3675a05b2ba4c3ae0 Mon Sep 17 00:00:00 2001 From: Will Dean Date: Sat, 16 Nov 2024 07:35:59 -0500 Subject: [PATCH 6/6] code format --- scripts/run_notebooks/runner.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/run_notebooks/runner.py b/scripts/run_notebooks/runner.py index 957ecd38d..45ab6e620 100644 --- a/scripts/run_notebooks/runner.py +++ b/scripts/run_notebooks/runner.py @@ -138,7 +138,12 @@ class RunParams(TypedDict): def run_parameters(notebook_paths: list[Path], mock: bool = True) -> list[RunParams]: def to_mock(notebook_path: Path, i: int) -> RunParams: - return RunParams(notebook_path=notebook_path, mock=mock, i=i, total=len(notebook_paths)) + return RunParams( + notebook_path=notebook_path, + mock=mock, + i=i, + total=len(notebook_paths), + ) return [to_mock(notebook_path, i=i) for i, notebook_path in enumerate(notebook_paths, start=1)]