Skip to content

Commit 1ea62a7

Browse files
committed
[Hierarchical BVAR pymc-devs#286] added forest plots, corrected typos and improved discussion.
Signed-off-by: Nathaniel <[email protected]>
1 parent d1788f0 commit 1ea62a7

File tree

2 files changed

+6812
-6525
lines changed

2 files changed

+6812
-6525
lines changed

examples/time_series/bayesian_var_model.ipynb

+6,743-6,505
Large diffs are not rendered by default.

myst_nbs/time_series/bayesian_var_model.myst.md

+69-20
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,13 @@ kernelspec:
1414
(Bayesian Vector Autoregressive Models)=
1515
# Bayesian Vector Autoregressive Models
1616

17-
:::{post} Oct, 2022
17+
:::{post} November, 2022
1818
:tags: time series, VARs
1919
:category: intermediate
2020
:author: Nathaniel Forde
2121
:::
2222

2323
```{code-cell} ipython3
24-
import pickle
25-
2624
import aesara as at
2725
import arviz as az
2826
import matplotlib.dates as mdates
@@ -33,7 +31,6 @@ import pymc as pm
3331
import statsmodels.api as sm
3432
3533
from pymc.sampling_jax import sample_blackjax_nuts
36-
from statsmodels.tsa.api import VAR
3734
```
3835

3936
```{code-cell} ipython3
@@ -317,12 +314,12 @@ def plot_ppc(idata, df, group="posterior_predictive"):
317314
axs = axs.flatten()
318315
ppc = az.extract_dataset(idata, group=group, num_samples=100)["obs"]
319316
# Minus the lagged terms and the constant
320-
shade_background(ppc, axs, 0, "viridis")
317+
shade_background(ppc, axs, 0, "plasma")
321318
axs[0].plot(np.arange(ppc.shape[0]), ppc[:, 0, :].mean(axis=1), color="cyan", label="Mean")
322319
axs[0].plot(df["x"], "o", color="black", markersize=6, label="Observed")
323320
axs[0].set_title("VAR Series 1")
324321
axs[0].legend()
325-
shade_background(ppc, axs, 1, "viridis")
322+
shade_background(ppc, axs, 1, "plasma")
326323
axs[1].plot(df["y"], "o", color="black", markersize=6, label="Observed")
327324
axs[1].plot(np.arange(ppc.shape[0]), ppc[:, 1, :].mean(axis=1), color="cyan", label="Mean")
328325
axs[1].set_title("VAR Series 2")
@@ -342,6 +339,8 @@ plot_corr_matrix(idata_fake_data, group="posterior")
342339

343340
## Applying the Theory: Macro Economic Timeseries
344341

342+
The data is from the World Bank’s World Development Indicators. In particular, we're pulling annual values of GDP, consumption, and gross fixed capital formation (investment) for all countries from 1970. Timeseries models in general work best when we have a stable mean throughout the series, so for the estimation procedure we have taken the first difference and the natural log of each of these series.
343+
345344
```{code-cell} ipython3
346345
gdp_hierarchical = pd.read_csv("../data/gdp_data_hierarchical_clean.csv", index_col=0)
347346
gdp_hierarchical
@@ -399,12 +398,12 @@ def plot_ppc_macro(idata, df, group="posterior_predictive"):
399398
axs = axs.flatten()
400399
ppc = az.extract_dataset(idata, group=group, num_samples=100)["obs"]
401400
402-
shade_background(ppc, axs, 0)
401+
shade_background(ppc, axs, 0, "plasma")
403402
axs[0].plot(np.arange(ppc.shape[0]), ppc[:, 0, :].mean(axis=1), color="cyan", label="Mean")
404403
axs[0].plot(df["dl_gdp"], "o", color="black", markersize=6, label="Observed")
405404
axs[0].set_title("Differenced and Logged GDP")
406405
axs[0].legend()
407-
shade_background(ppc, axs, 1)
406+
shade_background(ppc, axs, 1, "plasma")
408407
axs[1].plot(df["dl_cons"], "o", color="black", markersize=6, label="Observed")
409408
axs[1].plot(np.arange(ppc.shape[0]), ppc[:, 1, :].mean(axis=1), color="cyan", label="Mean")
410409
axs[1].set_title("Differenced and Logged Consumption")
@@ -476,7 +475,7 @@ def make_hierarchical_model(n_lags, n_eqs, df, group_field, mv_norm=True, prior_
476475
mu=beta_hat_location,
477476
sigma=beta_hat_scale * z_scale_beta,
478477
dims=["equations", "lags", "cross_vars"],
479-
) # filters x rows x columns
478+
)
480479
alpha = pm.Normal(
481480
f"alpha_{grp}",
482481
mu=alpha_hat_location,
@@ -549,6 +548,8 @@ az.plot_trace(
549548
az.plot_ppc(idata_full_test);
550549
```
551550

551+
Next we'll look at some of the summary statistics and how they vary across the countries.
552+
552553
```{code-cell} ipython3
553554
az.summary(
554555
idata_full_test,
@@ -558,13 +559,65 @@ az.summary(
558559
"alpha_hat_scale",
559560
"beta_hat_location",
560561
"beta_hat_scale",
562+
"z_scale_alpha_Ireland",
563+
"z_scale_alpha_United States",
564+
"z_scale_beta_Ireland",
565+
"z_scale_beta_United States",
566+
"alpha_Ireland",
567+
"alpha_United States",
561568
"omega_global_corr",
562569
"lag_coefs_Ireland",
563570
"lag_coefs_United States",
564571
],
565572
)
566573
```
567574

575+
```{code-cell} ipython3
576+
ax = az.plot_forest(
577+
idata_full_test,
578+
var_names=[
579+
"alpha_Ireland",
580+
"alpha_United States",
581+
"alpha_Australia",
582+
"alpha_Chile",
583+
"alpha_New Zealand",
584+
"alpha_South Africa",
585+
"alpha_Canada",
586+
"alpha_United Kingdom",
587+
],
588+
kind="ridgeplot",
589+
combined=True,
590+
ridgeplot_truncate=False,
591+
ridgeplot_quantiles=[0.25, 0.5, 0.75],
592+
ridgeplot_overlap=0.7,
593+
figsize=(10, 10),
594+
)
595+
596+
ax[0].set_title("Intercept Parameters for each country \n and Economic Measure");
597+
```
598+
599+
```{code-cell} ipython3
600+
ax = az.plot_forest(
601+
idata_full_test,
602+
var_names=[
603+
"lag_coefs_Ireland",
604+
"lag_coefs_United States",
605+
"lag_coefs_Australia",
606+
"lag_coefs_Chile",
607+
"lag_coefs_New Zealand",
608+
"lag_coefs_South Africa",
609+
"lag_coefs_Canada",
610+
"lag_coefs_United Kingdom",
611+
],
612+
kind="ridgeplot",
613+
ridgeplot_truncate=False,
614+
figsize=(10, 10),
615+
coords={"equations": "dl_cons", "lags": 1, "cross_vars": "dl_gdp"},
616+
)
617+
618+
ax[0].set_title("Lag Coefficient for the first lag of GDP on Consumption \n by Country");
619+
```
620+
568621
```{code-cell} ipython3
569622
corr = pd.DataFrame(
570623
az.summary(idata_full_test, var_names=["omega_global_corr"])["mean"].values.reshape(3, 3),
@@ -574,7 +627,7 @@ corr.index = ["GDP", "CONS", "GFCF"]
574627
corr
575628
```
576629

577-
We can see these estimates of the correlations between the 3 economic variables differ markedly from the simple case where we examined Ireland alone. Next we'll plot the model fits for each country to ensure that the predictive distribution can recover the observed data.
630+
We can see these estimates of the correlations between the 3 economic variables differ markedly from the simple case where we examined Ireland alone, which suggests that we have learned something about the relationship between these variables that would not be clear from examining the Irish case alone. Next we'll plot the model fits for each country to ensure that the predictive distribution can recover the observed data. It is important for the question of model adequacy that we can recover both the outlier case of Ireland and the more regular countries such as Australia and the United States.
578631

579632
```{code-cell} ipython3
580633
countries = gdp_hierarchical["country"].unique()
@@ -590,7 +643,7 @@ for ax, country in zip(axs, countries):
590643
f"obs_{country}"
591644
]
592645
for i in range(3):
593-
shade_background(ppc, ax, i)
646+
shade_background(ppc, ax, i, "plasma")
594647
ax[0].plot(np.arange(ppc.shape[0]), ppc[:, 0, :].mean(axis=1), color="cyan", label="Mean")
595648
ax[0].plot(temp["dl_gdp"], "o", color="black", markersize=4, label="Observed")
596649
ax[0].set_title(f"Posterior Predictive GDP: {country}")
@@ -606,19 +659,15 @@ for ax, country in zip(axs, countries):
606659
plt.suptitle("Posterior Predictive Checks on Hierarchical VAR", fontsize=20);
607660
```
608661

609-
Here we can see that the model appears to have recovered reasonable enough posterior predictions for the observed data. We'll save these model fits to be used in the next post in the series.
610-
611-
```{code-cell} ipython3
612-
with open("../data/hierarchical_var_fit.pickle", "wb") as handle:
613-
pickle.dump(idata_full_test, handle, protocol=pickle.HIGHEST_PROTOCOL)
662+
Here we can see that the model appears to have recovered reasonable enough posterior predictions for the observed data.
614663

615-
with open("../data/ireland_var_fit.pickle", "wb") as handle:
616-
pickle.dump(idata_ireland, handle, protocol=pickle.HIGHEST_PROTOCOL)
617-
```
664+
+++
618665

619666
## Conclusion
620667

621-
VAR modelling is a rich an interesting area of research within economics and there are a range of challenges and pitfalls which come with the interpretation and understanding of these models. We hope this example encourages you to continue exploring the potential of this kind of VAR modelling in the Bayesian framework. Whether you're interested in the relationship between grand economic theory or simpler questions about the impact of poor app performance on customer feedback, VAR models give you a powerful tool for interrogating these relationships over time.
668+
VAR modelling is a rich and interesting area of research within economics, and there are a range of challenges and pitfalls which come with the interpretation and understanding of these models. We hope this example encourages you to continue exploring the potential of this kind of VAR modelling in the Bayesian framework. Whether you're interested in the relationships of grand economic theory or simpler questions about the impact of poor app performance on customer feedback, VAR models give you a powerful tool for interrogating these relationships over time. As we've seen, hierarchical VARs further enable the precise quantification of outliers within a cohort, and do not throw away the information because of odd accounting practices engendered by international capitalism.
669+
670+
In the next post in this series we will spend some time digging into the implied relationships between the timeseries which result from fitting our VAR models.
622671

623672
+++
624673

0 commit comments

Comments
 (0)