Skip to content

Commit 851e554

Browse files
committed
pre commit correction
1 parent 3d5a97f commit 851e554

File tree

2 files changed

+257
-499
lines changed

2 files changed

+257
-499
lines changed

examples/case_studies/bayesian_ab_testing.ipynb

+228-475
Large diffs are not rendered by default.

myst_nbs/case_studies/bayesian_ab_testing.myst.md

+29-24
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ jupytext:
55
format_name: myst
66
format_version: 0.13
77
jupytext_version: 1.13.7
8+
kernelspec:
9+
display_name: Python 3 (ipykernel)
10+
language: python
11+
name: python3
812
---
913

1014
```{code-cell} ipython3
@@ -16,7 +20,6 @@ import matplotlib.pyplot as plt
1620
import numpy as np
1721
import pandas as pd
1822
import pymc as pm
19-
import pymc.math as pmm
2023
2124
from scipy.stats import bernoulli, expon
2225
@@ -29,6 +32,12 @@ rng = np.random.default_rng(RANDOM_SEED)
2932
3033
%config InlineBackend.figure_format = 'retina'
3134
az.style.use("arviz-darkgrid")
35+
36+
plotting_defaults = dict(
37+
bins=50,
38+
kind="hist",
39+
textsize=10,
40+
)
3241
```
3342

3443
This notebook demonstrates how to implement a Bayesian analysis of an A/B test. We implement the models discussed in VWO's [Bayesian A/B Testing Whitepaper](https://vwo.com/downloads/VWO_SmartStats_technical_whitepaper.pdf), and discuss the effect of different prior choices for these models. This notebook does _not_ discuss other related topics like how to choose a prior, early stopping, and power analysis.
@@ -145,12 +154,12 @@ with strong_prior.create_model(data=[BinomialData(1, 1), BinomialData(1, 1)]):
145154

146155
```{code-cell} ipython3
147156
fig, axs = plt.subplots(2, 1, figsize=(7, 7), sharex=True)
148-
az.plot_posterior(weak_prior_predictive["reluplift_b"], textsize=10, ax=axs[0], kind="hist")
157+
az.plot_posterior(weak_prior_predictive["reluplift_b"], ax=axs[0], **plotting_defaults)
149158
axs[0].set_title(f"B vs. A Rel Uplift Prior Predictive, {weak_prior.priors}", fontsize=10)
150159
axs[0].axvline(x=0, color="red")
151-
az.plot_posterior(strong_prior_predictive["reluplift_b"], textsize=10, ax=axs[1], kind="hist")
160+
az.plot_posterior(strong_prior_predictive["reluplift_b"], ax=axs[1], **plotting_defaults)
152161
axs[1].set_title(f"B vs. A Rel Uplift Prior Predictive, {strong_prior.priors}", fontsize=10)
153-
axs[1].axvline(x=0, color="red")
162+
axs[1].axvline(x=0, color="red");
154163
```
155164

156165
With the weak prior, our 94% HDI for the relative uplift for B over A is roughly [-20%, +20%], whereas it is roughly [-2%, +2%] with the strong prior. This is effectively the "starting point" for the relative uplift distribution, and will affect how the observed conversions translate to the posterior distribution.
@@ -200,26 +209,27 @@ def run_scenario_twovariant(
200209
generated = generate_binomial_data(variants, true_rates, samples_per_variant)
201210
data = [BinomialData(**generated[v].to_dict()) for v in variants]
202211
with ConversionModelTwoVariant(priors=weak_prior).create_model(data):
203-
trace_weak = pm.sample(draws=5000, cores=1, chains=2)
212+
trace_weak = pm.sample(draws=5000)
204213
with ConversionModelTwoVariant(priors=strong_prior).create_model(data):
205-
trace_strong = pm.sample(draws=5000, cores=1, chains=2)
214+
trace_strong = pm.sample(draws=5000)
206215
207216
true_rel_uplift = true_rates[1] / true_rates[0] - 1
208217
209218
fig, axs = plt.subplots(2, 1, figsize=(7, 7), sharex=True)
210-
az.plot_posterior(trace_weak.posterior["reluplift_b"], textsize=10, ax=axs[0], kind="hist")
219+
az.plot_posterior(trace_weak.posterior["reluplift_b"], ax=axs[0], **plotting_defaults)
211220
axs[0].set_title(f"True Rel Uplift = {true_rel_uplift:.1%}, {weak_prior}", fontsize=10)
212221
axs[0].axvline(x=0, color="red")
213-
az.plot_posterior(trace_strong.posterior["reluplift_b"], textsize=10, ax=axs[1], kind="hist")
222+
az.plot_posterior(trace_strong.posterior["reluplift_b"], ax=axs[1], **plotting_defaults)
214223
axs[1].set_title(f"True Rel Uplift = {true_rel_uplift:.1%}, {strong_prior}", fontsize=10)
215224
axs[1].axvline(x=0, color="red")
216225
fig.suptitle("B vs. A Rel Uplift")
226+
return trace_weak, trace_strong
217227
```
218228

219229
#### Scenario 1 - same underlying conversion rates
220230

221231
```{code-cell} ipython3
222-
run_scenario_twovariant(
232+
trace_weak, trace_strong = run_scenario_twovariant(
223233
variants=["A", "B"],
224234
true_rates=[0.23, 0.23],
225235
samples_per_variant=100000,
@@ -286,7 +296,7 @@ class ConversionModel:
286296
elif comparison_method == "best_of_rest":
287297
others = [p[j] for j in range(num_variants) if j != i]
288298
if len(others) > 1:
289-
comparison = pmm.maximum(*others)
299+
comparison = pm.math.maximum(*others)
290300
else:
291301
comparison = others[0]
292302
else:
@@ -306,17 +316,15 @@ def run_scenario_bernoulli(
306316
generated = generate_binomial_data(variants, true_rates, samples_per_variant)
307317
data = [BinomialData(**generated[v].to_dict()) for v in variants]
308318
with ConversionModel(priors).create_model(data=data, comparison_method=comparison_method):
309-
trace = pm.sample(draws=5000, chains=2, cores=1)
319+
trace = pm.sample(draws=5000)
310320
311321
n_plots = len(variants)
312322
fig, axs = plt.subplots(nrows=n_plots, ncols=1, figsize=(3 * n_plots, 7), sharex=True)
313323
for i, variant in enumerate(variants):
314324
if i == 0 and comparison_method == "compare_to_control":
315325
axs[i].set_yticks([])
316326
else:
317-
az.plot_posterior(
318-
trace.posterior[f"reluplift_{i}"], textsize=10, ax=axs[i], kind="hist"
319-
)
327+
az.plot_posterior(trace.posterior[f"reluplift_{i}"], ax=axs[i], **plotting_defaults)
320328
axs[i].set_title(f"Rel Uplift {variant}, True Rate = {true_rates[i]:.2%}", fontsize=10)
321329
axs[i].axvline(x=0, color="red")
322330
fig.suptitle(f"Method {comparison_method}, {priors}")
@@ -447,9 +455,9 @@ class RevenueModel:
447455
others_lam = [1 / lam[j] for j in range(num_variants) if j != i]
448456
others_rpv = [revenue_per_visitor[j] for j in range(num_variants) if j != i]
449457
if len(others_rpv) > 1:
450-
comparison_theta = pmm.maximum(*others_theta)
451-
comparison_lam = pmm.maximum(*others_lam)
452-
comparison_rpv = pmm.maximum(*others_rpv)
458+
comparison_theta = pm.math.maximum(*others_theta)
459+
comparison_lam = pm.math.maximum(*others_lam)
460+
comparison_rpv = pm.math.maximum(*others_rpv)
453461
else:
454462
comparison_theta = others_theta[0]
455463
comparison_lam = others_lam[0]
@@ -493,9 +501,9 @@ with RevenueModel(c_prior, mp_prior).create_model(data, "best_of_rest"):
493501

494502
```{code-cell} ipython3
495503
fig, ax = plt.subplots()
496-
az.plot_posterior(revenue_prior_predictive["reluplift_1"], textsize=10, ax=ax, kind="hist")
504+
az.plot_posterior(revenue_prior_predictive["reluplift_1"], ax=ax, **plotting_defaults)
497505
ax.set_title(f"Revenue Rel Uplift Prior Predictive, {c_prior}, {mp_prior}", fontsize=10)
498-
ax.axvline(x=0, color="red")
506+
ax.axvline(x=0, color="red");
499507
```
500508

501509
As with the model for Bernoulli conversions, the width of the prior predictive uplift distribution will depend on the strength of our priors. See the Bernoulli conversions section for a discussion of the benefits and disadvantages of using a weak vs. strong prior.
@@ -557,9 +565,7 @@ def run_scenario_value(
557565
if i == 0 and comparison_method == "compare_to_control":
558566
axs[i].set_yticks([])
559567
else:
560-
az.plot_posterior(
561-
trace.posterior[f"reluplift_{i}"], textsize=10, ax=axs[i], kind="hist"
562-
)
568+
az.plot_posterior(trace.posterior[f"reluplift_{i}"], ax=axs[i], **plotting_defaults)
563569
true_rpv = true_conversion_rates[i] * true_mean_purchase[i]
564570
axs[i].set_title(f"Rel Uplift {variant}, True RPV = {true_rpv:.2f}", fontsize=10)
565571
axs[i].axvline(x=0, color="red")
@@ -607,8 +613,7 @@ scenario_value_2 = run_scenario_value(
607613
axs = az.plot_posterior(
608614
scenario_value_2,
609615
var_names=["theta_reluplift_1", "reciprocal_lam_reluplift_1"],
610-
textsize=10,
611-
kind="hist",
616+
**plotting_defaults,
612617
)
613618
axs[0].set_title(f"Conversion Rate Uplift B, True Uplift = {(0.04 / 0.05 - 1):.2%}", fontsize=10)
614619
axs[0].axvline(x=0, color="red")

0 commit comments

Comments
 (0)