diff --git a/Chapter3_MCMC/IntroMCMC.ipynb b/Chapter3_MCMC/IntroMCMC.ipynb
index 7a5d7084..8157c90c 100644
--- a/Chapter3_MCMC/IntroMCMC.ipynb
+++ b/Chapter3_MCMC/IntroMCMC.ipynb
@@ -146,12 +146,12 @@
     "cell_type": "code",
     "collapsed": false,
     "input": [
-     "### create the observed data\n",
+     "# create the observed data\n",
      "\n",
-     "#sample size of data we observe, trying varying this (keep it less than 100 ;)\n",
+     "# sample size of data we observe, try varying this (keep it less than 100 ;)\n",
      "N = 1\n",
      "\n",
-     "#the true parameters, but of course we do not see these values...\n",
+     "# the true parameters, but of course we do not see these values...\n",
      "lambda_1_true = 1\n",
      "lambda_2_true = 3\n",
      "\n",
@@ -159,16 +159,16 @@
      "data = np.concatenate([\n",
      " stats.poisson.rvs(lambda_1_true, size=(N, 1)),\n",
      " stats.poisson.rvs(lambda_2_true, size=(N, 1))\n",
-     " ], axis=1)\n",
+     "], axis=1)\n",
      "print \"observed (2-dimensional,sample size = %d):\" % N, data\n",
      "\n",
-     "#plotting details.\n",
+     "# plotting details.\n",
      "x = y = np.linspace(.01, 5, 100)\n",
      "likelihood_x = np.array([stats.poisson.pmf(data[:, 0], _x)\n",
      " for _x in x]).prod(axis=1)\n",
      "likelihood_y = np.array([stats.poisson.pmf(data[:, 1], _y)\n",
      " for _y in y]).prod(axis=1)\n",
-     "L = np.dot(likelihood_x[:, None], likelihood_y[None, :])"
+     "L = np.dot(likelihood_x[:, None], likelihood_y[None, :]);"
     ],
     "language": "python",
     "metadata": {},
@@ -188,7 +188,7 @@
     "collapsed": false,
     "input": [
      "figsize(12.5, 12)\n",
-     "#matplotlib heavy lifting below, beware!\n",
+     "# matplotlib heavy lifting below, beware!\n",
      "plt.subplot(221)\n",
      "uni_x = stats.uniform.pdf(x, loc=0, scale=5)\n",
      "uni_y = stats.uniform.pdf(x, loc=0, scale=5)\n",
@@ -319,12 +319,12 @@
     "collapsed": false,
     "input": [
      "figsize(12.5, 4)\n",
-     "data = np.loadtxt(\"data/mixture_data.csv\", delimiter=\",\")\n",
+     "data = np.loadtxt(\"data/mixture_data.csv\", delimiter=\",\")\n",
      "\n",
-     "plt.hist(data, bins=20, color=\"k\", histtype=\"stepfilled\", alpha=0.8)\n",
+     "plt.hist(data, bins=20, color=\"k\", histtype=\"stepfilled\", alpha=0.8)\n",
      "plt.title(\"Histogram of the dataset\")\n",
      "plt.ylim([0, None])\n",
-     "print data[:10], \"...\""
+     "print data[:10], \"...\";"
     ],
     "language": "python",
     "metadata": {},
@@ -376,9 +376,9 @@
      "\n",
      "p = pm.Uniform(\"p\", 0, 1)\n",
      "\n",
-     "assignment = pm.Categorical(\"assignment\", [p, 1-p], size=data.shape[0])\n",
+     "assignment = pm.Categorical(\"assignment\", [p, 1 - p], size=data.shape[0])\n",
      "print \"prior assignment, with p = %.2f:\" % p.value\n",
-     "print assignment.value[:10], \"...\""
+     "print assignment.value[:10], \"...\";"
     ],
     "language": "python",
     "metadata": {},
@@ -415,7 +415,7 @@
     "cell_type": "code",
     "collapsed": false,
     "input": [
-     "taus = 1.0/pm.Uniform(\"stds\", 0, 100, size=2) ** 2\n",
+     "taus = 1.0 / pm.Uniform(\"stds\", 0, 100, size=2) ** 2\n",
      "centers = pm.Normal(\"centers\", [120, 190], [0.01, 0.01], size=2)\n",
      "\n",
      "\"\"\"\n",
@@ -423,17 +423,19 @@
      "to a set of parameters, located in the (1,2) arrays `taus` and `centers`.\n",
      "\"\"\"\n",
      "\n",
+     "\n",
      "@pm.deterministic\n",
      "def center_i(assignment=assignment, centers=centers):\n",
      " return centers[assignment]\n",
      "\n",
+     "\n",
      "@pm.deterministic\n",
      "def tau_i(assignment=assignment, taus=taus):\n",
      " return taus[assignment]\n",
      "\n",
      "print \"Random assignments: \", assignment.value[:4], \"...\"\n",
      "print \"Assigned center: \", center_i.value[:4], \"...\"\n",
-     "print \"Assigned precision: \", tau_i.value[:4], \"...\""
+     "print \"Assigned precision: \", tau_i.value[:4], \"...\";"
     ],
     "language": "python",
     "metadata": {},
@@ -454,11 +456,11 @@
     "cell_type": "code",
     "collapsed": false,
     "input": [
-     "#and to combine it with the observations:\n",
+     "# and to combine it with the observations:\n",
      "observations = pm.Normal(\"obs\", center_i, tau_i, value=data, observed=True)\n",
      "\n",
-     "#below we create a model class\n",
-     "model = pm.Model([p, assignment, taus, centers])"
+     "# below we create a model class\n",
+     "model = pm.Model([p, assignment, taus, centers]);"
     ],
     "language": "python",
     "metadata": {},
@@ -481,7 +483,7 @@
     "collapsed": false,
     "input": [
      "mcmc = pm.MCMC(model)\n",
-     "mcmc.sample(50000)"
+     "mcmc.sample(50000);"
     ],
     "language": "python",
     "metadata": {},
@@ -520,9 +522,9 @@
      "lw = 1\n",
      "center_trace = mcmc.trace(\"centers\")[:]\n",
      "\n",
-     "#for pretty colors later in the book.\n",
+     "# for pretty colors later in the book.\n",
      "colors = [\"#348ABD\", \"#A60628\"] if center_trace[-1, 0] > center_trace[-1, 1] \\\n",
-     " else [\"#A60628\", \"#348ABD\"]\n",
+     " else [\"#A60628\", \"#348ABD\"]\n",
      "\n",
      "plt.plot(center_trace[:, 0], label=\"trace of center 0\", c=colors[0], lw=lw)\n",
      "plt.plot(center_trace[:, 1], label=\"trace of center 1\", c=colors[1], lw=lw)\n",
@@ -533,15 +535,15 @@
      "plt.subplot(312)\n",
      "std_trace = mcmc.trace(\"stds\")[:]\n",
      "plt.plot(std_trace[:, 0], label=\"trace of standard deviation of cluster 0\",\n",
-     " c=colors[0], lw=lw)\n",
+     " c=colors[0], lw=lw)\n",
      "plt.plot(std_trace[:, 1], label=\"trace of standard deviation of cluster 1\",\n",
-     " c=colors[1], lw=lw)\n",
+     " c=colors[1], lw=lw)\n",
      "plt.legend(loc=\"upper left\")\n",
      "\n",
      "plt.subplot(313)\n",
      "p_trace = mcmc.trace(\"p\")[:]\n",
      "plt.plot(p_trace, label=\"$p$: frequency of assignment to cluster 0\",\n",
-     " color=\"#467821\", lw=lw)\n",
+     " color=\"#467821\", lw=lw)\n",
      "plt.xlabel(\"Steps\")\n",
      "plt.ylim(0, 1)\n",
      "plt.legend();"
@@ -580,7 +582,7 @@
     "cell_type": "code",
     "collapsed": false,
     "input": [
-     "mcmc.sample(100000)"
+     "mcmc.sample(100000);"
     ],
     "language": "python",
     "metadata": {},
@@ -613,13 +615,13 @@
      "\n",
      "x = np.arange(50000)\n",
      "plt.plot(x, prev_center_trace[:, 0], label=\"previous trace of center 0\",\n",
-     " lw=lw, alpha=0.4, c=colors[1])\n",
+     " lw=lw, alpha=0.4, c=colors[1])\n",
      "plt.plot(x, prev_center_trace[:, 1], label=\"previous trace of center 1\",\n",
-     " lw=lw, alpha=0.4, c=colors[0])\n",
+     " lw=lw, alpha=0.4, c=colors[0])\n",
      "\n",
      "x = np.arange(50000, 150000)\n",
-     "plt.plot(x, center_trace[:, 0], label=\"new trace of center 0\", lw=lw, c=\"#348ABD\")\n",
-     "plt.plot(x, center_trace[:, 1], label=\"new trace of center 1\", lw=lw, c=\"#A60628\")\n",
+     "plt.plot(x, center_trace[:, 0], label=\"new trace of center 0\", lw=lw, c=\"#348ABD\")\n",
+     "plt.plot(x, center_trace[:, 1], label=\"new trace of center 1\", lw=lw, c=\"#A60628\")\n",
      "\n",
      "plt.title(\"Traces of unknown center parameters\")\n",
      "leg = plt.legend(loc=\"upper right\")\n",
@@ -669,9 +671,9 @@
      " plt.title(\"Posterior of standard deviation of cluster %d\" % i)\n",
      " plt.hist(std_trace[:, i], color=colors[i], bins=30,\n",
      " histtype=\"stepfilled\")\n",
-     " #plt.autoscale(tight=True)\n",
+     " # plt.autoscale(tight=True)\n",
      "\n",
-     "plt.tight_layout()"
+     "plt.tight_layout();"
     ],
     "language": "python",
     "metadata": {},
@@ -704,9 +706,9 @@
      "figsize(12.5, 4.5)\n",
      "plt.cmap = mpl.colors.ListedColormap(colors)\n",
      "plt.imshow(mcmc.trace(\"assignment\")[::400, np.argsort(data)],\n",
-     " cmap=plt.cmap, aspect=.4, alpha=.9)\n",
+     " cmap=plt.cmap, aspect=.4, alpha=.9)\n",
      "plt.xticks(np.arange(0, data.shape[0], 40),\n",
-     " [\"%.2f\" % s for s in np.sort(data)[::40]])\n",
+     " [\"%.2f\" % s for s in np.sort(data)[::40]])\n",
      "plt.ylabel(\"posterior sample\")\n",
      "plt.xlabel(\"value of $i$th data point\")\n",
      "plt.title(\"Posterior labels of data points\");"
@@ -738,13 +740,13 @@
     "input": [
      "cmap = mpl.colors.LinearSegmentedColormap.from_list(\"BMH\", colors)\n",
      "assign_trace = mcmc.trace(\"assignment\")[:]\n",
-     "plt.scatter(data, 1-assign_trace.mean(axis=0), cmap=cmap,\n",
-     " c=assign_trace.mean(axis=0), s=50)\n",
+     "plt.scatter(data, 1 - assign_trace.mean(axis=0), cmap=cmap,\n",
+     " c=assign_trace.mean(axis=0), s=50)\n",
      "plt.ylim(-0.05, 1.05)\n",
      "plt.xlim(35, 300)\n",
      "plt.title(\"Probability of data point belonging to cluster 0\")\n",
      "plt.ylabel(\"probability\")\n",
-     "plt.xlabel(\"value of data point\")"
+     "plt.xlabel(\"value of data point\");"
     ],
     "language": "python",
     "metadata": {},
@@ -788,7 +790,7 @@
      "posterior_p_mean = mcmc.trace(\"p\")[:].mean()\n",
      "\n",
      "plt.hist(data, bins=20, histtype=\"step\", normed=True, color=\"k\",\n",
-     " lw=2, label=\"histogram of data\")\n",
+     " lw=2, label=\"histogram of data\")\n",
      "y = posterior_p_mean * norm.pdf(x, loc=posterior_center_means[0],\n",
      " scale=posterior_std_means[0])\n",
      "plt.plot(x, y, label=\"Cluster 0 (using posterior-mean parameters)\", lw=3)\n",
@@ -800,7 +802,7 @@
      "plt.fill_between(x, y, color=colors[0], alpha=0.3)\n",
      "\n",
      "plt.legend(loc=\"upper left\")\n",
-     "plt.title(\"Visualizing Clusters using posterior-mean parameters\")"
+     "plt.title(\"Visualizing Clusters using posterior-mean parameters\");"
     ],
     "language": "python",
     "metadata": {},
@@ -851,7 +853,7 @@
      "\n",
      "plt.plot(ex_mcmc.trace(\"x\")[:])\n",
      "plt.plot(ex_mcmc.trace(\"y\")[:])\n",
-     "plt.title(\"Displaying (extreme) case of dependence between unknowns\")"
+     "plt.title(\"Displaying (extreme) case of dependence between unknowns\");"
     ],
     "language": "python",
     "metadata": {},
@@ -937,10 +939,10 @@
      "p_trace = mcmc.trace(\"p\")[:]\n",
      "x = 175\n",
      "\n",
-     "v = p_trace*norm_pdf(x, loc=center_trace[:, 0], scale=std_trace[:, 0]) > \\\n",
-     " (1-p_trace)*norm_pdf(x, loc=center_trace[:, 1], scale=std_trace[:, 1])\n",
+     "v = p_trace * norm_pdf(x, loc=center_trace[:, 0], scale=std_trace[:, 0]) > \\\n",
+     " (1 - p_trace) * norm_pdf(x, loc=center_trace[:, 1], scale=std_trace[:, 1])\n",
      "\n",
-     "print \"Probability of belonging to cluster 1:\", v.mean()"
+     "print \"Probability of belonging to cluster 1:\", v.mean();"
     ],
     "language": "python",
     "metadata": {},
@@ -1064,7 +1066,7 @@
     "collapsed": false,
     "input": [
      "def autocorr(x):\n",
-     " #from http://tinyurl.com/afz57c4\n",
+     " # from http://tinyurl.com/afz57c4\n",
      " result = np.correlate(x, x, mode='full')\n",
      " result = result / np.max(result)\n",
      " return result[result.size / 2:]\n",
@@ -1187,7 +1189,7 @@
      "from pymc.Matplot import plot as mcplot\n",
      "\n",
      "mcmc.sample(25000, 0, 10)\n",
-     "mcplot(mcmc.trace(\"centers\", 2), common_scale=False)"
+     "mcplot(mcmc.trace(\"centers\", 2), common_scale=False);"
     ],
     "language": "python",
     "metadata": {},
@@ -1310,7 +1312,7 @@
      "def css_styling():\n",
      " styles = open(\"../styles/custom.css\", \"r\").read()\n",
      " return HTML(styles)\n",
-     "css_styling()"
+     "css_styling();"
     ],
     "language": "python",
    "metadata": {},