autopep8 --ignore=E501 fixes. Marginal improvement. #180

Merged: 3 commits, Feb 14, 2014
90 changes: 46 additions & 44 deletions Chapter3_MCMC/IntroMCMC.ipynb
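
For context on the tool named in the PR title: autopep8 rewrites Python source until it passes pycodestyle's checks, and --ignore=E501 skips the line-length check, which is why most of the diff below is spacing and comment-style churn rather than re-wrapped lines. A minimal sketch of that kind of run, using autopep8's fix_code API on a made-up snippet (the sample code is illustrative, not taken from this PR, and the options dict is assumed to mirror the CLI flags):

# Roughly the cleanup this PR applies; the CLI form is
# `autopep8 --ignore=E501 file.py`, which prints the fixed source to stdout.
import autopep8

messy = (
    "#sample size of data we observe\n"  # E265: comment should start with "# "
    "N=1\n"                              # E225: missing whitespace around "="
)

# E501 (line too long) is ignored, matching the PR title.
print(autopep8.fix_code(messy, options={"ignore": ["E501"]}))

Running this prints the snippet with the comment and the assignment re-spaced, while line lengths are left alone.
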
@@ -146,29 +146,29 @@
"cell_type": "code",
"collapsed": false,
"input": [
"### create the observed data\n",
"# create the observed data\n",
"\n",
"#sample size of data we observe, trying varying this (keep it less than 100 ;)\n",
"# sample size of data we observe, trying varying this (keep it less than 100 ;)\n",
"N = 1\n",
"\n",
"#the true parameters, but of course we do not see these values...\n",
"# the true parameters, but of course we do not see these values...\n",
"lambda_1_true = 1\n",
"lambda_2_true = 3\n",
"\n",
"#...we see the data generated, dependent on the above two values.\n",
"data = np.concatenate([\n",
" stats.poisson.rvs(lambda_1_true, size=(N, 1)),\n",
" stats.poisson.rvs(lambda_2_true, size=(N, 1))\n",
" ], axis=1)\n",
"], axis=1)\n",
"print \"observed (2-dimensional,sample size = %d):\" % N, data\n",
"\n",
"#plotting details.\n",
"# plotting details.\n",
"x = y = np.linspace(.01, 5, 100)\n",
"likelihood_x = np.array([stats.poisson.pmf(data[:, 0], _x)\n",
" for _x in x]).prod(axis=1)\n",
"likelihood_y = np.array([stats.poisson.pmf(data[:, 1], _y)\n",
" for _y in y]).prod(axis=1)\n",
"L = np.dot(likelihood_x[:, None], likelihood_y[None, :])"
"L = np.dot(likelihood_x[:, None], likelihood_y[None, :]);"
],
"language": "python",
"metadata": {},
@@ -188,7 +188,7 @@
"collapsed": false,
"input": [
"figsize(12.5, 12)\n",
"#matplotlib heavy lifting below, beware!\n",
"# matplotlib heavy lifting below, beware!\n",
"plt.subplot(221)\n",
"uni_x = stats.uniform.pdf(x, loc=0, scale=5)\n",
"uni_y = stats.uniform.pdf(x, loc=0, scale=5)\n",
@@ -319,12 +319,12 @@
"collapsed": false,
"input": [
"figsize(12.5, 4)\n",
"data = np.loadtxt(\"data/mixture_data.csv\", delimiter=\",\")\n",
"data = np.loadtxt(\"data/mixture_data.csv\", delimiter=\",\")\n",
"\n",
"plt.hist(data, bins=20, color=\"k\", histtype=\"stepfilled\", alpha=0.8)\n",
"plt.hist(data, bins=20, color=\"k\", histtype=\"stepfilled\", alpha=0.8)\n",
"plt.title(\"Histogram of the dataset\")\n",
"plt.ylim([0, None])\n",
"print data[:10], \"...\""
"print data[:10], \"...\";"
],
"language": "python",
"metadata": {},
@@ -376,9 +376,9 @@
"\n",
"p = pm.Uniform(\"p\", 0, 1)\n",
"\n",
"assignment = pm.Categorical(\"assignment\", [p, 1-p], size=data.shape[0])\n",
"assignment = pm.Categorical(\"assignment\", [p, 1 - p], size=data.shape[0])\n",
"print \"prior assignment, with p = %.2f:\" % p.value\n",
"print assignment.value[:10], \"...\""
"print assignment.value[:10], \"...\";"
],
"language": "python",
"metadata": {},
@@ -415,25 +415,27 @@
"cell_type": "code",
"collapsed": false,
"input": [
"taus = 1.0/pm.Uniform(\"stds\", 0, 100, size=2) ** 2\n",
"taus = 1.0 / pm.Uniform(\"stds\", 0, 100, size=2) ** 2\n",
"centers = pm.Normal(\"centers\", [120, 190], [0.01, 0.01], size=2)\n",
"\n",
"\"\"\"\n",
"The below deterministic functions map an assignment, in this case 0 or 1,\n",
"to a set of parameters, located in the (1,2) arrays `taus` and `centers`.\n",
"\"\"\"\n",
"\n",
"\n",
"@pm.deterministic\n",
"def center_i(assignment=assignment, centers=centers):\n",
" return centers[assignment]\n",
"\n",
"\n",
"@pm.deterministic\n",
"def tau_i(assignment=assignment, taus=taus):\n",
" return taus[assignment]\n",
"\n",
"print \"Random assignments: \", assignment.value[:4], \"...\"\n",
"print \"Assigned center: \", center_i.value[:4], \"...\"\n",
"print \"Assigned precision: \", tau_i.value[:4], \"...\""
"print \"Assigned precision: \", tau_i.value[:4], \"...\";"
],
"language": "python",
"metadata": {},
@@ -454,11 +456,11 @@
"cell_type": "code",
"collapsed": false,
"input": [
"#and to combine it with the observations:\n",
"# and to combine it with the observations:\n",
"observations = pm.Normal(\"obs\", center_i, tau_i, value=data, observed=True)\n",
"\n",
"#below we create a model class\n",
"model = pm.Model([p, assignment, taus, centers])"
"# below we create a model class\n",
"model = pm.Model([p, assignment, taus, centers]);"
],
"language": "python",
"metadata": {},
@@ -481,7 +483,7 @@
"collapsed": false,
"input": [
"mcmc = pm.MCMC(model)\n",
"mcmc.sample(50000)"
"mcmc.sample(50000);"
],
"language": "python",
"metadata": {},
@@ -520,9 +522,9 @@
"lw = 1\n",
"center_trace = mcmc.trace(\"centers\")[:]\n",
"\n",
"#for pretty colors later in the book.\n",
"# for pretty colors later in the book.\n",
"colors = [\"#348ABD\", \"#A60628\"] if center_trace[-1, 0] > center_trace[-1, 1] \\\n",
" else [\"#A60628\", \"#348ABD\"]\n",
" else [\"#A60628\", \"#348ABD\"]\n",
"\n",
"plt.plot(center_trace[:, 0], label=\"trace of center 0\", c=colors[0], lw=lw)\n",
"plt.plot(center_trace[:, 1], label=\"trace of center 1\", c=colors[1], lw=lw)\n",
@@ -533,15 +535,15 @@
"plt.subplot(312)\n",
"std_trace = mcmc.trace(\"stds\")[:]\n",
"plt.plot(std_trace[:, 0], label=\"trace of standard deviation of cluster 0\",\n",
" c=colors[0], lw=lw)\n",
" c=colors[0], lw=lw)\n",
"plt.plot(std_trace[:, 1], label=\"trace of standard deviation of cluster 1\",\n",
" c=colors[1], lw=lw)\n",
" c=colors[1], lw=lw)\n",
"plt.legend(loc=\"upper left\")\n",
"\n",
"plt.subplot(313)\n",
"p_trace = mcmc.trace(\"p\")[:]\n",
"plt.plot(p_trace, label=\"$p$: frequency of assignment to cluster 0\",\n",
" color=\"#467821\", lw=lw)\n",
" color=\"#467821\", lw=lw)\n",
"plt.xlabel(\"Steps\")\n",
"plt.ylim(0, 1)\n",
"plt.legend();"
@@ -580,7 +582,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
"mcmc.sample(100000)"
"mcmc.sample(100000);"
],
"language": "python",
"metadata": {},
@@ -613,13 +615,13 @@
"\n",
"x = np.arange(50000)\n",
"plt.plot(x, prev_center_trace[:, 0], label=\"previous trace of center 0\",\n",
" lw=lw, alpha=0.4, c=colors[1])\n",
" lw=lw, alpha=0.4, c=colors[1])\n",
"plt.plot(x, prev_center_trace[:, 1], label=\"previous trace of center 1\",\n",
" lw=lw, alpha=0.4, c=colors[0])\n",
" lw=lw, alpha=0.4, c=colors[0])\n",
"\n",
"x = np.arange(50000, 150000)\n",
"plt.plot(x, center_trace[:, 0], label=\"new trace of center 0\", lw=lw, c=\"#348ABD\")\n",
"plt.plot(x, center_trace[:, 1], label=\"new trace of center 1\", lw=lw, c=\"#A60628\")\n",
"plt.plot(x, center_trace[:, 0], label=\"new trace of center 0\", lw=lw, c=\"#348ABD\")\n",
"plt.plot(x, center_trace[:, 1], label=\"new trace of center 1\", lw=lw, c=\"#A60628\")\n",
"\n",
"plt.title(\"Traces of unknown center parameters\")\n",
"leg = plt.legend(loc=\"upper right\")\n",
@@ -669,9 +671,9 @@
" plt.title(\"Posterior of standard deviation of cluster %d\" % i)\n",
" plt.hist(std_trace[:, i], color=colors[i], bins=30,\n",
" histtype=\"stepfilled\")\n",
" #plt.autoscale(tight=True)\n",
" # plt.autoscale(tight=True)\n",
"\n",
"plt.tight_layout()"
"plt.tight_layout();"
],
"language": "python",
"metadata": {},
@@ -704,9 +706,9 @@
"figsize(12.5, 4.5)\n",
"plt.cmap = mpl.colors.ListedColormap(colors)\n",
"plt.imshow(mcmc.trace(\"assignment\")[::400, np.argsort(data)],\n",
" cmap=plt.cmap, aspect=.4, alpha=.9)\n",
" cmap=plt.cmap, aspect=.4, alpha=.9)\n",
"plt.xticks(np.arange(0, data.shape[0], 40),\n",
" [\"%.2f\" % s for s in np.sort(data)[::40]])\n",
" [\"%.2f\" % s for s in np.sort(data)[::40]])\n",
"plt.ylabel(\"posterior sample\")\n",
"plt.xlabel(\"value of $i$th data point\")\n",
"plt.title(\"Posterior labels of data points\");"
@@ -738,13 +740,13 @@
"input": [
"cmap = mpl.colors.LinearSegmentedColormap.from_list(\"BMH\", colors)\n",
"assign_trace = mcmc.trace(\"assignment\")[:]\n",
"plt.scatter(data, 1-assign_trace.mean(axis=0), cmap=cmap,\n",
" c=assign_trace.mean(axis=0), s=50)\n",
"plt.scatter(data, 1 - assign_trace.mean(axis=0), cmap=cmap,\n",
" c=assign_trace.mean(axis=0), s=50)\n",
"plt.ylim(-0.05, 1.05)\n",
"plt.xlim(35, 300)\n",
"plt.title(\"Probability of data point belonging to cluster 0\")\n",
"plt.ylabel(\"probability\")\n",
"plt.xlabel(\"value of data point\")"
"plt.xlabel(\"value of data point\");"
],
"language": "python",
"metadata": {},
@@ -788,7 +790,7 @@
"posterior_p_mean = mcmc.trace(\"p\")[:].mean()\n",
"\n",
"plt.hist(data, bins=20, histtype=\"step\", normed=True, color=\"k\",\n",
" lw=2, label=\"histogram of data\")\n",
" lw=2, label=\"histogram of data\")\n",
"y = posterior_p_mean * norm.pdf(x, loc=posterior_center_means[0],\n",
" scale=posterior_std_means[0])\n",
"plt.plot(x, y, label=\"Cluster 0 (using posterior-mean parameters)\", lw=3)\n",
@@ -800,7 +802,7 @@
"plt.fill_between(x, y, color=colors[0], alpha=0.3)\n",
"\n",
"plt.legend(loc=\"upper left\")\n",
"plt.title(\"Visualizing Clusters using posterior-mean parameters\")"
"plt.title(\"Visualizing Clusters using posterior-mean parameters\");"
],
"language": "python",
"metadata": {},
@@ -851,7 +853,7 @@
"\n",
"plt.plot(ex_mcmc.trace(\"x\")[:])\n",
"plt.plot(ex_mcmc.trace(\"y\")[:])\n",
"plt.title(\"Displaying (extreme) case of dependence between unknowns\")"
"plt.title(\"Displaying (extreme) case of dependence between unknowns\");"
],
"language": "python",
"metadata": {},
@@ -937,10 +939,10 @@
"p_trace = mcmc.trace(\"p\")[:]\n",
"x = 175\n",
"\n",
"v = p_trace*norm_pdf(x, loc=center_trace[:, 0], scale=std_trace[:, 0]) > \\\n",
" (1-p_trace)*norm_pdf(x, loc=center_trace[:, 1], scale=std_trace[:, 1])\n",
"v = p_trace * norm_pdf(x, loc=center_trace[:, 0], scale=std_trace[:, 0]) > \\\n",
" (1 - p_trace) * norm_pdf(x, loc=center_trace[:, 1], scale=std_trace[:, 1])\n",
"\n",
"print \"Probability of belonging to cluster 1:\", v.mean()"
"print \"Probability of belonging to cluster 1:\", v.mean();"
],
"language": "python",
"metadata": {},
@@ -1064,7 +1066,7 @@
"collapsed": false,
"input": [
"def autocorr(x):\n",
" #from http://tinyurl.com/afz57c4\n",
" # from http://tinyurl.com/afz57c4\n",
" result = np.correlate(x, x, mode='full')\n",
" result = result / np.max(result)\n",
" return result[result.size / 2:]\n",
@@ -1187,7 +1189,7 @@
"from pymc.Matplot import plot as mcplot\n",
"\n",
"mcmc.sample(25000, 0, 10)\n",
"mcplot(mcmc.trace(\"centers\", 2), common_scale=False)"
"mcplot(mcmc.trace(\"centers\", 2), common_scale=False);"
],
"language": "python",
"metadata": {},
@@ -1310,7 +1312,7 @@
"def css_styling():\n",
" styles = open(\"../styles/custom.css\", \"r\").read()\n",
" return HTML(styles)\n",
"css_styling()"
"css_styling();"
],
"language": "python",
"metadata": {},