|
146 | 146 | "cell_type": "code",
|
147 | 147 | "collapsed": false,
|
148 | 148 | "input": [
|
149 |
| - "### create the observed data\n", |
| 149 | + "# create the observed data\n", |
150 | 150 | "\n",
|
151 |
| - "#sample size of data we observe, trying varying this (keep it less than 100 ;)\n", |
| 151 | + "# sample size of data we observe, trying varying this (keep it less than 100 ;)\n", |
152 | 152 | "N = 1\n",
|
153 | 153 | "\n",
|
154 |
| - "#the true parameters, but of course we do not see these values...\n", |
| 154 | + "# the true parameters, but of course we do not see these values...\n", |
155 | 155 | "lambda_1_true = 1\n",
|
156 | 156 | "lambda_2_true = 3\n",
|
157 | 157 | "\n",
|
158 | 158 | "#...we see the data generated, dependent on the above two values.\n",
|
159 | 159 | "data = np.concatenate([\n",
|
160 | 160 | " stats.poisson.rvs(lambda_1_true, size=(N, 1)),\n",
|
161 | 161 | " stats.poisson.rvs(lambda_2_true, size=(N, 1))\n",
|
162 |
| - " ], axis=1)\n", |
| 162 | + "], axis=1)\n", |
163 | 163 | "print \"observed (2-dimensional,sample size = %d):\" % N, data\n",
|
164 | 164 | "\n",
|
165 |
| - "#plotting details.\n", |
| 165 | + "# plotting details.\n", |
166 | 166 | "x = y = np.linspace(.01, 5, 100)\n",
|
167 | 167 | "likelihood_x = np.array([stats.poisson.pmf(data[:, 0], _x)\n",
|
168 | 168 | " for _x in x]).prod(axis=1)\n",
|
169 | 169 | "likelihood_y = np.array([stats.poisson.pmf(data[:, 1], _y)\n",
|
170 | 170 | " for _y in y]).prod(axis=1)\n",
|
171 |
| - "L = np.dot(likelihood_x[:, None], likelihood_y[None, :])" |
| 171 | + "L = np.dot(likelihood_x[:, None], likelihood_y[None, :]);" |
172 | 172 | ],
|
173 | 173 | "language": "python",
|
174 | 174 | "metadata": {},
|
|
188 | 188 | "collapsed": false,
|
189 | 189 | "input": [
|
190 | 190 | "figsize(12.5, 12)\n",
|
191 |
| - "#matplotlib heavy lifting below, beware!\n", |
| 191 | + "# matplotlib heavy lifting below, beware!\n", |
192 | 192 | "plt.subplot(221)\n",
|
193 | 193 | "uni_x = stats.uniform.pdf(x, loc=0, scale=5)\n",
|
194 | 194 | "uni_y = stats.uniform.pdf(x, loc=0, scale=5)\n",
|
|
319 | 319 | "collapsed": false,
|
320 | 320 | "input": [
|
321 | 321 | "figsize(12.5, 4)\n",
|
322 |
| - "data = np.loadtxt(\"data/mixture_data.csv\", delimiter=\",\")\n", |
| 322 | + "data = np.loadtxt(\"data/mixture_data.csv\", delimiter=\",\")\n", |
323 | 323 | "\n",
|
324 |
| - "plt.hist(data, bins=20, color=\"k\", histtype=\"stepfilled\", alpha=0.8)\n", |
| 324 | + "plt.hist(data, bins=20, color=\"k\", histtype=\"stepfilled\", alpha=0.8)\n", |
325 | 325 | "plt.title(\"Histogram of the dataset\")\n",
|
326 | 326 | "plt.ylim([0, None])\n",
|
327 |
| - "print data[:10], \"...\"" |
| 327 | + "print data[:10], \"...\";" |
328 | 328 | ],
|
329 | 329 | "language": "python",
|
330 | 330 | "metadata": {},
|
|
376 | 376 | "\n",
|
377 | 377 | "p = pm.Uniform(\"p\", 0, 1)\n",
|
378 | 378 | "\n",
|
379 |
| - "assignment = pm.Categorical(\"assignment\", [p, 1-p], size=data.shape[0])\n", |
| 379 | + "assignment = pm.Categorical(\"assignment\", [p, 1 - p], size=data.shape[0])\n", |
380 | 380 | "print \"prior assignment, with p = %.2f:\" % p.value\n",
|
381 |
| - "print assignment.value[:10], \"...\"" |
| 381 | + "print assignment.value[:10], \"...\";" |
382 | 382 | ],
|
383 | 383 | "language": "python",
|
384 | 384 | "metadata": {},
|
|
415 | 415 | "cell_type": "code",
|
416 | 416 | "collapsed": false,
|
417 | 417 | "input": [
|
418 |
| - "taus = 1.0/pm.Uniform(\"stds\", 0, 100, size=2) ** 2\n", |
| 418 | + "taus = 1.0 / pm.Uniform(\"stds\", 0, 100, size=2) ** 2\n", |
419 | 419 | "centers = pm.Normal(\"centers\", [120, 190], [0.01, 0.01], size=2)\n",
|
420 | 420 | "\n",
|
421 | 421 | "\"\"\"\n",
|
422 | 422 | "The below deterministic functions map an assignment, in this case 0 or 1,\n",
|
423 | 423 | "to a set of parameters, located in the (1,2) arrays `taus` and `centers`.\n",
|
424 | 424 | "\"\"\"\n",
|
425 | 425 | "\n",
|
| 426 | + "\n", |
426 | 427 | "@pm.deterministic\n",
|
427 | 428 | "def center_i(assignment=assignment, centers=centers):\n",
|
428 | 429 | " return centers[assignment]\n",
|
429 | 430 | "\n",
|
| 431 | + "\n", |
430 | 432 | "@pm.deterministic\n",
|
431 | 433 | "def tau_i(assignment=assignment, taus=taus):\n",
|
432 | 434 | " return taus[assignment]\n",
|
433 | 435 | "\n",
|
434 | 436 | "print \"Random assignments: \", assignment.value[:4], \"...\"\n",
|
435 | 437 | "print \"Assigned center: \", center_i.value[:4], \"...\"\n",
|
436 |
| - "print \"Assigned precision: \", tau_i.value[:4], \"...\"" |
| 438 | + "print \"Assigned precision: \", tau_i.value[:4], \"...\";" |
437 | 439 | ],
|
438 | 440 | "language": "python",
|
439 | 441 | "metadata": {},
|
|
454 | 456 | "cell_type": "code",
|
455 | 457 | "collapsed": false,
|
456 | 458 | "input": [
|
457 |
| - "#and to combine it with the observations:\n", |
| 459 | + "# and to combine it with the observations:\n", |
458 | 460 | "observations = pm.Normal(\"obs\", center_i, tau_i, value=data, observed=True)\n",
|
459 | 461 | "\n",
|
460 |
| - "#below we create a model class\n", |
461 |
| - "model = pm.Model([p, assignment, taus, centers])" |
| 462 | + "# below we create a model class\n", |
| 463 | + "model = pm.Model([p, assignment, taus, centers]);" |
462 | 464 | ],
|
463 | 465 | "language": "python",
|
464 | 466 | "metadata": {},
|
|
481 | 483 | "collapsed": false,
|
482 | 484 | "input": [
|
483 | 485 | "mcmc = pm.MCMC(model)\n",
|
484 |
| - "mcmc.sample(50000)" |
| 486 | + "mcmc.sample(50000);" |
485 | 487 | ],
|
486 | 488 | "language": "python",
|
487 | 489 | "metadata": {},
|
|
520 | 522 | "lw = 1\n",
|
521 | 523 | "center_trace = mcmc.trace(\"centers\")[:]\n",
|
522 | 524 | "\n",
|
523 |
| - "#for pretty colors later in the book.\n", |
| 525 | + "# for pretty colors later in the book.\n", |
524 | 526 | "colors = [\"#348ABD\", \"#A60628\"] if center_trace[-1, 0] > center_trace[-1, 1] \\\n",
|
525 |
| - " else [\"#A60628\", \"#348ABD\"]\n", |
| 527 | + " else [\"#A60628\", \"#348ABD\"]\n", |
526 | 528 | "\n",
|
527 | 529 | "plt.plot(center_trace[:, 0], label=\"trace of center 0\", c=colors[0], lw=lw)\n",
|
528 | 530 | "plt.plot(center_trace[:, 1], label=\"trace of center 1\", c=colors[1], lw=lw)\n",
|
|
533 | 535 | "plt.subplot(312)\n",
|
534 | 536 | "std_trace = mcmc.trace(\"stds\")[:]\n",
|
535 | 537 | "plt.plot(std_trace[:, 0], label=\"trace of standard deviation of cluster 0\",\n",
|
536 |
| - " c=colors[0], lw=lw)\n", |
| 538 | + " c=colors[0], lw=lw)\n", |
537 | 539 | "plt.plot(std_trace[:, 1], label=\"trace of standard deviation of cluster 1\",\n",
|
538 |
| - " c=colors[1], lw=lw)\n", |
| 540 | + " c=colors[1], lw=lw)\n", |
539 | 541 | "plt.legend(loc=\"upper left\")\n",
|
540 | 542 | "\n",
|
541 | 543 | "plt.subplot(313)\n",
|
542 | 544 | "p_trace = mcmc.trace(\"p\")[:]\n",
|
543 | 545 | "plt.plot(p_trace, label=\"$p$: frequency of assignment to cluster 0\",\n",
|
544 |
| - " color=\"#467821\", lw=lw)\n", |
| 546 | + " color=\"#467821\", lw=lw)\n", |
545 | 547 | "plt.xlabel(\"Steps\")\n",
|
546 | 548 | "plt.ylim(0, 1)\n",
|
547 | 549 | "plt.legend();"
|
|
580 | 582 | "cell_type": "code",
|
581 | 583 | "collapsed": false,
|
582 | 584 | "input": [
|
583 |
| - "mcmc.sample(100000)" |
| 585 | + "mcmc.sample(100000);" |
584 | 586 | ],
|
585 | 587 | "language": "python",
|
586 | 588 | "metadata": {},
|
|
613 | 615 | "\n",
|
614 | 616 | "x = np.arange(50000)\n",
|
615 | 617 | "plt.plot(x, prev_center_trace[:, 0], label=\"previous trace of center 0\",\n",
|
616 |
| - " lw=lw, alpha=0.4, c=colors[1])\n", |
| 618 | + " lw=lw, alpha=0.4, c=colors[1])\n", |
617 | 619 | "plt.plot(x, prev_center_trace[:, 1], label=\"previous trace of center 1\",\n",
|
618 |
| - " lw=lw, alpha=0.4, c=colors[0])\n", |
| 620 | + " lw=lw, alpha=0.4, c=colors[0])\n", |
619 | 621 | "\n",
|
620 | 622 | "x = np.arange(50000, 150000)\n",
|
621 |
| - "plt.plot(x, center_trace[:, 0], label=\"new trace of center 0\", lw=lw, c=\"#348ABD\")\n", |
622 |
| - "plt.plot(x, center_trace[:, 1], label=\"new trace of center 1\", lw=lw, c=\"#A60628\")\n", |
| 623 | + "plt.plot(x, center_trace[:, 0], label=\"new trace of center 0\", lw=lw, c=\"#348ABD\")\n", |
| 624 | + "plt.plot(x, center_trace[:, 1], label=\"new trace of center 1\", lw=lw, c=\"#A60628\")\n", |
623 | 625 | "\n",
|
624 | 626 | "plt.title(\"Traces of unknown center parameters\")\n",
|
625 | 627 | "leg = plt.legend(loc=\"upper right\")\n",
|
|
669 | 671 | " plt.title(\"Posterior of standard deviation of cluster %d\" % i)\n",
|
670 | 672 | " plt.hist(std_trace[:, i], color=colors[i], bins=30,\n",
|
671 | 673 | " histtype=\"stepfilled\")\n",
|
672 |
| - " #plt.autoscale(tight=True)\n", |
| 674 | + " # plt.autoscale(tight=True)\n", |
673 | 675 | "\n",
|
674 |
| - "plt.tight_layout()" |
| 676 | + "plt.tight_layout();" |
675 | 677 | ],
|
676 | 678 | "language": "python",
|
677 | 679 | "metadata": {},
|
|
704 | 706 | "figsize(12.5, 4.5)\n",
|
705 | 707 | "plt.cmap = mpl.colors.ListedColormap(colors)\n",
|
706 | 708 | "plt.imshow(mcmc.trace(\"assignment\")[::400, np.argsort(data)],\n",
|
707 |
| - " cmap=plt.cmap, aspect=.4, alpha=.9)\n", |
| 709 | + " cmap=plt.cmap, aspect=.4, alpha=.9)\n", |
708 | 710 | "plt.xticks(np.arange(0, data.shape[0], 40),\n",
|
709 |
| - " [\"%.2f\" % s for s in np.sort(data)[::40]])\n", |
| 711 | + " [\"%.2f\" % s for s in np.sort(data)[::40]])\n", |
710 | 712 | "plt.ylabel(\"posterior sample\")\n",
|
711 | 713 | "plt.xlabel(\"value of $i$th data point\")\n",
|
712 | 714 | "plt.title(\"Posterior labels of data points\");"
|
|
738 | 740 | "input": [
|
739 | 741 | "cmap = mpl.colors.LinearSegmentedColormap.from_list(\"BMH\", colors)\n",
|
740 | 742 | "assign_trace = mcmc.trace(\"assignment\")[:]\n",
|
741 |
| - "plt.scatter(data, 1-assign_trace.mean(axis=0), cmap=cmap,\n", |
742 |
| - " c=assign_trace.mean(axis=0), s=50)\n", |
| 743 | + "plt.scatter(data, 1 - assign_trace.mean(axis=0), cmap=cmap,\n", |
| 744 | + " c=assign_trace.mean(axis=0), s=50)\n", |
743 | 745 | "plt.ylim(-0.05, 1.05)\n",
|
744 | 746 | "plt.xlim(35, 300)\n",
|
745 | 747 | "plt.title(\"Probability of data point belonging to cluster 0\")\n",
|
746 | 748 | "plt.ylabel(\"probability\")\n",
|
747 |
| - "plt.xlabel(\"value of data point\")" |
| 749 | + "plt.xlabel(\"value of data point\");" |
748 | 750 | ],
|
749 | 751 | "language": "python",
|
750 | 752 | "metadata": {},
|
|
788 | 790 | "posterior_p_mean = mcmc.trace(\"p\")[:].mean()\n",
|
789 | 791 | "\n",
|
790 | 792 | "plt.hist(data, bins=20, histtype=\"step\", normed=True, color=\"k\",\n",
|
791 |
| - " lw=2, label=\"histogram of data\")\n", |
| 793 | + " lw=2, label=\"histogram of data\")\n", |
792 | 794 | "y = posterior_p_mean * norm.pdf(x, loc=posterior_center_means[0],\n",
|
793 | 795 | " scale=posterior_std_means[0])\n",
|
794 | 796 | "plt.plot(x, y, label=\"Cluster 0 (using posterior-mean parameters)\", lw=3)\n",
|
|
800 | 802 | "plt.fill_between(x, y, color=colors[0], alpha=0.3)\n",
|
801 | 803 | "\n",
|
802 | 804 | "plt.legend(loc=\"upper left\")\n",
|
803 |
| - "plt.title(\"Visualizing Clusters using posterior-mean parameters\")" |
| 805 | + "plt.title(\"Visualizing Clusters using posterior-mean parameters\");" |
804 | 806 | ],
|
805 | 807 | "language": "python",
|
806 | 808 | "metadata": {},
|
|
851 | 853 | "\n",
|
852 | 854 | "plt.plot(ex_mcmc.trace(\"x\")[:])\n",
|
853 | 855 | "plt.plot(ex_mcmc.trace(\"y\")[:])\n",
|
854 |
| - "plt.title(\"Displaying (extreme) case of dependence between unknowns\")" |
| 856 | + "plt.title(\"Displaying (extreme) case of dependence between unknowns\");" |
855 | 857 | ],
|
856 | 858 | "language": "python",
|
857 | 859 | "metadata": {},
|
|
937 | 939 | "p_trace = mcmc.trace(\"p\")[:]\n",
|
938 | 940 | "x = 175\n",
|
939 | 941 | "\n",
|
940 |
| - "v = p_trace*norm_pdf(x, loc=center_trace[:, 0], scale=std_trace[:, 0]) > \\\n", |
941 |
| - " (1-p_trace)*norm_pdf(x, loc=center_trace[:, 1], scale=std_trace[:, 1])\n", |
| 942 | + "v = p_trace * norm_pdf(x, loc=center_trace[:, 0], scale=std_trace[:, 0]) > \\\n", |
| 943 | + " (1 - p_trace) * norm_pdf(x, loc=center_trace[:, 1], scale=std_trace[:, 1])\n", |
942 | 944 | "\n",
|
943 |
| - "print \"Probability of belonging to cluster 1:\", v.mean()" |
| 945 | + "print \"Probability of belonging to cluster 1:\", v.mean();" |
944 | 946 | ],
|
945 | 947 | "language": "python",
|
946 | 948 | "metadata": {},
|
|
1064 | 1066 | "collapsed": false,
|
1065 | 1067 | "input": [
|
1066 | 1068 | "def autocorr(x):\n",
|
1067 |
| - " #from http://tinyurl.com/afz57c4\n", |
| 1069 | + " # from http://tinyurl.com/afz57c4\n", |
1068 | 1070 | " result = np.correlate(x, x, mode='full')\n",
|
1069 | 1071 | " result = result / np.max(result)\n",
|
1070 | 1072 | " return result[result.size / 2:]\n",
|
|
1187 | 1189 | "from pymc.Matplot import plot as mcplot\n",
|
1188 | 1190 | "\n",
|
1189 | 1191 | "mcmc.sample(25000, 0, 10)\n",
|
1190 |
| - "mcplot(mcmc.trace(\"centers\", 2), common_scale=False)" |
| 1192 | + "mcplot(mcmc.trace(\"centers\", 2), common_scale=False);" |
1191 | 1193 | ],
|
1192 | 1194 | "language": "python",
|
1193 | 1195 | "metadata": {},
|
|
1310 | 1312 | "def css_styling():\n",
|
1311 | 1313 | " styles = open(\"../styles/custom.css\", \"r\").read()\n",
|
1312 | 1314 | " return HTML(styles)\n",
|
1313 |
| - "css_styling()" |
| 1315 | + "css_styling();" |
1314 | 1316 | ],
|
1315 | 1317 | "language": "python",
|
1316 | 1318 | "metadata": {},
|
|
0 commit comments