|
43 | 43 | "cell_type": "code",
|
44 | 44 | "execution_count": null,
|
45 | 45 | "metadata": {
|
| 46 | + "collapsed": true, |
46 | 47 | "isConfigCell": true
|
47 | 48 | },
|
48 | 49 | "outputs": [],
|
|
69 | 70 | {
|
70 | 71 | "cell_type": "code",
|
71 | 72 | "execution_count": null,
|
72 |
| - "metadata": {}, |
| 73 | + "metadata": { |
| 74 | + "collapsed": true |
| 75 | + }, |
73 | 76 | "outputs": [],
|
74 | 77 | "source": [
|
75 | 78 | "import boto3\n",
|
|
102 | 105 | {
|
103 | 106 | "cell_type": "code",
|
104 | 107 | "execution_count": null,
|
105 |
| - "metadata": {}, |
| 108 | + "metadata": { |
| 109 | + "collapsed": true |
| 110 | + }, |
106 | 111 | "outputs": [],
|
107 | 112 | "source": [
|
108 | 113 | "def get_gdelt(filename):\n",
|
|
117 | 122 | {
|
118 | 123 | "cell_type": "code",
|
119 | 124 | "execution_count": null,
|
120 |
| - "metadata": {}, |
| 125 | + "metadata": { |
| 126 | + "collapsed": true |
| 127 | + }, |
121 | 128 | "outputs": [],
|
122 | 129 | "source": [
|
123 | 130 | "data = get_gdelt('1979.csv')\n",
|
|
142 | 149 | {
|
143 | 150 | "cell_type": "code",
|
144 | 151 | "execution_count": null,
|
145 |
| - "metadata": {}, |
| 152 | + "metadata": { |
| 153 | + "collapsed": true |
| 154 | + }, |
146 | 155 | "outputs": [],
|
147 | 156 | "source": [
|
148 | 157 | "data = data[['EventCode', 'NumArticles', 'AvgTone', 'Actor1Geo_Lat', 'Actor1Geo_Long', 'Actor2Geo_Lat', 'Actor2Geo_Long']]\n",
|
|
172 | 181 | {
|
173 | 182 | "cell_type": "code",
|
174 | 183 | "execution_count": null,
|
175 |
| - "metadata": {}, |
| 184 | + "metadata": { |
| 185 | + "collapsed": true |
| 186 | + }, |
176 | 187 | "outputs": [],
|
177 | 188 | "source": [
|
178 | 189 | "events = pd.crosstab(index=data['EventCode'], columns='count').sort_values(by='count', ascending=False).index[:20]"
|
|
190 | 201 | {
|
191 | 202 | "cell_type": "code",
|
192 | 203 | "execution_count": null,
|
193 |
| - "metadata": {}, |
| 204 | + "metadata": { |
| 205 | + "collapsed": true |
| 206 | + }, |
194 | 207 | "outputs": [],
|
195 | 208 | "source": [
|
196 | 209 | "def write_to_s3(bucket, prefix, channel, file_prefix, X, y):\n",
|
|
219 | 232 | {
|
220 | 233 | "cell_type": "code",
|
221 | 234 | "execution_count": null,
|
222 |
| - "metadata": {}, |
| 235 | + "metadata": { |
| 236 | + "collapsed": true |
| 237 | + }, |
223 | 238 | "outputs": [],
|
224 | 239 | "source": [
|
225 | 240 | "for year in range(1979, 1984):\n",
|
|
244 | 259 | {
|
245 | 260 | "cell_type": "code",
|
246 | 261 | "execution_count": null,
|
247 |
| - "metadata": {}, |
| 262 | + "metadata": { |
| 263 | + "collapsed": true |
| 264 | + }, |
248 | 265 | "outputs": [],
|
249 | 266 | "source": [
|
250 |
| - "containers = {'us-west-2': '174872318107.dkr.ecr.us-west-2.amazonaws.com/linear-learner:latest',\n", |
251 |
| - " 'us-east-1': '382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:latest',\n", |
252 |
| - " 'us-east-2': '404615174143.dkr.ecr.us-east-2.amazonaws.com/linear-learner:latest',\n", |
253 |
| - " 'eu-west-1': '438346466558.dkr.ecr.eu-west-1.amazonaws.com/linear-learner:latest',\n", |
254 |
| - " 'ap-northeast-1': '351501993468.dkr.ecr.ap-northeast-1.amazonaws.com/linear-learner:latest',\n", |
255 |
| - " 'ap-northeast-2': '835164637446.dkr.ecr.ap-northeast-2.amazonaws.com/linear-learner:latest'}\n", |
256 |
| - "container = containers[boto3.Session().region_name]" |
| 267 | + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", |
| 268 | + "container = get_image_uri(boto3.Session().region_name, 'linear-learner')" |
257 | 269 | ]
|
258 | 270 | },
|
259 | 271 | {
|
260 | 272 | "cell_type": "code",
|
261 | 273 | "execution_count": null,
|
262 |
| - "metadata": {}, |
| 274 | + "metadata": { |
| 275 | + "collapsed": true |
| 276 | + }, |
263 | 277 | "outputs": [],
|
264 | 278 | "source": [
|
265 | 279 | "common_training_params = {\n",
|
|
326 | 340 | {
|
327 | 341 | "cell_type": "code",
|
328 | 342 | "execution_count": null,
|
329 |
| - "metadata": {}, |
| 343 | + "metadata": { |
| 344 | + "collapsed": true |
| 345 | + }, |
330 | 346 | "outputs": [],
|
331 | 347 | "source": [
|
332 | 348 | "sharded_job = 'DEMO-linear-sharded-' + time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime())\n",
|
|
341 | 357 | {
|
342 | 358 | "cell_type": "code",
|
343 | 359 | "execution_count": null,
|
344 |
| - "metadata": {}, |
| 360 | + "metadata": { |
| 361 | + "collapsed": true |
| 362 | + }, |
345 | 363 | "outputs": [],
|
346 | 364 | "source": [
|
347 | 365 | "replicated_job = 'DEMO-linear-replicated-' + time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime())\n",
|
|
363 | 381 | {
|
364 | 382 | "cell_type": "code",
|
365 | 383 | "execution_count": null,
|
366 |
| - "metadata": {}, |
| 384 | + "metadata": { |
| 385 | + "collapsed": true |
| 386 | + }, |
367 | 387 | "outputs": [],
|
368 | 388 | "source": [
|
369 | 389 | "%%time\n",
|
|
395 | 415 | {
|
396 | 416 | "cell_type": "code",
|
397 | 417 | "execution_count": null,
|
398 |
| - "metadata": {}, |
| 418 | + "metadata": { |
| 419 | + "collapsed": true |
| 420 | + }, |
399 | 421 | "outputs": [],
|
400 | 422 | "source": [
|
401 | 423 | "print('Sharded:', sm.describe_training_job(TrainingJobName=sharded_job)['TrainingJobStatus'])\n",
|
|
416 | 438 | {
|
417 | 439 | "cell_type": "code",
|
418 | 440 | "execution_count": null,
|
419 |
| - "metadata": {}, |
| 441 | + "metadata": { |
| 442 | + "collapsed": true |
| 443 | + }, |
420 | 444 | "outputs": [],
|
421 | 445 | "source": [
|
422 | 446 | "def get_train_timing(job):\n",
|
|
435 | 459 | {
|
436 | 460 | "cell_type": "code",
|
437 | 461 | "execution_count": null,
|
438 |
| - "metadata": {}, |
| 462 | + "metadata": { |
| 463 | + "collapsed": true |
| 464 | + }, |
439 | 465 | "outputs": [],
|
440 | 466 | "source": [
|
441 | 467 | "print('Sharded:', get_train_timing(sharded_job), 'minutes')\n",
|
|
467 | 493 | {
|
468 | 494 | "cell_type": "code",
|
469 | 495 | "execution_count": null,
|
470 |
| - "metadata": {}, |
| 496 | + "metadata": { |
| 497 | + "collapsed": true |
| 498 | + }, |
471 | 499 | "outputs": [],
|
472 | 500 | "source": [
|
473 | 501 | "sharded_model_response = sm.create_model(\n",
|
|
483 | 511 | {
|
484 | 512 | "cell_type": "code",
|
485 | 513 | "execution_count": null,
|
486 |
| - "metadata": {}, |
| 514 | + "metadata": { |
| 515 | + "collapsed": true |
| 516 | + }, |
487 | 517 | "outputs": [],
|
488 | 518 | "source": [
|
489 | 519 | "replicated_model_response = sm.create_model(\n",
|
|
511 | 541 | {
|
512 | 542 | "cell_type": "code",
|
513 | 543 | "execution_count": null,
|
514 |
| - "metadata": {}, |
| 544 | + "metadata": { |
| 545 | + "collapsed": true |
| 546 | + }, |
515 | 547 | "outputs": [],
|
516 | 548 | "source": [
|
517 | 549 | "sharded_endpoint_config = 'DEMO-sharded-endpoint-config-' + time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime())\n",
|
|
530 | 562 | {
|
531 | 563 | "cell_type": "code",
|
532 | 564 | "execution_count": null,
|
533 |
| - "metadata": {}, |
| 565 | + "metadata": { |
| 566 | + "collapsed": true |
| 567 | + }, |
534 | 568 | "outputs": [],
|
535 | 569 | "source": [
|
536 | 570 | "replicated_endpoint_config = 'DEMO-replicated-endpoint-config-' + time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime())\n",
|
|
556 | 590 | {
|
557 | 591 | "cell_type": "code",
|
558 | 592 | "execution_count": null,
|
559 |
| - "metadata": {}, |
| 593 | + "metadata": { |
| 594 | + "collapsed": true |
| 595 | + }, |
560 | 596 | "outputs": [],
|
561 | 597 | "source": [
|
562 | 598 | "%%time\n",
|
|
600 | 636 | {
|
601 | 637 | "cell_type": "code",
|
602 | 638 | "execution_count": null,
|
603 |
| - "metadata": {}, |
| 639 | + "metadata": { |
| 640 | + "collapsed": true |
| 641 | + }, |
604 | 642 | "outputs": [],
|
605 | 643 | "source": [
|
606 | 644 | "print('Sharded:', sm.describe_endpoint(EndpointName=sharded_endpoint)['EndpointStatus'])\n",
|
|
619 | 657 | {
|
620 | 658 | "cell_type": "code",
|
621 | 659 | "execution_count": null,
|
622 |
| - "metadata": {}, |
| 660 | + "metadata": { |
| 661 | + "collapsed": true |
| 662 | + }, |
623 | 663 | "outputs": [],
|
624 | 664 | "source": [
|
625 | 665 | "test_data = transform_gdelt(get_gdelt('1984.csv'), events).as_matrix()\n",
|
|
637 | 677 | {
|
638 | 678 | "cell_type": "code",
|
639 | 679 | "execution_count": null,
|
640 |
| - "metadata": {}, |
| 680 | + "metadata": { |
| 681 | + "collapsed": true |
| 682 | + }, |
641 | 683 | "outputs": [],
|
642 | 684 | "source": [
|
643 | 685 | "def np2csv(arr):\n",
|
|
656 | 698 | {
|
657 | 699 | "cell_type": "code",
|
658 | 700 | "execution_count": null,
|
659 |
| - "metadata": {}, |
| 701 | + "metadata": { |
| 702 | + "collapsed": true |
| 703 | + }, |
660 | 704 | "outputs": [],
|
661 | 705 | "source": [
|
662 | 706 | "def predict_batches(data, endpoint):\n",
|
|
684 | 728 | {
|
685 | 729 | "cell_type": "code",
|
686 | 730 | "execution_count": null,
|
687 |
| - "metadata": {}, |
| 731 | + "metadata": { |
| 732 | + "collapsed": true |
| 733 | + }, |
688 | 734 | "outputs": [],
|
689 | 735 | "source": [
|
690 | 736 | "sharded_predictions = predict_batches(test_X, sharded_endpoint)\n",
|
|
728 | 774 | {
|
729 | 775 | "cell_type": "code",
|
730 | 776 | "execution_count": null,
|
731 |
| - "metadata": {}, |
| 777 | + "metadata": { |
| 778 | + "collapsed": true |
| 779 | + }, |
732 | 780 | "outputs": [],
|
733 | 781 | "source": [
|
734 | 782 | "sm.delete_endpoint(EndpointName=sharded_endpoint)\n",
|
|
0 commit comments