|
256 | 256 | "source": [
|
257 | 257 | "### Building and registering the container\n",
|
258 | 258 | "\n",
|
259 | | - "The following shell code shows how to build the container image using `docker build` and push the container image to ECR using `docker push`. This code is also available as the shell script `container/build-and-push.sh`, which you can run as `build-and-push.sh tensorflow_cifar10_example` to build the image `tensorflow_cifar10_example`. \n", |
| 259 | + "The following shell code shows how to build the container image using `docker build` and push the container image to ECR using `docker push`. This code is also available as the shell script `container/build-and-push.sh`, which you can run as `build-and-push.sh tensorflow-cifar10-example` to build the image `tensorflow-cifar10-example`. \n", |
260 | 260 | "\n",
|
261 | 261 | "This code looks for an ECR repository in the account you're using and the current default region (if you're using a SageMaker notebook instance, this is the region where the notebook instance was created). If the repository doesn't exist, the script will create it."
|
262 | 262 | ]
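For reference, the repository lookup described above can also be sketched in Python with boto3 rather than the AWS CLI calls the script uses. This is a minimal illustration, not part of the notebook; it assumes default credentials and region, and the `ensure_repository` helper name is hypothetical:

```python
import boto3

def ensure_repository(repo_name):
    """Create the ECR repository if it does not already exist (hypothetical helper)."""
    ecr = boto3.client('ecr')  # picks up the default region, e.g. the notebook instance's region
    try:
        ecr.describe_repositories(repositoryNames=[repo_name])
    except ecr.exceptions.RepositoryNotFoundException:
        ecr.create_repository(repositoryName=repo_name)

ensure_repository('tensorflow-cifar10-example')
```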
|
|
270 | 270 | "%%sh\n",
|
271 | 271 | "\n",
|
272 | 272 | "# The name of our algorithm\n",
|
273 | | - "algorithm_name=tensorflow_cifar10_example\n", |
| 273 | + "algorithm_name=tensorflow-cifar10-example\n", |
274 | 274 | "\n",
|
275 | 275 | "cd container\n",
|
276 | 276 | "\n",
|
|
321 | 321 | "source": [
|
322 | 322 | "## Download the CIFAR-10 dataset\n",
|
323 | 323 | "Our training algorithm is expecting our training data to be in the file format of [TFRecords](https://www.tensorflow.org/guide/datasets), which is a simple record-oriented binary format that many TensorFlow applications use for training data.\n",
|
324 | | - "Below is a python script from the official TensorFlow CIFAR10 example, which downloads the CIFAR-10 dataset and converts them into TFRecords." |
| 324 | + "Below is a Python script adapted from the [official TensorFlow CIFAR-10 example](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10_estimator), which downloads the CIFAR-10 dataset and converts them into TFRecords." |
325 | 325 | ]
|
326 | 326 | },
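As a quick way to see what the conversion produces, the records can be read back with TensorFlow's `tf.data` API. A minimal sketch, assuming the TensorFlow 1.x graph-mode API of this notebook's era and the `/tmp/cifar-10-data` output path used below; the feature keys depend on the conversion script:

```python
import tensorflow as tf

# Read one serialized example from the converted training split.
dataset = tf.data.TFRecordDataset('/tmp/cifar-10-data/train.tfrecords')
next_record = dataset.make_one_shot_iterator().get_next()

with tf.Session() as sess:
    serialized = sess.run(next_record)

# Parse the raw bytes into a tf.train.Example and list its feature keys.
example = tf.train.Example.FromString(serialized)
print(list(example.features.feature.keys()))
```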
|
327 | 327 | {
|
|
335 | 335 | },
|
336 | 336 | {
|
337 | 337 | "cell_type": "code",
|
338 | | - "execution_count": null, |
| 338 | + "execution_count": 2, |
339 | 339 | "metadata": {},
|
340 | | - "outputs": [], |
| 340 | + "outputs": [ |
| 341 | + { |
| 342 | + "name": "stdout", |
| 343 | + "output_type": "stream", |
| 344 | + "text": [ |
| 345 | + "eval.tfrecords\ttrain.tfrecords validation.tfrecords\r\n" |
| 346 | + ] |
| 347 | + } |
| 348 | + ], |
341 | 349 | "source": [
|
342 | | - "! ls /tmp/cifar-10-data " |
| 350 | + "# There should be three tfrecords. (eval, train, validation)\n", |
| 351 | + "! ls /tmp/cifar-10-data" |
343 | 352 | ]
|
344 | 353 | },
|
345 | 354 | {
|
|
359 | 368 | },
|
360 | 369 | {
|
361 | 370 | "cell_type": "code",
|
362 | | - "execution_count": 5, |
| 371 | + "execution_count": 3, |
363 | 372 | "metadata": {},
|
364 | 373 | "outputs": [],
|
365 | 374 | "source": [
|
|
387 | 396 | },
|
388 | 397 | {
|
389 | 398 | "cell_type": "code",
|
390 | | - "execution_count": null, |
| 399 | + "execution_count": 4, |
391 | 400 | "metadata": {},
|
392 | | - "outputs": [], |
| 401 | + "outputs": [ |
| 402 | + { |
| 403 | + "name": "stdout", |
| 404 | + "output_type": "stream", |
| 405 | + "text": [ |
| 406 | + "SageMaker instance route table setup is ok. We are good to go.\r\n", |
| 407 | + "SageMaker instance routing for Docker is ok. We are good to go!\r\n" |
| 408 | + ] |
| 409 | + } |
| 410 | + ], |
393 | 411 | "source": [
|
| 412 | + "# Lets set up our SageMaker notebook instance for local mode.\n", |
394 | 413 | "!/bin/bash ./utils/setup.sh"
|
395 | 414 | ]
|
396 | 415 | },
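Once the script above has configured Docker networking, local mode is just a matter of passing `local` as the instance type, so the SDK runs the training container on this machine via Docker. A minimal sketch under that assumption, using the v1-era `Estimator` interface shown later in this notebook (`role` and `hyperparameters` are assumed to be defined earlier):

```python
from sagemaker.estimator import Estimator

# 'local' tells the SageMaker Python SDK to run the container here with Docker,
# instead of launching a training job on an EC2 instance.
estimator = Estimator(role=role,
                      train_instance_count=1,
                      train_instance_type='local',
                      image_name='tensorflow-cifar10-example',
                      hyperparameters=hyperparameters)

# In local mode the data can be read straight from the local filesystem.
estimator.fit('file:///tmp/cifar-10-data')
```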
|
|
428 | 447 | },
|
429 | 448 | {
|
430 | 449 | "cell_type": "code",
|
431 | | - "execution_count": 11, |
| 450 | + "execution_count": null, |
| 451 | + "metadata": {}, |
| 452 | + "outputs": [], |
| 453 | + "source": [ |
| 454 | + "! pip install opencv-python" |
| 455 | + ] |
| 456 | + }, |
| 457 | + { |
| 458 | + "cell_type": "code", |
| 459 | + "execution_count": 6, |
432 | 460 | "metadata": {},
|
433 | 461 | "outputs": [
|
434 | 462 | {
|
435 | 463 | "name": "stdout",
|
436 | 464 | "output_type": "stream",
|
437 | 465 | "text": [
|
438 | | - "Collecting opencv-python\n", |
439 | | - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/53/e0/21c8964fa8ef50842ebefaa7346a3cf0e37b56c8ecd97ed6bd2dbe577705/opencv_python-3.4.2.17-cp36-cp36m-manylinux1_x86_64.whl (25.0MB)\n", |
440 | | - "\u001b[K 100% |████████████████████████████████| 25.0MB 2.1MB/s eta 0:00:01\n", |
441 | | - "\u001b[?25hRequirement already satisfied: numpy>=1.11.3 in /home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages (from opencv-python) (1.14.5)\n", |
442 | | - "\u001b[31mdistributed 1.21.8 requires msgpack, which is not installed.\u001b[0m\n", |
443 | | - "Installing collected packages: opencv-python\n", |
444 | | - "Successfully installed opencv-python-3.4.2.17\n", |
445 | | - "\u001b[33mYou are using pip version 10.0.1, however version 18.0 is available.\n", |
446 | | - "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n" |
| 466 | + "\u001b[36malgo-1-L58J2_1 |\u001b[0m 172.18.0.1 - - [03/Aug/2018:22:32:52 +0000] \"POST /invocations HTTP/1.1\" 200 229 \"-\" \"-\"\r\n" |
447 | 467 | ]
|
| 468 | + }, |
| 469 | + { |
| 470 | + "data": { |
| 471 | + "text/plain": [ |
| 472 | + "{'predictions': [{'probabilities': [2.29861e-05,\n", |
| 473 | + " 0.0104983,\n", |
| 474 | + " 0.147974,\n", |
| 475 | + " 0.01538,\n", |
| 476 | + " 0.0478089,\n", |
| 477 | + " 0.00164997,\n", |
| 478 | + " 0.758483,\n", |
| 479 | + " 0.0164191,\n", |
| 480 | + " 0.00125304,\n", |
| 481 | + " 0.000510801],\n", |
| 482 | + " 'classes': 6}]}" |
| 483 | + ] |
| 484 | + }, |
| 485 | + "execution_count": 6, |
| 486 | + "metadata": {}, |
| 487 | + "output_type": "execute_result" |
448 | 488 | }
|
449 | 489 | ],
|
450 | | - "source": [ |
451 | | - "! pip install opencv-python" |
452 | | - ] |
453 | | - }, |
454 | | - { |
455 | | - "cell_type": "code", |
456 | | - "execution_count": null, |
457 | | - "metadata": {}, |
458 | | - "outputs": [], |
459 | 490 | "source": [
|
460 | 491 | "import cv2\n",
|
461 | 492 | "import numpy\n",
|
|
484 | 515 | },
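The `classes` field in the response above is an index into the standard CIFAR-10 label order, so `6` corresponds to `frog`. A small illustrative helper (not part of the notebook) for turning a prediction into a readable label:

```python
# Standard CIFAR-10 label order.
CIFAR10_LABELS = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                  'dog', 'frog', 'horse', 'ship', 'truck']

def label_of(prediction):
    """Map one prediction dict from the endpoint response to its class label."""
    return CIFAR10_LABELS[prediction['classes']]

# e.g. label_of({'classes': 6, 'probabilities': [...]}) returns 'frog'
```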
|
485 | 516 | {
|
486 | 517 | "cell_type": "code",
|
487 | | - "execution_count": 25, |
| 518 | + "execution_count": null, |
488 | 519 | "metadata": {},
|
489 | | - "outputs": [ |
490 | | - { |
491 | | - "name": "stderr", |
492 | | - "output_type": "stream", |
493 | | - "text": [ |
494 | | - "INFO:sagemaker:Deleting endpoint with name: tensorflow_cifar10_example-2018-08-03-18-06-55-168\n" |
495 | | - ] |
496 | | - }, |
497 | | - { |
498 | | - "name": "stdout", |
499 | | - "output_type": "stream", |
500 | | - "text": [ |
501 | | - "Gracefully stopping... (press Ctrl+C again to force)\n", |
502 | | - "Stopping tmp3n0u5hj2_algo-1-HCRIC_1 ... \r\n", |
503 | | - "\u001b[1A\u001b[2K\r", |
504 | | - "Stopping tmp3n0u5hj2_algo-1-HCRIC_1 ... \u001b[32mdone\u001b[0m\r", |
505 | | - "\u001b[1B" |
506 | | - ] |
507 | | - } |
508 | | - ], |
| 520 | + "outputs": [], |
509 | 521 | "source": [
|
510 | 522 | "predictor.delete_endpoint()"
|
511 | 523 | ]
|
|
523 | 535 | },
|
524 | 536 | {
|
525 | 537 | "cell_type": "code",
|
526 | | - "execution_count": 26, |
| 538 | + "execution_count": null, |
527 | 539 | "metadata": {},
|
528 | 540 | "outputs": [],
|
529 | 541 | "source": [
|
|
542 | 554 | },
|
543 | 555 | {
|
544 | 556 | "cell_type": "code",
|
545 | | - "execution_count": 27, |
| 557 | + "execution_count": null, |
546 | 558 | "metadata": {},
|
547 | 559 | "outputs": [],
|
548 | 560 | "source": [
|
|
564 | 576 | },
|
565 | 577 | {
|
566 | 578 | "cell_type": "code",
|
567 | | - "execution_count": 28, |
| 579 | + "execution_count": null, |
568 | 580 | "metadata": {},
|
569 | 581 | "outputs": [],
|
570 | 582 | "source": [
|
|
580 | 592 | "## Training on SageMaker\n",
|
581 | 593 | "Training a model on SageMaker with the Python SDK is done in a way that is similar to the way we trained it locally. This is done by changing our train_instance_type from `local` to one of our [supported EC2 instance types](https://aws.amazon.com/sagemaker/pricing/instance-types/).\n",
|
582 | 594 | "\n",
|
583 | | - "In addition, we must now specify the ECR image URL, which we just pushed above. Be sure to replace the string within the Estimator parameter, image_name.\n", |
| 595 | + "In addition, we must now specify the ECR image URL, which we just pushed above.\n", |
| 596 | + "\n", |
| 597 | + "Finally, our local training dataset has to be in Amazon S3 and the S3 URL to our dataset is passed into the `fit()` call.\n", |
| 598 | + "\n", |
| 599 | + "Let's first fetch our ECR image url that corresponds to the image we just built and pushed." |
| 600 | + ] |
| 601 | + }, |
| 602 | + { |
| 603 | + "cell_type": "code", |
| 604 | + "execution_count": null, |
| 605 | + "metadata": {}, |
| 606 | + "outputs": [], |
| 607 | + "source": [ |
| 608 | + "import boto3\n", |
| 609 | + "\n", |
| 610 | + "client = boto3.client('sts')\n", |
| 611 | + "account = client.get_caller_identity()['Account']\n", |
584 | 612 | "\n",
|
585 | | - "Finally, our local training dataset has to be in Amazon S3 and the S3 URL to our dataset is passed into the `fit()` call." |
| 613 | + "my_session = boto3.session.Session()\n", |
| 614 | + "region = my_session.region_name\n", |
| 615 | + "\n", |
| 616 | + "algorithm_name = 'tensorflow-cifar10-example'\n", |
| 617 | + "\n", |
| 618 | + "ecr_image = '{}.dkr.ecr.{}.amazonaws.com/{}:latest'.format(account, region, algorithm_name)\n", |
| 619 | + "\n", |
| 620 | + "print(ecr_image)" |
586 | 621 | ]
|
587 | 622 | },
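Before calling `fit()`, the TFRecords produced earlier also need to be in S3. One way to get them there is the SageMaker session's upload helper, sketched here; the key prefix is illustrative and the session's default bucket is assumed:

```python
import sagemaker

sess = sagemaker.Session()

# Upload the local TFRecords to the session's default S3 bucket.
data_location = sess.upload_data('/tmp/cifar-10-data',
                                 key_prefix='data/tensorflow-cifar10-example')

print(data_location)  # s3://<default-bucket>/data/tensorflow-cifar10-example
```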
|
588 | 623 | {
|
|
600 | 635 | "estimator = Estimator(role=role,\n",
|
601 | 636 | " train_instance_count=1,\n",
|
602 | 637 | " train_instance_type=instance_type,\n",
|
603 | | - " image_name='ecr-image',\n", |
| 638 | + " image_name=ecr_image,\n", |
604 | 639 | " hyperparameters=hyperparameters)\n",
|
605 | 640 | "\n",
|
606 | 641 | "estimator.fit(data_location)\n",
|
|
610 | 645 | },
|
611 | 646 | {
|
612 | 647 | "cell_type": "code",
|
613 | | - "execution_count": 33, |
| 648 | + "execution_count": null, |
614 | 649 | "metadata": {},
|
615 | | - "outputs": [ |
616 | | - { |
617 | | - "data": { |
618 | | - "text/plain": [ |
619 | | - "{'predictions': [{'probabilities': [0.115806,\n", |
620 | | - " 0.119459,\n", |
621 | | - " 0.028497,\n", |
622 | | - " 0.348986,\n", |
623 | | - " 0.102692,\n", |
624 | | - " 0.0354596,\n", |
625 | | - " 0.0917221,\n", |
626 | | - " 0.00540253,\n", |
627 | | - " 0.121872,\n", |
628 | | - " 0.0301034],\n", |
629 | | - " 'classes': 3}]}" |
630 | | - ] |
631 | | - }, |
632 | | - "execution_count": 33, |
633 | | - "metadata": {}, |
634 | | - "output_type": "execute_result" |
635 | | - } |
636 | | - ], |
| 650 | + "outputs": [], |
637 | 651 | "source": [
|
638 | 652 | "image = cv2.imread(\"data/cat.png\", 1)\n",
|
639 | 653 | "\n",
|
|
662 | 676 | },
|
663 | 677 | {
|
664 | 678 | "cell_type": "code",
|
665 | | - "execution_count": 25, |
| 679 | + "execution_count": null, |
666 | 680 | "metadata": {},
|
667 | | - "outputs": [ |
668 | | - { |
669 | | - "name": "stdout", |
670 | | - "output_type": "stream", |
671 | | - "text": [ |
672 | | - "b'{\\n \"predictions\": [\\n {\\n \"classes\": 3,\\n \"probabilities\": [0.122724, 0.0958609, 0.0519071, 0.272535, 0.097384, 0.0535893, 0.0905842, 0.0250508, 0.123435, 0.0669298]\\n }\\n ]\\n}'\n" |
673 | | - ] |
674 | | - } |
675 | | - ], |
| 681 | + "outputs": [], |
676 | 682 | "source": [
|
677 | 683 | "import json\n",
|
678 | 684 | "\n",
|
|
681 | 687 | "endpoint_name = predictor.endpoint\n",
|
682 | 688 | "\n",
|
683 | 689 | "response = client.invoke_endpoint(EndpointName=endpoint_name, Body=json.dumps(data))\n",
|
684 | | - "response_body = response['Body'].decode('utf-8')\n", |
| 690 | + "response_body = response['Body']\n", |
685 | 691 | "\n",
|
686 | | - "print(response_body.read())" |
| 692 | + "print(response_body.read().decode('utf-8'))" |
687 | 693 | ]
|
688 | 694 | },
|
689 | 695 | {
|
|