Skip to content

Commit f610d05

Browse files
author
Dan Choi
committed
Allow local mode and add image
1 parent fbb1466 commit f610d05

File tree

4 files changed

+158
-148
lines changed

4 files changed

+158
-148
lines changed
Loading

advanced_functionality/tensorflow_bring_your_own/tensorflow_bring_your_own.ipynb

Lines changed: 80 additions & 148 deletions
Original file line numberDiff line numberDiff line change
@@ -213,20 +213,36 @@
213213
"name": "stdout",
214214
"output_type": "stream",
215215
"text": [
216-
"# https://docs.docker.com/compose/gettingstarted/#step-2-create-a-dockerfile\n",
217-
"FROM tensorflow/tensorflow:1.8.0-py3\n",
218-
"\n",
219-
"RUN apt-get update && apt-get install -y --no-install-recommends nginx curl\n",
220-
"\n",
221-
"# Download tensorflow serving\n",
222-
"RUN echo \"deb [arch=amd64] http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal\" | tee /etc/apt/sources.list.d/tensorflow-serving.list\n",
223-
"RUN curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | apt-key add -\n",
224-
"RUN apt-get update && apt-get install tensorflow-model-server\n",
225-
"\n",
226-
"ENV PATH=\"/opt/ml/code:${PATH}\"\n",
227-
"\n",
228-
"COPY /cifar10 /opt/ml/code\n",
229-
"WORKDIR /opt/ml/code\n"
216+
"# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.\r\n",
217+
"#\r\n",
218+
"# Licensed under the Apache License, Version 2.0 (the \"License\"). You\r\n",
219+
"# may not use this file except in compliance with the License. A copy of\r\n",
220+
"# the License is located at\r\n",
221+
"#\r\n",
222+
"# http://aws.amazon.com/apache2.0/\r\n",
223+
"#\r\n",
224+
"# or in the \"license\" file accompanying this file. This file is\r\n",
225+
"# distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\r\n",
226+
"# ANY KIND, either express or implied. See the License for the specific\r\n",
227+
"# language governing permissions and limitations under the License.\r\n",
228+
"\r\n",
229+
"# For more information on creating a Dockerfile\r\n",
230+
"# https://docs.docker.com/compose/gettingstarted/#step-2-create-a-dockerfile\r\n",
231+
"FROM tensorflow/tensorflow:1.8.0-py3\r\n",
232+
"\r\n",
233+
"RUN apt-get update && apt-get install -y --no-install-recommends nginx curl\r\n",
234+
"\r\n",
235+
"# Download TensorFlow Serving\r\n",
236+
"# https://www.tensorflow.org/serving/setup#installing_the_modelserver\r\n",
237+
"RUN echo \"deb [arch=amd64] http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal\" | tee /etc/apt/sources.list.d/tensorflow-serving.list\r\n",
238+
"RUN curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | apt-key add -\r\n",
239+
"RUN apt-get update && apt-get install tensorflow-model-server\r\n",
240+
"\r\n",
241+
"ENV PATH=\"/opt/ml/code:${PATH}\"\r\n",
242+
"\r\n",
243+
"# /opt/ml and all subdirectories are utilized by SageMaker, we use the /code subdirectory to store our user code.\r\n",
244+
"COPY /cifar10 /opt/ml/code\r\n",
245+
"WORKDIR /opt/ml/code"
230246
]
231247
}
232248
],
@@ -310,48 +326,18 @@
310326
},
311327
{
312328
"cell_type": "code",
313-
"execution_count": 1,
329+
"execution_count": null,
314330
"metadata": {},
315-
"outputs": [
316-
{
317-
"name": "stdout",
318-
"output_type": "stream",
319-
"text": [
320-
"/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
321-
" from ._conv import register_converters as _register_converters\n",
322-
"Download from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz and extract.\n",
323-
"WARNING:tensorflow:From utils/generate_cifar10_tfrecords.py:45: maybe_download (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.\n",
324-
"Instructions for updating:\n",
325-
"Please write your own downloading logic.\n",
326-
"WARNING:tensorflow:From /anaconda3/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/base.py:252: _internal_retry.<locals>.wrap.<locals>.wrapped_fn (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.\n",
327-
"Instructions for updating:\n",
328-
"Please use urllib or similar directly.\n",
329-
"Successfully downloaded cifar-10-python.tar.gz 170498071 bytes.\n",
330-
"Generating /tmp/cifar-10-data/train.tfrecords\n",
331-
"Generating /tmp/cifar-10-data/validation.tfrecords\n",
332-
"Generating /tmp/cifar-10-data/eval.tfrecords\n",
333-
"Removing original files.\n",
334-
"Done!\n"
335-
]
336-
}
337-
],
331+
"outputs": [],
338332
"source": [
339333
"! python utils/generate_cifar10_tfrecords.py --data-dir=/tmp/cifar-10-data"
340334
]
341335
},
342336
{
343337
"cell_type": "code",
344-
"execution_count": 2,
338+
"execution_count": null,
345339
"metadata": {},
346-
"outputs": [
347-
{
348-
"name": "stdout",
349-
"output_type": "stream",
350-
"text": [
351-
"eval.tfrecords train.tfrecords validation.tfrecords\n"
352-
]
353-
}
354-
],
340+
"outputs": [],
355341
"source": [
356342
"! ls /tmp/cifar-10-data "
357343
]
@@ -373,7 +359,7 @@
373359
},
374360
{
375361
"cell_type": "code",
376-
"execution_count": 2,
362+
"execution_count": 5,
377363
"metadata": {},
378364
"outputs": [],
379365
"source": [
@@ -401,71 +387,18 @@
401387
},
402388
{
403389
"cell_type": "code",
404-
"execution_count": 3,
390+
"execution_count": null,
405391
"metadata": {},
406-
"outputs": [
407-
{
408-
"name": "stderr",
409-
"output_type": "stream",
410-
"text": [
411-
"INFO:sagemaker:Creating training-job with name: tensorflow_cifar10_example-2018-08-01-08-13-48-683\n"
412-
]
413-
},
414-
{
415-
"name": "stdout",
416-
"output_type": "stream",
417-
"text": [
418-
"Attaching to tmp1mcb1n6k_algo-1-ASVN5_1\n",
419-
"Creating tmp1mcb1n6k_algo-1-ASVN5_1 ... \n",
420-
"\u001b[36malgo-1-ASVN5_1 |\u001b[0m Training complete.\n",
421-
"\u001b[1B\u001b[36mtmp1mcb1n6k_algo-1-ASVN5_1 exited with code 0\n",
422-
"\u001b[0mAborting on container exit...\n",
423-
"===== Job Complete =====\n"
424-
]
425-
},
426-
{
427-
"name": "stderr",
428-
"output_type": "stream",
429-
"text": [
430-
"INFO:sagemaker:Creating model with name: tensorflow_cifar10_example-2018-08-01-08-14-21-042\n",
431-
"INFO:sagemaker:Creating endpoint with name tensorflow_cifar10_example-2018-08-01-08-13-48-683\n",
432-
"WARNING:urllib3.connectionpool:Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<urllib3.connection.HTTPConnection object at 0x10ba8d5c0>: Failed to establish a new connection: [Errno 61] Connection refused',)': /ping\n",
433-
"WARNING:urllib3.connectionpool:Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<urllib3.connection.HTTPConnection object at 0x10ba8d048>: Failed to establish a new connection: [Errno 61] Connection refused',)': /ping\n",
434-
"WARNING:urllib3.connectionpool:Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<urllib3.connection.HTTPConnection object at 0x10bc12748>: Failed to establish a new connection: [Errno 61] Connection refused',)': /ping\n",
435-
"WARNING:urllib3.connectionpool:Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response',))': /ping\n",
436-
"WARNING:urllib3.connectionpool:Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response',))': /ping\n",
437-
"WARNING:urllib3.connectionpool:Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response',))': /ping\n"
438-
]
439-
},
440-
{
441-
"name": "stdout",
442-
"output_type": "stream",
443-
"text": [
444-
"Attaching to tmpi66tdkz1_algo-1-DZB6N_1\n",
445-
"Creating tmpi66tdkz1_algo-1-DZB6N_1 ... \n",
446-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m Starting the inference server with 2 workers.\n",
447-
"\u001b[1B\u001b[36malgo-1-DZB6N_1 |\u001b[0m 2018-08-01 08:14:22.729603: I tensorflow_serving/model_servers/main.cc:153] Building single TensorFlow model file config: model_name: cifar10_model model_base_path: /opt/ml/model/export/Servo\n",
448-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m 2018-08-01 08:14:22.730876: I tensorflow_serving/model_servers/server_core.cc:459] Adding/updating models.\n",
449-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m 2018-08-01 08:14:22.730991: I tensorflow_serving/model_servers/server_core.cc:514] (Re-)adding model: cifar10_model\n",
450-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m 2018-08-01 08:14:22.850950: I tensorflow_serving/core/basic_manager.cc:716] Successfully reserved resources to load servable {name: cifar10_model version: 1533111259}\n",
451-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m 2018-08-01 08:14:22.851019: I tensorflow_serving/core/loader_harness.cc:66] Approving load for servable version {name: cifar10_model version: 1533111259}\n",
452-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m 2018-08-01 08:14:22.851048: I tensorflow_serving/core/loader_harness.cc:74] Loading servable version {name: cifar10_model version: 1533111259}\n",
453-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m 2018-08-01 08:14:22.852366: I external/org_tensorflow/tensorflow/contrib/session_bundle/bundle_shim.cc:360] Attempting to load native SavedModelBundle in bundle-shim from: /opt/ml/model/export/Servo/1533111259\n",
454-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m 2018-08-01 08:14:22.853667: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:242] Loading SavedModel with tags: { serve }; from: /opt/ml/model/export/Servo/1533111259\n",
455-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m 2018-08-01 08:14:22.872867: I external/org_tensorflow/tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA\n",
456-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m 2018-08-01 08:14:22.918331: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:161] Restoring SavedModel bundle.\n",
457-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m 2018-08-01 08:14:22.982049: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:196] Running LegacyInitOp on SavedModel bundle.\n",
458-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m 2018-08-01 08:14:23.016319: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:291] SavedModel load for tags { serve }; Status: success. Took 163280 microseconds.\n",
459-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m 2018-08-01 08:14:23.018543: I tensorflow_serving/servables/tensorflow/saved_model_warmup.cc:83] No warmup data file found at /opt/ml/model/export/Servo/1533111259/assets.extra/tf_serving_warmup_requests\n",
460-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m 2018-08-01 08:14:23.033650: I tensorflow_serving/core/loader_harness.cc:86] Successfully loaded servable version {name: cifar10_model version: 1533111259}\n",
461-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m 2018-08-01 08:14:23.038377: I tensorflow_serving/model_servers/main.cc:323] Running ModelServer at 0.0.0.0:8500 ...\n",
462-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m [warn] getaddrinfo: address family for nodename not supported\n",
463-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m 2018-08-01 08:14:23.041668: I tensorflow_serving/model_servers/main.cc:333] Exporting HTTP/REST API at:localhost:8501 ...\n",
464-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m [evhttp_server.cc : 235] RAW: Entering the event loop ...\n",
465-
"!\u001b[36malgo-1-DZB6N_1 |\u001b[0m 172.18.0.1 - - [01/Aug/2018:08:14:23 +0000] \"GET /ping HTTP/1.1\" 200 2 \"-\" \"-\"\n"
466-
]
467-
}
468-
],
392+
"outputs": [],
393+
"source": [
394+
"!/bin/bash ./utils/setup.sh"
395+
]
396+
},
397+
{
398+
"cell_type": "code",
399+
"execution_count": null,
400+
"metadata": {},
401+
"outputs": [],
469402
"source": [
470403
"from sagemaker.estimator import Estimator\n",
471404
"\n",
@@ -490,49 +423,46 @@
490423
"source": [
491424
"## Making predictions using Python SDK\n",
492425
"\n",
493-
"In order to do some predictions, we will use an image, which will be converted using OpenCV, into a json format to send as an inference request."
426+
"To make predictions, we will use an image, which will be converted with OpenCV into JSON format and sent as an inference request. We first need to install OpenCV, as we are using it to deserialize the image that we will use for predictions."
494427
]
495428
},
496429
{
497430
"cell_type": "code",
498-
"execution_count": 4,
431+
"execution_count": 11,
499432
"metadata": {},
500433
"outputs": [
501434
{
502435
"name": "stdout",
503436
"output_type": "stream",
504437
"text": [
505-
"\u001b[36malgo-1-DZB6N_1 |\u001b[0m 172.18.0.1 - - [01/Aug/2018:08:14:28 +0000] \"POST /invocations HTTP/1.1\" 200 236 \"-\" \"-\"\n"
438+
"Collecting opencv-python\n",
439+
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/53/e0/21c8964fa8ef50842ebefaa7346a3cf0e37b56c8ecd97ed6bd2dbe577705/opencv_python-3.4.2.17-cp36-cp36m-manylinux1_x86_64.whl (25.0MB)\n",
440+
"\u001b[K 100% |████████████████████████████████| 25.0MB 2.1MB/s eta 0:00:01\n",
441+
"\u001b[?25hRequirement already satisfied: numpy>=1.11.3 in /home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages (from opencv-python) (1.14.5)\n",
442+
"\u001b[31mdistributed 1.21.8 requires msgpack, which is not installed.\u001b[0m\n",
443+
"Installing collected packages: opencv-python\n",
444+
"Successfully installed opencv-python-3.4.2.17\n",
445+
"\u001b[33mYou are using pip version 10.0.1, however version 18.0 is available.\n",
446+
"You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n"
506447
]
507-
},
508-
{
509-
"data": {
510-
"text/plain": [
511-
"{'predictions': [{'probabilities': [0.0203898,\n",
512-
" 0.00186337,\n",
513-
" 0.00125851,\n",
514-
" 0.116194,\n",
515-
" 0.860205,\n",
516-
" 2.24463e-10,\n",
517-
" 3.87913e-06,\n",
518-
" 8.29227e-05,\n",
519-
" 1.4244e-06,\n",
520-
" 1.60669e-06],\n",
521-
" 'classes': 4}]}"
522-
]
523-
},
524-
"execution_count": 4,
525-
"metadata": {},
526-
"output_type": "execute_result"
527448
}
528449
],
450+
"source": [
451+
"! pip install opencv-python"
452+
]
453+
},
454+
{
455+
"cell_type": "code",
456+
"execution_count": null,
457+
"metadata": {},
458+
"outputs": [],
529459
"source": [
530460
"import cv2\n",
531461
"import numpy\n",
532462
"\n",
533463
"from sagemaker.predictor import json_serializer, json_deserializer\n",
534464
"\n",
535-
"image = cv2.imread(\"/path/to/image.png\", 1)\n",
465+
"image = cv2.imread(\"data/cat.png\", 1)\n",
536466
"\n",
537467
"# resize, as our model is expecting images in 32x32.\n",
538468
"image = cv2.resize(image, (32, 32))\n",
@@ -554,22 +484,24 @@
554484
},
555485
{
556486
"cell_type": "code",
557-
"execution_count": 5,
487+
"execution_count": 25,
558488
"metadata": {},
559489
"outputs": [
560490
{
561491
"name": "stderr",
562492
"output_type": "stream",
563493
"text": [
564-
"INFO:sagemaker:Deleting endpoint with name: tensorflow_cifar10_example-2018-08-01-08-13-48-683\n"
494+
"INFO:sagemaker:Deleting endpoint with name: tensorflow_cifar10_example-2018-08-03-18-06-55-168\n"
565495
]
566496
},
567497
{
568498
"name": "stdout",
569499
"output_type": "stream",
570500
"text": [
571501
"Gracefully stopping... (press Ctrl+C again to force)\n",
572-
"Stopping tmpi66tdkz1_algo-1-DZB6N_1 ... \n",
502+
"Stopping tmp3n0u5hj2_algo-1-HCRIC_1 ... \r\n",
503+
"\u001b[1A\u001b[2K\r",
504+
"Stopping tmp3n0u5hj2_algo-1-HCRIC_1 ... \u001b[32mdone\u001b[0m\r",
573505
"\u001b[1B"
574506
]
575507
}
@@ -591,7 +523,7 @@
591523
},
592524
{
593525
"cell_type": "code",
594-
"execution_count": 19,
526+
"execution_count": 26,
595527
"metadata": {},
596528
"outputs": [],
597529
"source": [
@@ -610,7 +542,7 @@
610542
},
611543
{
612544
"cell_type": "code",
613-
"execution_count": 20,
545+
"execution_count": 27,
614546
"metadata": {},
615547
"outputs": [],
616548
"source": [
@@ -632,7 +564,7 @@
632564
},
633565
{
634566
"cell_type": "code",
635-
"execution_count": null,
567+
"execution_count": 28,
636568
"metadata": {},
637569
"outputs": [],
638570
"source": [
@@ -648,7 +580,7 @@
648580
"## Training on SageMaker\n",
649581
"Training on SageMaker with the Python SDK is done in the same fashion as training locally. This is done by changing our train_instance_type from `local` to one of our [supported EC2 instance types](https://aws.amazon.com/sagemaker/pricing/instance-types/).\n",
650582
"\n",
651-
"In addition, we must now specify an ECR image url, which we just pushed above.\n",
583+
"In addition, we must now specify the ECR URL of the image that we just pushed above. Please make sure to replace the string passed to the Estimator parameter `image_name` with your own image URL.\n",
652584
"\n",
653585
"Finally, our local training dataset has to be in Amazon S3 and the S3 url to our dataset will be passed into the `fit()` call."
654586
]
@@ -703,7 +635,7 @@
703635
}
704636
],
705637
"source": [
706-
"image = cv2.imread(\"/path/to/image.png\", 1)\n",
638+
"image = cv2.imread(\"data/cat.png\", 1)\n",
707639
"\n",
708640
"# resize, as our model is expecting images in 32x32.\n",
709641
"image = cv2.resize(image, (32, 32))\n",
@@ -789,9 +721,9 @@
789721
],
790722
"metadata": {
791723
"kernelspec": {
792-
"display_name": "Python 3",
724+
"display_name": "conda_tensorflow_p36",
793725
"language": "python",
794-
"name": "python3"
726+
"name": "conda_tensorflow_p36"
795727
},
796728
"language_info": {
797729
"codemirror_mode": {
@@ -803,7 +735,7 @@
803735
"name": "python",
804736
"nbconvert_exporter": "python",
805737
"pygments_lexer": "ipython3",
806-
"version": "3.6.5"
738+
"version": "3.6.4"
807739
}
808740
},
809741
"nbformat": 4,
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
2+
{
3+
"default-runtime": "nvidia",
4+
"runtimes": {
5+
"nvidia": {
6+
"path": "/usr/bin/nvidia-container-runtime",
7+
"runtimeArgs": []
8+
}
9+
}
10+
}

0 commit comments

Comments
 (0)