From a466bf0a340d0cc75612964132269464675a1ca6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1rcio=20Dos=20Santos?=
Date: Thu, 7 Dec 2017 14:34:00 -0800
Subject: [PATCH 1/7] capture ValidationException errors when deleting endpoints so they do not override previous exceptions

---
 tests/integ/timeout.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/tests/integ/timeout.py b/tests/integ/timeout.py
index 4a5e0d23d8..5fd2ac0578 100644
--- a/tests/integ/timeout.py
+++ b/tests/integ/timeout.py
@@ -14,6 +14,8 @@
 from contextlib import contextmanager
 import logging
 
+from botocore.exceptions import ClientError
+
 LOGGER = logging.getLogger('timeout')
 
 
@@ -59,8 +61,13 @@ def timeout_and_delete_endpoint(estimator, seconds=0, minutes=0, hours=0):
     try:
         yield [t]
     finally:
-        estimator.delete_endpoint()
-        LOGGER.info('deleted endpoint')
+        try:
+            estimator.delete_endpoint()
+            LOGGER.info('deleted endpoint')
+        except ClientError as ce:
+            # ignore ValidationException so it does not mask the original error
+            if ce.response['Error']['Code'] != 'ValidationException':
+                raise
 
 
 @contextmanager
@@ -69,5 +76,10 @@ def timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, second
     try:
         yield [t]
     finally:
-        sagemaker_session.delete_endpoint(endpoint_name)
-        LOGGER.info('deleted endpoint {}'.format(endpoint_name))
+        try:
+            sagemaker_session.delete_endpoint(endpoint_name)
+            LOGGER.info('deleted endpoint {}'.format(endpoint_name))
+        except ClientError as ce:
+            # ignore ValidationException so it does not mask the original error
+            if ce.response['Error']['Code'] != 'ValidationException':
+                raise
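Note: the guard above matters because an exception raised while a ``finally`` block runs replaces the exception already propagating from the ``try`` block. A minimal sketch of that masking behavior (the function name is illustrative)::

    def cleanup_masks_original_error():
        try:
            raise RuntimeError('original test failure')  # the error we care about
        finally:
            # if delete_endpoint raised here, the RuntimeError above would be lost
            raise ValueError('cleanup failed')

    try:
        cleanup_masks_original_error()
    except Exception as error:
        print(type(error).__name__)  # prints 'ValueError', not 'RuntimeError'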
From 4fb4bf01317aabb6c4ddc2ca62f8968286c20d50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1rcio=20Dos=20Santos?=
Date: Mon, 11 Dec 2017 12:40:51 -0800
Subject: [PATCH 2/7] add custom prediction to the CIFAR-10 test

---
 tests/data/cifar_10/source/resnet_cifar_10.py | 27 ++++++++++-----
 tests/integ/test_tf_cifar.py                  | 34 ++++++++++++++++++-
 2 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/tests/data/cifar_10/source/resnet_cifar_10.py b/tests/data/cifar_10/source/resnet_cifar_10.py
index e20d149ec8..cccae2d308 100644
--- a/tests/data/cifar_10/source/resnet_cifar_10.py
+++ b/tests/data/cifar_10/source/resnet_cifar_10.py
@@ -2,6 +2,8 @@
 from __future__ import division
 from __future__ import print_function
 
+import pickle
+
 import resnet_model
 import tensorflow as tf
 
@@ -106,21 +108,19 @@ def model_fn(features, labels, mode, params):
 
 
 def serving_input_fn(hyperpameters):
-    feature_spec = {INPUT_TENSOR_NAME: tf.FixedLenFeature(dtype=tf.float32, shape=(32, 32, 3))}
-    return tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)()
+    inputs = {INPUT_TENSOR_NAME: tf.placeholder(tf.float32, [None, 32, 32, 3])}
+    return tf.estimator.export.ServingInputReceiver(inputs, inputs)
 
 
-def train_input_fn(training_dir, hyperpameters):
-    return input_fn(tf.estimator.ModeKeys.TRAIN,
-                    batch_size=BATCH_SIZE, data_dir=training_dir)
+def train_input_fn(training_dir, hyperparameters):
+    return _input_fn(tf.estimator.ModeKeys.TRAIN, batch_size=BATCH_SIZE)
 
 
-def eval_input_fn(training_dir, hyperpameters):
-    return input_fn(tf.estimator.ModeKeys.EVAL,
-                    batch_size=BATCH_SIZE, data_dir=training_dir)
+def eval_input_fn(training_dir, hyperparameters):
+    return _input_fn(tf.estimator.ModeKeys.EVAL, batch_size=BATCH_SIZE)
 
 
-def input_fn(mode, batch_size, data_dir):
+def _input_fn(mode, batch_size):
     input_shape = [batch_size, HEIGHT, WIDTH, DEPTH]
     images = tf.truncated_normal(
         input_shape,
@@ -138,3 +138,12 @@ def input_fn(mode, batch_size, data_dir):
     labels = tf.contrib.framework.local_variable(labels, name='labels')
 
     return {INPUT_TENSOR_NAME: images}, labels
+
+
+def input_fn(serialized_data, content_type):
+    data = pickle.loads(serialized_data)
+    return data
+
+
+def output_fn(data, accepts):
+    return pickle.dumps(data)
diff --git a/tests/integ/test_tf_cifar.py b/tests/integ/test_tf_cifar.py
index ef5ebfd82b..982667de06 100644
--- a/tests/integ/test_tf_cifar.py
+++ b/tests/integ/test_tf_cifar.py
@@ -10,7 +10,10 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
+import pickle
+
 import boto3
+import numpy as np
 import os
 import pytest
 
@@ -19,12 +22,34 @@
 from tests.integ import DATA_DIR, REGION
 from tests.integ.timeout import timeout_and_delete_endpoint, timeout
 
+PICKLE_CONTENT_TYPE = "application/python-pickle"
+
 
 @pytest.fixture(scope='module')
 def sagemaker_session():
     return Session(boto_session=boto3.Session(region_name=REGION))
 
 
+class PickleSerializer(object):
+    def __init__(self):
+        self.content_type = PICKLE_CONTENT_TYPE
+
+    def __call__(self, data):
+        return pickle.dumps(data)
+
+
+class PickleDeserializer(object):
+    def __init__(self):
+        self.accept = PICKLE_CONTENT_TYPE
+
+    def __call__(self, stream, content_type):
+        try:
+            data = stream.read().decode()
+            return pickle.loads(data)
+        finally:
+            stream.close()
+
+
 def test_cifar(sagemaker_session):
     with timeout(minutes=15):
         script_path = os.path.join(DATA_DIR, 'cifar_10', 'source')
@@ -42,4 +67,11 @@ def test_cifar(sagemaker_session):
         print('job succeeded: {}'.format(estimator.latest_training_job.name))
 
     with timeout_and_delete_endpoint(estimator=estimator, minutes=20):
-        estimator.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge')
+        predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.p2.xlarge')
+        predictor.serializer = PickleSerializer()
+        predictor.deserializer = PickleDeserializer()
+
+        data = np.random.randn(32, 32, 3)
+        predict_response = predictor.predict(data)
+
+        assert len(predict_response.outputs['probabilities'].float_val) == 10
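Note: the test relies on the predictor's pluggable serializer and deserializer hooks. A rough sketch of the contract assumed here (illustrative only, not the SDK's actual implementation; ``invoke`` stands in for the InvokeEndpoint call)::

    import pickle

    class SketchPredictor(object):
        def __init__(self, invoke):
            self.invoke = invoke    # assumed to return (stream, content_type)
            self.serializer = None  # callable with a .content_type attribute
            self.deserializer = None  # callable taking (stream, content_type)

        def predict(self, data):
            # serialize the input, post it with the serializer's content type,
            # then hand the response stream to the deserializer
            payload = self.serializer(data)
            stream, content_type = self.invoke(payload, self.serializer.content_type)
            return self.deserializer(stream, content_type)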
From 2f5f92783b6fc141a4a93d16a90c2281e0f202c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1rcio=20Dos=20Santos?=
Date: Mon, 11 Dec 2017 12:53:51 -0800
Subject: [PATCH 3/7] improved documentation about input and output functions

---
 README.rst | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/README.rst b/README.rst
index 4a76573766..ba380fe5dd 100644
--- a/README.rst
+++ b/README.rst
@@ -1358,11 +1358,11 @@ An example of ``input_fn`` for the content-type "application/python-pickle" can
 
     import numpy as np
 
-    def input_fn(data, content_type):
-        """An input_fn that loads a pickled numpy array"""
+    def input_fn(serialized_input, content_type):
+        """An input_fn that loads a pickled object"""
         if request_content_type == "application/python-pickle":
-            array = np.load(StringIO(request_body))
-            return array.reshape(model.data_shpaes[0])
+            deserialized_input = pickle.loads(serialized_input)
+            return deserialized_input
         else:
             # Handle other content-types here or raise an Exception
             # if the content type is not supported.
@@ -1377,7 +1377,7 @@ An example of ``output_fn`` for the accept type "application/python-pickle" can
 
     import numpy as np
 
-    def output_fn(data, accepts):
+    def output_fn(prediction_result, accepts):
         """An output_fn that dumps a pickled numpy as response"""
         if request_content_type == "application/python-pickle":
             return np.dumps(data)
@@ -1386,6 +1386,9 @@ An example of ``output_fn`` for the accept type "application/python-pickle" can
             # if the content type is not supported.
             pass
 
+A example with the ``input_fn`` and ``output_fn`` above can be find in
+`here `_.
+
 SageMaker TensorFlow Docker containers
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

From 96165dd02ca913bacf44103a1569f7f49af747de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1rcio=20Dos=20Santos?=
Date: Mon, 11 Dec 2017 14:47:07 -0800
Subject: [PATCH 4/7] fix grammar mistakes

---
 README.rst                                    | 4 ++--
 tests/data/cifar_10/source/resnet_cifar_10.py | 9 ++++-----
 tests/integ/test_tf_cifar.py                  | 2 +-
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/README.rst b/README.rst
index ba380fe5dd..fcab31b79d 100644
--- a/README.rst
+++ b/README.rst
@@ -1380,13 +1380,13 @@ An example of ``output_fn`` for the accept type "application/python-pickle" can
     def output_fn(prediction_result, accepts):
         """An output_fn that dumps a pickled numpy as response"""
         if request_content_type == "application/python-pickle":
-            return np.dumps(data)
+            return np.dumps(prediction_result)
         else:
             # Handle other content-types here or raise an Exception
             # if the content type is not supported.
             pass
 
-A example with the ``input_fn`` and ``output_fn`` above can be find in
+An example with the ``input_fn`` and ``output_fn`` above can be found
 `here `_.
 
 SageMaker TensorFlow Docker containers
diff --git a/tests/data/cifar_10/source/resnet_cifar_10.py b/tests/data/cifar_10/source/resnet_cifar_10.py
index cccae2d308..8fad7fc9f0 100644
--- a/tests/data/cifar_10/source/resnet_cifar_10.py
+++ b/tests/data/cifar_10/source/resnet_cifar_10.py
@@ -113,14 +113,14 @@ def serving_input_fn(hyperpameters):
 
 
 def train_input_fn(training_dir, hyperparameters):
-    return _input_fn(tf.estimator.ModeKeys.TRAIN, batch_size=BATCH_SIZE)
+    return _generate_synthetic_data(tf.estimator.ModeKeys.TRAIN, batch_size=BATCH_SIZE)
 
 
 def eval_input_fn(training_dir, hyperparameters):
-    return _input_fn(tf.estimator.ModeKeys.EVAL, batch_size=BATCH_SIZE)
+    return _generate_synthetic_data(tf.estimator.ModeKeys.EVAL, batch_size=BATCH_SIZE)
 
 
-def _input_fn(mode, batch_size):
+def _generate_synthetic_data(mode, batch_size):
     input_shape = [batch_size, HEIGHT, WIDTH, DEPTH]
     images = tf.truncated_normal(
         input_shape,
@@ -141,8 +141,7 @@ def _generate_synthetic_data(mode, batch_size):
 
 
 def input_fn(serialized_data, content_type):
-    data = pickle.loads(serialized_data)
-    return data
+    return pickle.loads(serialized_data)
 
 
 def output_fn(data, accepts):
diff --git a/tests/integ/test_tf_cifar.py b/tests/integ/test_tf_cifar.py
index 982667de06..9580dbd967 100644
--- a/tests/integ/test_tf_cifar.py
+++ b/tests/integ/test_tf_cifar.py
@@ -22,7 +22,7 @@
 from tests.integ import DATA_DIR, REGION
 from tests.integ.timeout import timeout_and_delete_endpoint, timeout
 
-PICKLE_CONTENT_TYPE = "application/python-pickle"
+PICKLE_CONTENT_TYPE = 'application/python-pickle'
 
 
 @pytest.fixture(scope='module')
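Note: pieced together from the hunks above, a self-contained version of the pickle handler pair looks like the following (a sketch, assuming the serving container passes the raw request body and the request's content type straight through)::

    import pickle

    PICKLE_CONTENT_TYPE = 'application/python-pickle'

    def input_fn(serialized_data, content_type):
        """Deserialize a pickled request body into the prediction input."""
        if content_type == PICKLE_CONTENT_TYPE:
            return pickle.loads(serialized_data)
        raise ValueError('unsupported content type: {}'.format(content_type))

    def output_fn(data, accepts):
        """Serialize the prediction result back into a pickle."""
        if accepts == PICKLE_CONTENT_TYPE:
            return pickle.dumps(data)
        raise ValueError('unsupported accept type: {}'.format(accepts))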
From f022f2b145bc6185adba2b8a3f830947c5743b7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1rcio=20Dos=20Santos?=
Date: Mon, 11 Dec 2017 14:57:13 -0800
Subject: [PATCH 5/7] fix comment in README file

---
 README.rst       |  2 +-
 tests/integ/a.py | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 tests/integ/a.py

diff --git a/README.rst b/README.rst
index 154dcd8130..8318eea474 100644
--- a/README.rst
+++ b/README.rst
@@ -1378,7 +1378,7 @@ An example of ``output_fn`` for the accept type "application/python-pickle" can
     import numpy as np
 
     def output_fn(prediction_result, accepts):
-        """An output_fn that dumps a pickled numpy as response"""
+        """An output_fn that dumps a pickled object as response"""
         if request_content_type == "application/python-pickle":
             return np.dumps(prediction_result)
         else:
diff --git a/tests/integ/a.py b/tests/integ/a.py
new file mode 100644
index 0000000000..d4591eac1a
--- /dev/null
+++ b/tests/integ/a.py
@@ -0,0 +1,41 @@
+# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import boto3
+import os
+
+from sagemaker import Session
+from sagemaker.tensorflow import TensorFlow
+from tests.integ import DATA_DIR, REGION
+
+DATA_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
+
+
+sagemaker_session = Session(boto_session=boto3.Session(region_name=REGION))
+
+
+script_path = os.path.join(DATA_DIR, 'iris', 'iris-dnn-classifier.py')
+data_path = os.path.join(DATA_DIR, 'iris', 'data')
+
+estimator = TensorFlow(entry_point=script_path,
+                       role='SageMakerRole',
+                       training_steps=1000,
+                       evaluation_steps=100,
+                       hyperparameters={'input_tensor_name': 'inputs'},
+                       train_instance_count=2,
+                       train_instance_type='ml.c4.xlarge',
+                       sagemaker_session=sagemaker_session,
+                       base_job_name='test-tf')
+
+inputs = estimator.sagemaker_session.upload_data(path=data_path, key_prefix='integ-test-data/tf_iris')
+estimator.fit(inputs)
+print('job succeeded: {}'.format(estimator.latest_training_job.name))

From 1da65a9d18f563f2125a244115dc712c46e02f6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1rcio=20Dos=20Santos?=
Date: Mon, 11 Dec 2017 15:00:22 -0800
Subject: [PATCH 6/7] remove file mistakenly added

---
 tests/integ/a.py | 41 -----------------------------------------
 1 file changed, 41 deletions(-)
 delete mode 100644 tests/integ/a.py

diff --git a/tests/integ/a.py b/tests/integ/a.py
deleted file mode 100644
index d4591eac1a..0000000000
--- a/tests/integ/a.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-import boto3
-import os
-
-from sagemaker import Session
-from sagemaker.tensorflow import TensorFlow
-from tests.integ import DATA_DIR, REGION
-
-DATA_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
-
-
-sagemaker_session = Session(boto_session=boto3.Session(region_name=REGION))
-
-
-script_path = os.path.join(DATA_DIR, 'iris', 'iris-dnn-classifier.py')
-data_path = os.path.join(DATA_DIR, 'iris', 'data')
-
-estimator = TensorFlow(entry_point=script_path,
-                       role='SageMakerRole',
-                       training_steps=1000,
-                       evaluation_steps=100,
-                       hyperparameters={'input_tensor_name': 'inputs'},
-                       train_instance_count=2,
-                       train_instance_type='ml.c4.xlarge',
-                       sagemaker_session=sagemaker_session,
-                       base_job_name='test-tf')
-
-inputs = estimator.sagemaker_session.upload_data(path=data_path, key_prefix='integ-test-data/tf_iris')
-estimator.fit(inputs)
-print('job succeeded: {}'.format(estimator.latest_training_job.name))

From bf6cc681bb2fd9c0af8642fdb339cb92947f12f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1rcio=20Dos=20Santos?=
Date: Fri, 15 Dec 2017 14:53:27 -0800
Subject: [PATCH 7/7] fix test prediction for Python 3

---
 tests/data/cifar_10/source/resnet_cifar_10.py |  4 ----
 tests/integ/test_tf_cifar.py                  | 19 +++----------------
 2 files changed, 3 insertions(+), 20 deletions(-)

diff --git a/tests/data/cifar_10/source/resnet_cifar_10.py b/tests/data/cifar_10/source/resnet_cifar_10.py
index 8fad7fc9f0..48ce081bfa 100644
--- a/tests/data/cifar_10/source/resnet_cifar_10.py
+++ b/tests/data/cifar_10/source/resnet_cifar_10.py
@@ -142,7 +142,3 @@ def _generate_synthetic_data(mode, batch_size):
 
 def input_fn(serialized_data, content_type):
     return pickle.loads(serialized_data)
-
-
-def output_fn(data, accepts):
-    return pickle.dumps(data)
diff --git a/tests/integ/test_tf_cifar.py b/tests/integ/test_tf_cifar.py
index 9580dbd967..c08426a93a 100644
--- a/tests/integ/test_tf_cifar.py
+++ b/tests/integ/test_tf_cifar.py
@@ -35,19 +35,7 @@ def sagemaker_session():
         self.content_type = PICKLE_CONTENT_TYPE
 
     def __call__(self, data):
-        return pickle.dumps(data)
-
-
-class PickleDeserializer(object):
-    def __init__(self):
-        self.accept = PICKLE_CONTENT_TYPE
-
-    def __call__(self, stream, content_type):
-        try:
-            data = stream.read().decode()
-            return pickle.loads(data)
-        finally:
-            stream.close()
+        return pickle.dumps(data, protocol=2)
 
 
 def test_cifar(sagemaker_session):
@@ -57,9 +57,8 @@ def test_cifar(sagemaker_session):
     with timeout_and_delete_endpoint(estimator=estimator, minutes=20):
         predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.p2.xlarge')
         predictor.serializer = PickleSerializer()
-        predictor.deserializer = PickleDeserializer()
+        predictor.content_type = PICKLE_CONTENT_TYPE
 
         data = np.random.randn(32, 32, 3)
-        predict_response = predictor.predict(data)
-
-        assert len(predict_response.outputs['probabilities'].float_val) == 10
+        predict_response = predictor.predict(data)
+        assert len(predict_response['outputs']['probabilities']['floatVal']) == 10
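Note: pinning ``pickle.dumps`` to ``protocol=2`` in the serializer is what makes the Python 3 fix work: protocol 2 is the highest pickle protocol Python 2 can read, so a Python 3 test client can still talk to a serving container running Python 2 (an assumption about the container runtime, consistent with the change above). A quick illustration::

    import pickle

    import numpy as np

    data = np.random.randn(32, 32, 3)
    payload = pickle.dumps(data, protocol=2)  # readable by both Python 2 and 3
    assert np.array_equal(pickle.loads(payload), data)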