Skip to content

Improving endpoint deletion #9

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 7, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions tests/integ/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import logging
import os
DATA_DIR = os.path.join(os.path.dirname(__file__), '..', 'data')
REGION = 'us-west-2'

logging.getLogger('boto3').setLevel(logging.INFO)
logging.getLogger('botocore').setLevel(logging.INFO)
25 changes: 12 additions & 13 deletions tests/integ/test_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,17 @@
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import gzip
import os
import pickle
import sys
import pytest # noqa

import boto3
import os

import sagemaker
from sagemaker import KMeans, KMeansModel

from sagemaker.utils import name_from_base
from tests.integ import DATA_DIR, REGION
from tests.integ.timeout import timeout
from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name


def test_kmeans():
Expand Down Expand Up @@ -49,15 +50,13 @@ def test_kmeans():

kmeans.fit(kmeans.record_set(train_set[0][:100]))

with timeout(minutes=15):
endpoint_name = name_from_base('kmeans')
with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=15):
model = KMeansModel(kmeans.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session)
predictor = model.deploy(1, 'ml.c4.xlarge')
predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
result = predictor.predict(train_set[0][:10])

try:
assert len(result) == 10
for record in result:
assert record.label["closest_cluster"] is not None
assert record.label["distance_to_cluster"] is not None
finally:
sagemaker_session.delete_endpoint(predictor.endpoint)
assert len(result) == 10
for record in result:
assert record.label["closest_cluster"] is not None
assert record.label["distance_to_cluster"] is not None
22 changes: 11 additions & 11 deletions tests/integ/test_linear_learner.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@
import numpy as np
import sagemaker
from sagemaker.amazon.linear_learner import LinearLearner, LinearLearnerModel
from sagemaker.utils import name_from_base

from tests.integ import DATA_DIR, REGION
from tests.integ.timeout import timeout
from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name


def test_linear_learner():
Expand Down Expand Up @@ -72,15 +73,14 @@ def test_linear_learner():
ll.num_point_for_scala = 10000
ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]))

with timeout(minutes=15):
endpoint_name = name_from_base('linear-learner')
with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=15):

model = LinearLearnerModel(ll.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session)
predictor = model.deploy(1, 'ml.c4.xlarge')
predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)

try:
result = predictor.predict(train_set[0][0:100])
assert len(result) == 100
for record in result:
assert record.label["predicted_label"] is not None
assert record.label["score"] is not None
finally:
sagemaker_session.delete_endpoint(predictor.endpoint)
result = predictor.predict(train_set[0][0:100])
assert len(result) == 100
for record in result:
assert record.label["predicted_label"] is not None
assert record.label["score"] is not None
30 changes: 14 additions & 16 deletions tests/integ/test_mxnet_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from sagemaker.mxnet.model import MXNetModel

from tests.integ import DATA_DIR, REGION
from tests.integ.timeout import timeout
from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name


@pytest.fixture(scope='module')
Expand Down Expand Up @@ -49,26 +49,24 @@ def mxnet_training_job(sagemaker_session):


def test_attach_deploy(mxnet_training_job, sagemaker_session):
with timeout(minutes=15):
endpoint_name = 'test-mxnet-attach-deploy-{}'.format(int(time.time()))

with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=15):
estimator = MXNet.attach(mxnet_training_job, sagemaker_session=sagemaker_session)
predictor = estimator.deploy(1, 'ml.m4.xlarge',
endpoint_name='test-mxnet-attach-deploy-{}'.format(int(time.time())))
try:
data = numpy.zeros(shape=(1, 1, 28, 28))
predictor.predict(data)
finally:
sagemaker_session.delete_endpoint(predictor.endpoint)
predictor = estimator.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
data = numpy.zeros(shape=(1, 1, 28, 28))
predictor.predict(data)


def test_deploy_model(mxnet_training_job, sagemaker_session):
with timeout(minutes=15):
endpoint_name = 'test-mxnet-deploy-model-{}'.format(int(time.time()))

with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=15):
desc = sagemaker_session.sagemaker_client.describe_training_job(TrainingJobName=mxnet_training_job)
model_data = desc['ModelArtifacts']['S3ModelArtifacts']
script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'mnist.py')
model = MXNetModel(model_data, 'SageMakerRole', entry_point=script_path, sagemaker_session=sagemaker_session)
predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name='test-mxnet-deploy-model-{}'.format(int(time.time())))
try:
data = numpy.zeros(shape=(1, 1, 28, 28))
predictor.predict(data)
finally:
sagemaker_session.delete_endpoint(predictor.endpoint)
predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)

data = numpy.zeros(shape=(1, 1, 28, 28))
predictor.predict(data)
20 changes: 10 additions & 10 deletions tests/integ/test_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@
import boto3
import sagemaker
import sagemaker.amazon.pca
from sagemaker.utils import name_from_base

from tests.integ import DATA_DIR, REGION
from tests.integ.timeout import timeout
from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name


def test_pca():
Expand All @@ -42,16 +43,15 @@ def test_pca():
pca.extra_components = 5
pca.fit(pca.record_set(train_set[0][:100]))

with timeout(minutes=15):
endpoint_name = name_from_base('pca')
with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=15):
pca_model = sagemaker.amazon.pca.PCAModel(model_data=pca.model_data, role='SageMakerRole',
sagemaker_session=sagemaker_session)
predictor = pca_model.deploy(initial_instance_count=1, instance_type="ml.c4.xlarge")
predictor = pca_model.deploy(initial_instance_count=1, instance_type="ml.c4.xlarge",
endpoint_name=endpoint_name)

try:
result = predictor.predict(train_set[0][:5])
result = predictor.predict(train_set[0][:5])

assert len(result) == 5
for record in result:
assert record.label["projection"] is not None
finally:
sagemaker_session.delete_endpoint(predictor.endpoint)
assert len(result) == 5
for record in result:
assert record.label["projection"] is not None
49 changes: 7 additions & 42 deletions tests/integ/test_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,14 @@
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import os

import boto3
import os
import pytest

from sagemaker import Session
from sagemaker.tensorflow import TensorFlow

from tests.integ import DATA_DIR, REGION
from tests.integ.timeout import timeout

DATA_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
from tests.integ.timeout import timeout_and_delete_endpoint, timeout


@pytest.fixture(scope='module')
Expand All @@ -47,40 +44,8 @@ def test_tf(sagemaker_session):
estimator.fit(inputs)
print('job succeeded: {}'.format(estimator.latest_training_job.name))

try:
with timeout(minutes=15):
json_predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge')

result = json_predictor.predict([6.4, 3.2, 4.5, 1.5])
print('predict result: {}'.format(result))
finally:
try:
estimator.delete_endpoint()
except Exception:
pass


def test_cifar(sagemaker_session):
with timeout(minutes=15):
script_path = os.path.join(DATA_DIR, 'cifar_10', 'source')

dataset_path = os.path.join(DATA_DIR, 'cifar_10', 'data')

estimator = TensorFlow(entry_point='resnet_cifar_10.py', source_dir=script_path, role='SageMakerRole',
training_steps=20, evaluation_steps=5,
train_instance_count=2, train_instance_type='ml.p2.xlarge',
sagemaker_session=sagemaker_session,
base_job_name='test-cifar')

inputs = estimator.sagemaker_session.upload_data(path=dataset_path, key_prefix='data/cifar10')
estimator.fit(inputs)
print('job succeeded: {}'.format(estimator.latest_training_job.name))
with timeout_and_delete_endpoint(estimator=estimator, minutes=15):
json_predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge')

try:
with timeout(minutes=15):
estimator.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge')
finally:
try:
estimator.delete_endpoint()
except Exception:
pass
result = json_predictor.predict([6.4, 3.2, 4.5, 1.5])
print('predict result: {}'.format(result))
45 changes: 45 additions & 0 deletions tests/integ/test_tf_cifar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import boto3
import os
import pytest

from sagemaker import Session
from sagemaker.tensorflow import TensorFlow
from tests.integ import DATA_DIR, REGION
from tests.integ.timeout import timeout_and_delete_endpoint, timeout


@pytest.fixture(scope='module')
def sagemaker_session():
    """Module-scoped fixture returning a SageMaker ``Session`` bound to ``REGION``."""
    # Build the boto3 session first so the region binding is explicit.
    regional_boto_session = boto3.Session(region_name=REGION)
    return Session(boto_session=regional_boto_session)


def test_cifar(sagemaker_session):
    """Train the CIFAR-10 ResNet example, then deploy it to an endpoint.

    Training runs under a 15 minute timeout. Deployment runs under a separate
    15 minute timeout whose context manager also deletes the endpoint on exit.
    """
    with timeout(minutes=15):
        cifar_dir = os.path.join(DATA_DIR, 'cifar_10')
        source_dir = os.path.join(cifar_dir, 'source')
        data_dir = os.path.join(cifar_dir, 'data')

        estimator = TensorFlow(
            entry_point='resnet_cifar_10.py',
            source_dir=source_dir,
            role='SageMakerRole',
            training_steps=20,
            evaluation_steps=5,
            train_instance_count=2,
            train_instance_type='ml.p2.xlarge',
            sagemaker_session=sagemaker_session,
            base_job_name='test-cifar',
        )

        # Upload the local dataset and train on the uploaded copy.
        s3_inputs = estimator.sagemaker_session.upload_data(path=data_dir, key_prefix='data/cifar10')
        estimator.fit(s3_inputs)

        job_name = estimator.latest_training_job.name
        print('job succeeded: {}'.format(job_name))

    # NOTE(review): indentation was lost in this view; the deploy step is assumed
    # to be a sibling of the training block, not nested inside it -- confirm.
    with timeout_and_delete_endpoint(estimator=estimator, minutes=15):
        estimator.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge')
23 changes: 23 additions & 0 deletions tests/integ/timeout.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
# language governing permissions and limitations under the License.
import signal
from contextlib import contextmanager
import logging

LOGGER = logging.getLogger('timeout')


class TimeoutError(Exception):
Expand Down Expand Up @@ -48,3 +51,23 @@ def handler(signum, frame):
yield
finally:
signal.alarm(0)


@contextmanager
def timeout_and_delete_endpoint(estimator, seconds=0, minutes=0, hours=0):
    """Run the ``with`` body under ``timeout`` and delete the estimator's endpoint on exit.

    Args:
        estimator: object exposing ``delete_endpoint()``; it is called once when
            the body finishes, whether it succeeded or raised.
        seconds: forwarded to ``timeout``.
        minutes: forwarded to ``timeout``.
        hours: forwarded to ``timeout``.

    Yields:
        A one-element list holding whatever ``timeout`` binds with ``as``.

    Raises:
        Whatever the body raises. If the body succeeded, an error raised by
        ``delete_endpoint()`` propagates. If the body already failed, a
        cleanup error is logged instead so it cannot hide the real failure.
    """
    with timeout(seconds=seconds, minutes=minutes, hours=hours) as t:
        body_failed = True
        try:
            yield [t]
            body_failed = False
        finally:
            try:
                estimator.delete_endpoint()
                LOGGER.info('deleted endpoint')
            except Exception:
                # With a healthy body the cleanup failure is the only error, so
                # surface it. Otherwise keep the body's exception as the one
                # reported (e.g. deploy failed and there is nothing to delete).
                if not body_failed:
                    raise
                LOGGER.exception('failed to delete endpoint after an earlier failure')


@contextmanager
def timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, seconds=0, minutes=0, hours=0):
    """Run the ``with`` body under ``timeout`` and delete the named endpoint on exit.

    Args:
        endpoint_name: name passed to ``sagemaker_session.delete_endpoint``.
        sagemaker_session: object exposing ``delete_endpoint(name)``; it is
            called once when the body finishes, whether it succeeded or raised.
        seconds: forwarded to ``timeout``.
        minutes: forwarded to ``timeout``.
        hours: forwarded to ``timeout``.

    Yields:
        A one-element list holding whatever ``timeout`` binds with ``as``.

    Raises:
        Whatever the body raises. If the body succeeded, an error raised by
        ``delete_endpoint`` propagates. If the body already failed, a cleanup
        error is logged instead so it cannot hide the real failure.
    """
    with timeout(seconds=seconds, minutes=minutes, hours=hours) as t:
        body_failed = True
        try:
            yield [t]
            body_failed = False
        finally:
            try:
                sagemaker_session.delete_endpoint(endpoint_name)
                LOGGER.info('deleted endpoint {}'.format(endpoint_name))
            except Exception:
                # With a healthy body the cleanup failure is the only error, so
                # surface it. Otherwise keep the body's exception as the one
                # reported (e.g. deploy failed and the endpoint never existed).
                if not body_failed:
                    raise
                LOGGER.exception('failed to delete endpoint {} after an earlier failure'.format(endpoint_name))
4 changes: 4 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ exclude =
max-complexity = 10

[testenv]
# The TEAMCITY_VERSION environment variable is set during builds on TeamCity. teamcity-messages uses it to enable
# reporting to TeamCity.
passenv = TEAMCITY_VERSION
# {posargs} can be passed in by additional arguments specified when invoking tox.
# Can be used to specify which tests to run, e.g.: tox -- -s
commands =
Expand All @@ -31,6 +34,7 @@ deps =
tensorflow
mock
contextlib2
teamcity-messages

[testenv:flake8]
basepython = python
Expand Down