Skip to content

Commit eb5099c

Browse files
mvsusp authored and yangaws committed
Deleting endpoint for PyTorch test, retrying endpoint deletion 3 times (#297)
1 parent 2e5577f commit eb5099c

File tree

2 files changed

+21
-12
lines changed

2 files changed

+21
-12
lines changed

tests/integ/test_pytorch_train.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def fixture_training_job(sagemaker_session, pytorch_full_version):
4141
def test_sync_fit_deploy(pytorch_training_job, sagemaker_session):
4242
# TODO: add tests against local mode when it's ready to be used
4343
endpoint_name = 'test-pytorch-sync-fit-attach-deploy{}'.format(sagemaker_timestamp())
44-
with timeout(minutes=20):
44+
with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
4545
estimator = PyTorch.attach(pytorch_training_job, sagemaker_session=sagemaker_session)
4646
predictor = estimator.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
4747
data = numpy.zeros(shape=(1, 1, 28, 28), dtype=numpy.float32)

tests/integ/timeout.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
import signal
1616
from contextlib import contextmanager
1717
import logging
18+
from time import sleep
19+
1820
from awslogs.core import AWSLogs
1921
from botocore.exceptions import ClientError
2022

@@ -65,17 +67,24 @@ def timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, second
6567
yield [t]
6668
no_errors = True
6769
finally:
68-
try:
69-
sagemaker_session.delete_endpoint(endpoint_name)
70-
LOGGER.info('deleted endpoint {}'.format(endpoint_name))
71-
72-
_show_endpoint_logs(endpoint_name, sagemaker_session)
73-
if no_errors:
74-
_cleanup_endpoint_logs(endpoint_name, sagemaker_session)
75-
except ClientError as ce:
76-
if ce.response['Error']['Code'] == 'ValidationException':
77-
# avoids the inner exception to be overwritten
78-
pass
70+
attempts = 3
71+
72+
while attempts > 0:
73+
attempts -= 1
74+
try:
75+
sagemaker_session.delete_endpoint(endpoint_name)
76+
LOGGER.info('deleted endpoint {}'.format(endpoint_name))
77+
78+
_show_endpoint_logs(endpoint_name, sagemaker_session)
79+
if no_errors:
80+
_cleanup_endpoint_logs(endpoint_name, sagemaker_session)
81+
return
82+
except ClientError as ce:
83+
if ce.response['Error']['Code'] == 'ValidationException':
84+
# avoids the inner exception to be overwritten
85+
pass
86+
# trying to delete the resource again in 10 seconds
87+
sleep(10)
7988

8089

8190
def _show_endpoint_logs(endpoint_name, sagemaker_session):

0 commit comments

Comments
 (0)