Skip to content

fix: make marketplace jobnames random #2708

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Oct 20, 2021
11 changes: 5 additions & 6 deletions tests/integ/test_marketplace.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@
from sagemaker import AlgorithmEstimator, ModelPackage
from sagemaker.serializers import CSVSerializer
from sagemaker.tuner import IntegerParameter, HyperparameterTuner
from sagemaker.utils import sagemaker_timestamp
from sagemaker.utils import _aws_partition
from sagemaker.utils import sagemaker_timestamp, _aws_partition, unique_name_from_base
from tests.integ import DATA_DIR
from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name
from tests.integ.marketplace_utils import REGION_ACCOUNT_MAP
Expand Down Expand Up @@ -117,7 +116,7 @@ def test_marketplace_attach(sagemaker_session, cpu_instance_type):
instance_count=1,
instance_type=cpu_instance_type,
sagemaker_session=sagemaker_session,
base_job_name="test-marketplace",
base_job_name=unique_name_from_base("test-marketplace"),
)

train_input = mktplace.sagemaker_session.upload_data(
Expand Down Expand Up @@ -205,7 +204,7 @@ def test_marketplace_tuning_job(sagemaker_session, cpu_instance_type):
instance_count=1,
instance_type=cpu_instance_type,
sagemaker_session=sagemaker_session,
base_job_name="test-marketplace",
base_job_name=unique_name_from_base("test-marketplace"),
)

train_input = mktplace.sagemaker_session.upload_data(
Expand All @@ -218,7 +217,7 @@ def test_marketplace_tuning_job(sagemaker_session, cpu_instance_type):

tuner = HyperparameterTuner(
estimator=mktplace,
base_tuning_job_name="byo",
base_tuning_job_name=unique_name_from_base("byo"),
objective_metric_name="validation:accuracy",
hyperparameter_ranges=hyperparameter_ranges,
max_jobs=2,
Expand Down Expand Up @@ -248,7 +247,7 @@ def test_marketplace_transform_job(sagemaker_session, cpu_instance_type):
instance_count=1,
instance_type=cpu_instance_type,
sagemaker_session=sagemaker_session,
base_job_name="test-marketplace",
base_job_name=unique_name_from_base("test-marketplace"),
)

train_input = algo.sagemaker_session.upload_data(
Expand Down
6 changes: 5 additions & 1 deletion tests/integ/test_model_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,11 @@ def predictor(sagemaker_session, tensorflow_inference_latest_version):
key_prefix="tensorflow-serving/models",
)
with tests.integ.timeout.timeout_and_delete_endpoint_by_name(
endpoint_name=endpoint_name, sagemaker_session=sagemaker_session, hours=2
endpoint_name=endpoint_name,
sagemaker_session=sagemaker_session,
hours=2,
sleep_between_cleanup_attempts=20,
exponential_sleep=True,
):
model = TensorFlowModel(
model_data=model_data,
Expand Down
13 changes: 11 additions & 2 deletions tests/integ/timeout.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def timeout_and_delete_endpoint_by_name(
minutes=45,
hours=0,
sleep_between_cleanup_attempts=10,
exponential_sleep=False,
):
limit = seconds + 60 * minutes + 3600 * hours

Expand Down Expand Up @@ -83,7 +84,13 @@ def timeout_and_delete_endpoint_by_name(
# swallow the error so the exception raised inside the `with` body is not overwritten
pass
# wait before retrying the deletion: sleep_between_cleanup_attempts seconds, multiplied by the attempt number when exponential_sleep is set
sleep(sleep_between_cleanup_attempts)
if exponential_sleep:
_sleep_between_cleanup_attempts = sleep_between_cleanup_attempts * (
3 - attempts
)
else:
_sleep_between_cleanup_attempts = sleep_between_cleanup_attempts
sleep(_sleep_between_cleanup_attempts)


@contextmanager
Expand Down Expand Up @@ -150,7 +157,9 @@ def _delete_schedules_associated_with_endpoint(sagemaker_session, endpoint_name)
monitor.delete_monitoring_schedule()
except Exception as e:
LOGGER.warning(
"Failed to delete monitor {}".format(monitor.monitoring_schedule_name), e
"Failed to delete monitor {},\nError: {}".format(
monitor.monitoring_schedule_name, e
)
)


Expand Down
36 changes: 35 additions & 1 deletion tests/unit/test_timeout.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import time

import pytest
from mock import Mock, patch
from mock import Mock, patch, call
import stopit

from botocore.exceptions import ClientError
Expand All @@ -44,6 +44,7 @@
LONG_DURATION_TO_EXCEED_TIMEOUT = 0.002
LONG_TIMEOUT_THAT_WILL_NEVER_BE_EXCEEDED = 10
DURATION_TO_SLEEP_TO_ALLOW_BACKGROUND_THREAD_TO_COMPLETE = 0.2
DURATION_TO_SLEEP = 0.01


@pytest.fixture()
Expand Down Expand Up @@ -174,6 +175,39 @@ def test_timeout_and_delete_endpoint_by_name_retries_resource_deletion_on_failur
assert session.delete_endpoint.call_count == 3


@patch("tests.integ.timeout._show_logs", return_value=None, autospec=True)
@patch("tests.integ.timeout._cleanup_logs", return_value=None, autospec=True)
@patch(
    "tests.integ.timeout._delete_schedules_associated_with_endpoint",
    return_value=None,
    autospec=True,
)
@patch("tests.integ.timeout.sleep", return_value=None)
def test_timeout_and_delete_endpoint_by_name_retries_resource_deletion_on_failure_with_exp_sleep(
    mock_sleep, _show_logs, _cleanup_logs, _delete_schedules_associated_with_endpoint, session
):
    """Check the retry back-off used when endpoint deletion keeps failing.

    ``delete_endpoint`` is made to raise a ``ClientError`` on every call and
    ``sleep`` is patched out, so the test only records the delays requested.
    With ``exponential_sleep=True`` the context manager is expected to try the
    deletion three times and to sleep for one, two and then three multiples of
    the base delay.
    """
    # The same error instance is raised by every delete attempt.
    deletion_error = ClientError(
        error_response={"Error": {"Code": 403, "Message": "ValidationException"}},
        operation_name="Unit Test",
    )
    session.delete_endpoint = Mock(side_effect=deletion_error)

    # The body does nothing: only the cleanup that runs on exit is under test.
    with timeout_and_delete_endpoint_by_name(
        endpoint_name=ENDPOINT_NAME,
        sagemaker_session=session,
        hours=0,
        minutes=0,
        seconds=LONG_TIMEOUT_THAT_WILL_NEVER_BE_EXCEEDED,
        sleep_between_cleanup_attempts=DURATION_TO_SLEEP,
        exponential_sleep=True,
    ):
        pass

    # One sleep follows each of the three failed deletion attempts.
    assert session.delete_endpoint.call_count == 3
    assert mock_sleep.call_count == 3
    assert mock_sleep.mock_calls == [call(0.01), call(0.02), call(0.03)]


@patch("tests.integ.timeout._show_logs", return_value=None, autospec=True)
@patch("tests.integ.timeout._cleanup_logs", return_value=None, autospec=True)
@patch(
Expand Down