diff --git a/tests/integ/test_marketplace.py b/tests/integ/test_marketplace.py index 208fa199d5..f8f6f2b3a4 100644 --- a/tests/integ/test_marketplace.py +++ b/tests/integ/test_marketplace.py @@ -24,8 +24,7 @@ from sagemaker import AlgorithmEstimator, ModelPackage from sagemaker.serializers import CSVSerializer from sagemaker.tuner import IntegerParameter, HyperparameterTuner -from sagemaker.utils import sagemaker_timestamp -from sagemaker.utils import _aws_partition +from sagemaker.utils import sagemaker_timestamp, _aws_partition, unique_name_from_base from tests.integ import DATA_DIR from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name from tests.integ.marketplace_utils import REGION_ACCOUNT_MAP @@ -117,7 +116,7 @@ def test_marketplace_attach(sagemaker_session, cpu_instance_type): instance_count=1, instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, - base_job_name="test-marketplace", + base_job_name=unique_name_from_base("test-marketplace"), ) train_input = mktplace.sagemaker_session.upload_data( @@ -205,7 +204,7 @@ def test_marketplace_tuning_job(sagemaker_session, cpu_instance_type): instance_count=1, instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, - base_job_name="test-marketplace", + base_job_name=unique_name_from_base("test-marketplace"), ) train_input = mktplace.sagemaker_session.upload_data( @@ -218,7 +217,7 @@ def test_marketplace_tuning_job(sagemaker_session, cpu_instance_type): tuner = HyperparameterTuner( estimator=mktplace, - base_tuning_job_name="byo", + base_tuning_job_name=unique_name_from_base("byo"), objective_metric_name="validation:accuracy", hyperparameter_ranges=hyperparameter_ranges, max_jobs=2, @@ -248,7 +247,7 @@ def test_marketplace_transform_job(sagemaker_session, cpu_instance_type): instance_count=1, instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, - base_job_name="test-marketplace", + base_job_name=unique_name_from_base("test-marketplace"), ) train_input = algo.sagemaker_session.upload_data( diff --git a/tests/integ/test_model_monitor.py b/tests/integ/test_model_monitor.py index 5ddce5b589..17b42b47f9 100644 --- a/tests/integ/test_model_monitor.py +++ b/tests/integ/test_model_monitor.py @@ -92,7 +92,11 @@ def predictor(sagemaker_session, tensorflow_inference_latest_version): key_prefix="tensorflow-serving/models", ) with tests.integ.timeout.timeout_and_delete_endpoint_by_name( - endpoint_name=endpoint_name, sagemaker_session=sagemaker_session, hours=2 + endpoint_name=endpoint_name, + sagemaker_session=sagemaker_session, + hours=2, + sleep_between_cleanup_attempts=20, + exponential_sleep=True, ): model = TensorFlowModel( model_data=model_data, diff --git a/tests/integ/timeout.py b/tests/integ/timeout.py index 6ac6d027c6..f45a136254 100644 --- a/tests/integ/timeout.py +++ b/tests/integ/timeout.py @@ -54,6 +54,7 @@ def timeout_and_delete_endpoint_by_name( minutes=45, hours=0, sleep_between_cleanup_attempts=10, + exponential_sleep=False, ): limit = seconds + 60 * minutes + 3600 * hours @@ -83,7 +84,13 @@ def timeout_and_delete_endpoint_by_name( # avoids the inner exception to be overwritten pass # trying to delete the resource again in 10 seconds - sleep(sleep_between_cleanup_attempts) + if exponential_sleep: + _sleep_between_cleanup_attempts = sleep_between_cleanup_attempts * ( + 3 - attempts + ) + else: + _sleep_between_cleanup_attempts = sleep_between_cleanup_attempts + sleep(_sleep_between_cleanup_attempts) @contextmanager @@ -150,7 +157,9 @@ def _delete_schedules_associated_with_endpoint(sagemaker_session, endpoint_name) monitor.delete_monitoring_schedule() except Exception as e: LOGGER.warning( - "Failed to delete monitor {}".format(monitor.monitoring_schedule_name), e + "Failed to delete monitor {},\nError: {}".format( + monitor.monitoring_schedule_name, e + ) ) diff --git a/tests/unit/test_timeout.py b/tests/unit/test_timeout.py index 22eea1dd59..6d40a2e6dd 100644 --- a/tests/unit/test_timeout.py +++ b/tests/unit/test_timeout.py @@ -19,7 +19,7 @@ import time import pytest -from mock import Mock, patch +from mock import Mock, patch, call import stopit from botocore.exceptions import ClientError @@ -44,6 +44,7 @@ LONG_DURATION_TO_EXCEED_TIMEOUT = 0.002 LONG_TIMEOUT_THAT_WILL_NEVER_BE_EXCEEDED = 10 DURATION_TO_SLEEP_TO_ALLOW_BACKGROUND_THREAD_TO_COMPLETE = 0.2 +DURATION_TO_SLEEP = 0.01 @pytest.fixture() @@ -174,6 +175,39 @@ def test_timeout_and_delete_endpoint_by_name_retries_resource_deletion_on_failur assert session.delete_endpoint.call_count == 3 +@patch("tests.integ.timeout._show_logs", return_value=None, autospec=True) +@patch("tests.integ.timeout._cleanup_logs", return_value=None, autospec=True) +@patch( + "tests.integ.timeout._delete_schedules_associated_with_endpoint", + return_value=None, + autospec=True, +) +@patch("tests.integ.timeout.sleep", return_value=None) +def test_timeout_and_delete_endpoint_by_name_retries_resource_deletion_on_failure_with_exp_sleep( + mock_sleep, _show_logs, _cleanup_logs, _delete_schedules_associated_with_endpoint, session +): + session.delete_endpoint = Mock( + side_effect=ClientError( + error_response={"Error": {"Code": 403, "Message": "ValidationException"}}, + operation_name="Unit Test", + ) + ) + + with timeout_and_delete_endpoint_by_name( + endpoint_name=ENDPOINT_NAME, + sagemaker_session=session, + hours=0, + minutes=0, + seconds=LONG_TIMEOUT_THAT_WILL_NEVER_BE_EXCEEDED, + sleep_between_cleanup_attempts=DURATION_TO_SLEEP, + exponential_sleep=True, + ): + pass + assert session.delete_endpoint.call_count == 3 + assert mock_sleep.call_count == 3 + assert mock_sleep.mock_calls == [call(0.01), call(0.02), call(0.03)] + + @patch("tests.integ.timeout._show_logs", return_value=None, autospec=True) @patch("tests.integ.timeout._cleanup_logs", return_value=None, autospec=True) @patch(