From 8a4245651b74967b3f408bea57a576b97ab50c94 Mon Sep 17 00:00:00 2001 From: Mufaddal Rohawala Date: Fri, 15 Oct 2021 00:06:45 -0700 Subject: [PATCH 1/4] fix: make marketplace jobnames random --- tests/integ/marketplace_utils.py | 5 +++++ tests/integ/test_marketplace.py | 10 +++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/integ/marketplace_utils.py b/tests/integ/marketplace_utils.py index a0b7515a97..99ba6b6af8 100644 --- a/tests/integ/marketplace_utils.py +++ b/tests/integ/marketplace_utils.py @@ -11,6 +11,7 @@ # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from __future__ import absolute_import +import uuid REGION_ACCOUNT_MAP = { "us-east-1": "865070037744", @@ -34,3 +35,7 @@ "cn-north-1": "295401494951", "cn-northwest-1": "304690803264", } + + +def random_8_digit_alpha_numeric_gen(): + return str(uuid.uuid4().hex.upper()[0:8]) diff --git a/tests/integ/test_marketplace.py b/tests/integ/test_marketplace.py index 208fa199d5..4ee14729c3 100644 --- a/tests/integ/test_marketplace.py +++ b/tests/integ/test_marketplace.py @@ -28,7 +28,7 @@ from sagemaker.utils import _aws_partition from tests.integ import DATA_DIR from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name -from tests.integ.marketplace_utils import REGION_ACCOUNT_MAP +from tests.integ.marketplace_utils import REGION_ACCOUNT_MAP, random_8_digit_alpha_numeric_gen # All these tests require a manual 1 time subscription to the following Marketplace items: @@ -117,7 +117,7 @@ def test_marketplace_attach(sagemaker_session, cpu_instance_type): instance_count=1, instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, - base_job_name="test-marketplace", + base_job_name="test-marketplace" + random_8_digit_alpha_numeric_gen(), ) train_input = mktplace.sagemaker_session.upload_data( @@ -205,7 +205,7 @@ def test_marketplace_tuning_job(sagemaker_session, cpu_instance_type): instance_count=1, instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, - base_job_name="test-marketplace", + base_job_name="test-marketplace" + random_8_digit_alpha_numeric_gen(), ) train_input = mktplace.sagemaker_session.upload_data( @@ -218,7 +218,7 @@ def test_marketplace_tuning_job(sagemaker_session, cpu_instance_type): tuner = HyperparameterTuner( estimator=mktplace, - base_tuning_job_name="byo", + base_tuning_job_name="byo" + random_8_digit_alpha_numeric_gen(), objective_metric_name="validation:accuracy", hyperparameter_ranges=hyperparameter_ranges, max_jobs=2, @@ -248,7 +248,7 @@ def test_marketplace_transform_job(sagemaker_session, cpu_instance_type): instance_count=1, instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, - base_job_name="test-marketplace", + base_job_name="test-marketplace" + random_8_digit_alpha_numeric_gen(), ) train_input = algo.sagemaker_session.upload_data( From 98f2a02a99febbbd5ab8f7efbc9eef9e9c2585f4 Mon Sep 17 00:00:00 2001 From: Mufaddal Rohawala Date: Fri, 15 Oct 2021 16:16:50 -0700 Subject: [PATCH 2/4] fix: make marketplace jobnames unique --- tests/integ/marketplace_utils.py | 5 ----- tests/integ/test_marketplace.py | 13 ++++++------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/tests/integ/marketplace_utils.py b/tests/integ/marketplace_utils.py index 99ba6b6af8..a0b7515a97 100644 --- a/tests/integ/marketplace_utils.py +++ b/tests/integ/marketplace_utils.py @@ -11,7 +11,6 @@ # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from __future__ import absolute_import -import uuid REGION_ACCOUNT_MAP = { "us-east-1": "865070037744", @@ -35,7 +34,3 @@ "cn-north-1": "295401494951", "cn-northwest-1": "304690803264", } - - -def random_8_digit_alpha_numeric_gen(): - return str(uuid.uuid4().hex.upper()[0:8]) diff --git a/tests/integ/test_marketplace.py b/tests/integ/test_marketplace.py index 4ee14729c3..f8f6f2b3a4 100644 --- a/tests/integ/test_marketplace.py +++ b/tests/integ/test_marketplace.py @@ -24,11 +24,10 @@ from sagemaker import AlgorithmEstimator, ModelPackage from sagemaker.serializers import CSVSerializer from sagemaker.tuner import IntegerParameter, HyperparameterTuner -from sagemaker.utils import sagemaker_timestamp -from sagemaker.utils import _aws_partition +from sagemaker.utils import sagemaker_timestamp, _aws_partition, unique_name_from_base from tests.integ import DATA_DIR from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name -from tests.integ.marketplace_utils import REGION_ACCOUNT_MAP, random_8_digit_alpha_numeric_gen +from tests.integ.marketplace_utils import REGION_ACCOUNT_MAP # All these tests require a manual 1 time subscription to the following Marketplace items: @@ -117,7 +116,7 @@ def test_marketplace_attach(sagemaker_session, cpu_instance_type): instance_count=1, instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, - base_job_name="test-marketplace" + random_8_digit_alpha_numeric_gen(), + base_job_name=unique_name_from_base("test-marketplace"), ) train_input = mktplace.sagemaker_session.upload_data( @@ -205,7 +204,7 @@ def test_marketplace_tuning_job(sagemaker_session, cpu_instance_type): instance_count=1, instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, - base_job_name="test-marketplace" + random_8_digit_alpha_numeric_gen(), + base_job_name=unique_name_from_base("test-marketplace"), ) train_input = mktplace.sagemaker_session.upload_data( @@ -218,7 +217,7 @@ def test_marketplace_tuning_job(sagemaker_session, cpu_instance_type): tuner = HyperparameterTuner( estimator=mktplace, - base_tuning_job_name="byo" + random_8_digit_alpha_numeric_gen(), + base_tuning_job_name=unique_name_from_base("byo"), objective_metric_name="validation:accuracy", hyperparameter_ranges=hyperparameter_ranges, max_jobs=2, @@ -248,7 +247,7 @@ def test_marketplace_transform_job(sagemaker_session, cpu_instance_type): instance_count=1, instance_type=cpu_instance_type, sagemaker_session=sagemaker_session, - base_job_name="test-marketplace" + random_8_digit_alpha_numeric_gen(), + base_job_name=unique_name_from_base("test-marketplace"), ) train_input = algo.sagemaker_session.upload_data( From 2e0765613abe5c292afafd056c391bfd70c8c229 Mon Sep 17 00:00:00 2001 From: Mufaddal Rohawala Date: Sun, 17 Oct 2021 23:29:31 -0700 Subject: [PATCH 3/4] Exponentially sleep during enpoint cleanup --- tests/integ/test_model_monitor.py | 6 +++++- tests/integ/timeout.py | 9 ++++++-- tests/unit/test_timeout.py | 36 ++++++++++++++++++++++++++++++- 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/tests/integ/test_model_monitor.py b/tests/integ/test_model_monitor.py index 5ddce5b589..17b42b47f9 100644 --- a/tests/integ/test_model_monitor.py +++ b/tests/integ/test_model_monitor.py @@ -92,7 +92,11 @@ def predictor(sagemaker_session, tensorflow_inference_latest_version): key_prefix="tensorflow-serving/models", ) with tests.integ.timeout.timeout_and_delete_endpoint_by_name( - endpoint_name=endpoint_name, sagemaker_session=sagemaker_session, hours=2 + endpoint_name=endpoint_name, + sagemaker_session=sagemaker_session, + hours=2, + sleep_between_cleanup_attempts=20, + exponential_sleep=True, ): model = TensorFlowModel( model_data=model_data, diff --git a/tests/integ/timeout.py b/tests/integ/timeout.py index 6ac6d027c6..401977203c 100644 --- a/tests/integ/timeout.py +++ b/tests/integ/timeout.py @@ -54,6 +54,7 @@ def timeout_and_delete_endpoint_by_name( minutes=45, hours=0, sleep_between_cleanup_attempts=10, + exponential_sleep=False, ): limit = seconds + 60 * minutes + 3600 * hours @@ -83,7 +84,11 @@ def timeout_and_delete_endpoint_by_name( # avoids the inner exception to be overwritten pass # trying to delete the resource again in 10 seconds - sleep(sleep_between_cleanup_attempts) + if exponential_sleep: + _sleep_between_cleanup_attempts = (sleep_between_cleanup_attempts * (3 - attempts)) + else: + _sleep_between_cleanup_attempts = sleep_between_cleanup_attempts + sleep(_sleep_between_cleanup_attempts) @contextmanager @@ -150,7 +155,7 @@ def _delete_schedules_associated_with_endpoint(sagemaker_session, endpoint_name) monitor.delete_monitoring_schedule() except Exception as e: LOGGER.warning( - "Failed to delete monitor {}".format(monitor.monitoring_schedule_name), e + "Failed to delete monitor {},\nError: {}".format(monitor.monitoring_schedule_name, e) ) diff --git a/tests/unit/test_timeout.py b/tests/unit/test_timeout.py index 22eea1dd59..8f74fa495f 100644 --- a/tests/unit/test_timeout.py +++ b/tests/unit/test_timeout.py @@ -19,7 +19,7 @@ import time import pytest -from mock import Mock, patch +from mock import Mock, patch, call import stopit from botocore.exceptions import ClientError @@ -44,6 +44,7 @@ LONG_DURATION_TO_EXCEED_TIMEOUT = 0.002 LONG_TIMEOUT_THAT_WILL_NEVER_BE_EXCEEDED = 10 DURATION_TO_SLEEP_TO_ALLOW_BACKGROUND_THREAD_TO_COMPLETE = 0.2 +DURATION_TO_SLEEP = 0.01 @pytest.fixture() @@ -174,6 +175,39 @@ def test_timeout_and_delete_endpoint_by_name_retries_resource_deletion_on_failur assert session.delete_endpoint.call_count == 3 +@patch("tests.integ.timeout._show_logs", return_value=None, autospec=True) +@patch("tests.integ.timeout._cleanup_logs", return_value=None, autospec=True) +@patch( + "tests.integ.timeout._delete_schedules_associated_with_endpoint", + return_value=None, + autospec=True, +) +@patch('tests.integ.timeout.sleep', return_value=None) +def test_timeout_and_delete_endpoint_by_name_retries_resource_deletion_on_failure_with_exp_sleep( + mock_sleep, _show_logs, _cleanup_logs, _delete_schedules_associated_with_endpoint, session +): + session.delete_endpoint = Mock( + side_effect=ClientError( + error_response={"Error": {"Code": 403, "Message": "ValidationException"}}, + operation_name="Unit Test", + ) + ) + + with timeout_and_delete_endpoint_by_name( + endpoint_name=ENDPOINT_NAME, + sagemaker_session=session, + hours=0, + minutes=0, + seconds=LONG_TIMEOUT_THAT_WILL_NEVER_BE_EXCEEDED, + sleep_between_cleanup_attempts=DURATION_TO_SLEEP, + exponential_sleep=True, + ): + pass + assert session.delete_endpoint.call_count == 3 + assert mock_sleep.call_count == 3 + assert mock_sleep.mock_calls == [call(0.01), call(0.02), call(0.03)] + + @patch("tests.integ.timeout._show_logs", return_value=None, autospec=True) @patch("tests.integ.timeout._cleanup_logs", return_value=None, autospec=True) @patch( From e59883a2b0eb18a1ab1fd393a7f818badf65f768 Mon Sep 17 00:00:00 2001 From: Mufaddal Rohawala Date: Tue, 19 Oct 2021 10:43:49 -0700 Subject: [PATCH 4/4] fix unit test formatting errors --- tests/integ/timeout.py | 8 ++++++-- tests/unit/test_timeout.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/integ/timeout.py b/tests/integ/timeout.py index 401977203c..f45a136254 100644 --- a/tests/integ/timeout.py +++ b/tests/integ/timeout.py @@ -85,7 +85,9 @@ def timeout_and_delete_endpoint_by_name( pass # trying to delete the resource again in 10 seconds if exponential_sleep: - _sleep_between_cleanup_attempts = (sleep_between_cleanup_attempts * (3 - attempts)) + _sleep_between_cleanup_attempts = sleep_between_cleanup_attempts * ( + 3 - attempts + ) else: _sleep_between_cleanup_attempts = sleep_between_cleanup_attempts sleep(_sleep_between_cleanup_attempts) @@ -155,7 +157,9 @@ def _delete_schedules_associated_with_endpoint(sagemaker_session, endpoint_name) monitor.delete_monitoring_schedule() except Exception as e: LOGGER.warning( - "Failed to delete monitor {},\nError: {}".format(monitor.monitoring_schedule_name, e) + "Failed to delete monitor {},\nError: {}".format( + monitor.monitoring_schedule_name, e + ) ) diff --git a/tests/unit/test_timeout.py b/tests/unit/test_timeout.py index 8f74fa495f..6d40a2e6dd 100644 --- a/tests/unit/test_timeout.py +++ b/tests/unit/test_timeout.py @@ -182,7 +182,7 @@ def test_timeout_and_delete_endpoint_by_name_retries_resource_deletion_on_failur return_value=None, autospec=True, ) -@patch('tests.integ.timeout.sleep', return_value=None) +@patch("tests.integ.timeout.sleep", return_value=None) def test_timeout_and_delete_endpoint_by_name_retries_resource_deletion_on_failure_with_exp_sleep( mock_sleep, _show_logs, _cleanup_logs, _delete_schedules_associated_with_endpoint, session ):