From cad0c71d640704ba7bf80251627def7124ff0933 Mon Sep 17 00:00:00 2001 From: Marcio Dos Santos Date: Mon, 24 Sep 2018 18:11:15 -0700 Subject: [PATCH 1/2] Set health check timeout to 30 seconds --- src/sagemaker/local/local_session.py | 4 +++- tests/unit/test_local_session.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/sagemaker/local/local_session.py b/src/sagemaker/local/local_session.py index ff0586a4d7..4b6ba36eef 100644 --- a/src/sagemaker/local/local_session.py +++ b/src/sagemaker/local/local_session.py @@ -25,6 +25,8 @@ from sagemaker.session import Session from sagemaker.utils import get_config_value +HEALTH_CHECK_TIMEOUT_LIMIT = 30 + logger = logging.getLogger(__name__) logger.setLevel(logging.WARNING) @@ -123,7 +125,7 @@ def create_endpoint(self, EndpointName, EndpointConfigName): endpoint_url = "http://localhost:%s/ping" % serving_port while True: i += 1 - if i >= 10: + if i >= HEALTH_CHECK_TIMEOUT_LIMIT: raise RuntimeError("Giving up, endpoint: %s didn't launch correctly" % EndpointName) logger.info("Checking if endpoint is up, attempt: %s" % i) diff --git a/tests/unit/test_local_session.py b/tests/unit/test_local_session.py index f39644464f..5b5ad10744 100644 --- a/tests/unit/test_local_session.py +++ b/tests/unit/test_local_session.py @@ -204,7 +204,7 @@ def test_create_endpoint(serve, request, LocalSession): @patch('urllib3.PoolManager.request', return_value=BAD_RESPONSE) @patch('sagemaker.local.local_session.LocalSession') @patch('time.sleep') -def test_create_endpoint_fails(*args): +def test_create_endpoint_fails(sleep, *args): local_sagemaker_client = sagemaker.local.local_session.LocalSagemakerClient() local_sagemaker_client.variants = [{'InstanceType': 'ml.c4.99xlarge', 'InitialInstanceCount': 10}] local_sagemaker_client.primary_container = {'ModelDataUrl': '/some/model/path', @@ -214,6 +214,8 @@ def test_create_endpoint_fails(*args): with pytest.raises(RuntimeError): local_sagemaker_client.create_endpoint('my-endpoint', 'some-endpoint-config') + assert 29 == sleep.call_count + def test_file_input_all_defaults(): prefix = 'pre' From 00885b4271a3149f695399338b421676c6d3440c Mon Sep 17 00:00:00 2001 From: Marcio Dos Santos Date: Mon, 24 Sep 2018 18:16:53 -0700 Subject: [PATCH 2/2] Update changelog --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2d352a3a62..fc26a06875 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,6 +9,7 @@ CHANGELOG * doc-fix: add deprecation warning for current MXNet training script format * doc-fix: add docs on deploying TensorFlow model directly from existing model * doc-fix: fix code example for using Gzip compression for TensorFlow training data +* bug-fix: Setting health check timeout limit on local mode to 30s 1.10.0 ======