Skip to content

Commit 8bcc9e8

Browse files
authored
feature: support customized timeout for model data download and inference container startup health check for Hosting Endpoints (#3388)
1 parent ce7e4cd commit 8bcc9e8

File tree

11 files changed

+262
-2
lines changed

11 files changed

+262
-2
lines changed

src/sagemaker/automl/automl.py

+16
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,9 @@ def deploy(
350350
model_kms_key=None,
351351
predictor_cls=None,
352352
inference_response_keys=None,
353+
volume_size=None,
354+
model_data_download_timeout=None,
355+
container_startup_health_check_timeout=None,
353356
):
354357
"""Deploy a candidate to a SageMaker Inference Pipeline.
355358
@@ -396,6 +399,16 @@ def deploy(
396399
function on the created endpoint name.
397400
inference_response_keys (list): List of keys for response content. The order of the
398401
keys will dictate the content order in the response.
402+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
403+
inference instance associated with the production variant. Currently only Amazon EBS
404+
gp2 storage volumes are supported.
405+
model_data_download_timeout (int): The timeout value, in seconds, to download and
406+
extract model data from Amazon S3 to the individual inference instance associated
407+
with this production variant.
408+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
409+
inference container to pass health check by SageMaker Hosting. For more information
410+
about health check see:
411+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
399412
400413
Returns:
401414
callable[string, sagemaker.session.Session] or ``None``:
@@ -423,6 +436,9 @@ def deploy(
423436
kms_key=model_kms_key,
424437
tags=tags,
425438
wait=wait,
439+
volume_size=volume_size,
440+
model_data_download_timeout=model_data_download_timeout,
441+
container_startup_health_check_timeout=container_startup_health_check_timeout,
426442
)
427443

428444
def _check_problem_type_and_job_objective(self, problem_type, job_objective):

src/sagemaker/estimator.py

+16
Original file line numberDiff line numberDiff line change
@@ -1304,6 +1304,9 @@ def deploy(
13041304
tags=None,
13051305
serverless_inference_config=None,
13061306
async_inference_config=None,
1307+
volume_size=None,
1308+
model_data_download_timeout=None,
1309+
container_startup_health_check_timeout=None,
13071310
**kwargs,
13081311
):
13091312
"""Deploy the trained model to an Amazon SageMaker endpoint.
@@ -1371,6 +1374,16 @@ def deploy(
13711374
For more information about tags, see
13721375
https://boto3.amazonaws.com/v1/documentation\
13731376
/api/latest/reference/services/sagemaker.html#SageMaker.Client.add_tags
1377+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
1378+
inference instance associated with the production variant. Currently only Amazon EBS
1379+
gp2 storage volumes are supported.
1380+
model_data_download_timeout (int): The timeout value, in seconds, to download and
1381+
extract model data from Amazon S3 to the individual inference instance associated
1382+
with this production variant.
1383+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
1384+
inference container to pass health check by SageMaker Hosting. For more information
1385+
about health check see:
1386+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
13741387
**kwargs: Passed to invocation of ``create_model()``.
13751388
Implementations may customize ``create_model()`` to accept
13761389
``**kwargs`` to customize model creation during deploy.
@@ -1429,6 +1442,9 @@ def deploy(
14291442
data_capture_config=data_capture_config,
14301443
serverless_inference_config=serverless_inference_config,
14311444
async_inference_config=async_inference_config,
1445+
volume_size=volume_size,
1446+
model_data_download_timeout=model_data_download_timeout,
1447+
container_startup_health_check_timeout=container_startup_health_check_timeout,
14321448
)
14331449

14341450
def register(

src/sagemaker/huggingface/model.py

+16
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,9 @@ def deploy(
206206
data_capture_config=None,
207207
async_inference_config=None,
208208
serverless_inference_config=None,
209+
volume_size=None,
210+
model_data_download_timeout=None,
211+
container_startup_health_check_timeout=None,
209212
**kwargs,
210213
):
211214
"""Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.
@@ -269,6 +272,16 @@ def deploy(
269272
empty object passed through, will use pre-defined values in
270273
``ServerlessInferenceConfig`` class to deploy serverless endpoint. Deploy an
271274
instance based endpoint if it's None. (default: None)
275+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
276+
inference instance associated with the production variant. Currently only Amazon EBS
277+
gp2 storage volumes are supported.
278+
model_data_download_timeout (int): The timeout value, in seconds, to download and
279+
extract model data from Amazon S3 to the individual inference instance associated
280+
with this production variant.
281+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
282+
inference container to pass health check by SageMaker Hosting. For more information
283+
about health check see:
284+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
272285
Raises:
273286
ValueError: If arguments combination check failed in these circumstances:
274287
- If no role is specified or
@@ -301,6 +314,9 @@ def deploy(
301314
data_capture_config,
302315
async_inference_config,
303316
serverless_inference_config,
317+
volume_size=volume_size,
318+
model_data_download_timeout=model_data_download_timeout,
319+
container_startup_health_check_timeout=container_startup_health_check_timeout,
304320
)
305321

306322
def register(

src/sagemaker/model.py

+16
Original file line numberDiff line numberDiff line change
@@ -1029,6 +1029,9 @@ def deploy(
10291029
data_capture_config=None,
10301030
async_inference_config=None,
10311031
serverless_inference_config=None,
1032+
volume_size=None,
1033+
model_data_download_timeout=None,
1034+
container_startup_health_check_timeout=None,
10321035
**kwargs,
10331036
):
10341037
"""Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.
@@ -1092,6 +1095,16 @@ def deploy(
10921095
empty object passed through, will use pre-defined values in
10931096
``ServerlessInferenceConfig`` class to deploy serverless endpoint. Deploy an
10941097
instance based endpoint if it's None. (default: None)
1098+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
1099+
inference instance associated with the production variant. Currently only Amazon EBS
1100+
gp2 storage volumes are supported.
1101+
model_data_download_timeout (int): The timeout value, in seconds, to download and
1102+
extract model data from Amazon S3 to the individual inference instance associated
1103+
with this production variant.
1104+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
1105+
inference container to pass health check by SageMaker Hosting. For more information
1106+
about health check see:
1107+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
10951108
Raises:
10961109
ValueError: If arguments combination check failed in these circumstances:
10971110
- If no role is specified or
@@ -1155,6 +1168,9 @@ def deploy(
11551168
initial_instance_count,
11561169
accelerator_type=accelerator_type,
11571170
serverless_inference_config=serverless_inference_config_dict,
1171+
volume_size=volume_size,
1172+
model_data_download_timeout=model_data_download_timeout,
1173+
container_startup_health_check_timeout=container_startup_health_check_timeout,
11581174
)
11591175
if endpoint_name:
11601176
self.endpoint_name = endpoint_name

src/sagemaker/pipeline.py

+22-1
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,9 @@ def deploy(
122122
update_endpoint=False,
123123
data_capture_config=None,
124124
kms_key=None,
125+
volume_size=None,
126+
model_data_download_timeout=None,
127+
container_startup_health_check_timeout=None,
125128
):
126129
"""Deploy the ``Model`` to an ``Endpoint``.
127130
@@ -170,6 +173,16 @@ def deploy(
170173
kms_key (str): The ARN, Key ID or Alias of the KMS key that is used to
171174
encrypt the data on the storage volume attached to the instance hosting
172175
the endpoint.
176+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
177+
inference instance associated with the production variant. Currently only Amazon EBS
178+
gp2 storage volumes are supported.
179+
model_data_download_timeout (int): The timeout value, in seconds, to download and
180+
extract model data from Amazon S3 to the individual inference instance associated
181+
with this production variant.
182+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
183+
inference container to pass health check by SageMaker Hosting. For more information
184+
about health check see:
185+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
173186
174187
Returns:
175188
callable[string, sagemaker.session.Session] or None: Invocation of
@@ -191,7 +204,12 @@ def deploy(
191204
)
192205

193206
production_variant = sagemaker.production_variant(
194-
self.name, instance_type, initial_instance_count
207+
self.name,
208+
instance_type,
209+
initial_instance_count,
210+
volume_size=volume_size,
211+
model_data_download_timeout=model_data_download_timeout,
212+
container_startup_health_check_timeout=container_startup_health_check_timeout,
195213
)
196214
self.endpoint_name = endpoint_name or self.name
197215

@@ -208,6 +226,9 @@ def deploy(
208226
tags=tags,
209227
kms_key=kms_key,
210228
data_capture_config_dict=data_capture_config_dict,
229+
volume_size=volume_size,
230+
model_data_download_timeout=model_data_download_timeout,
231+
container_startup_health_check_timeout=container_startup_health_check_timeout,
211232
)
212233
self.sagemaker_session.update_endpoint(
213234
self.endpoint_name, endpoint_config_name, wait=wait

src/sagemaker/session.py

+35-1
Original file line numberDiff line numberDiff line change
@@ -2981,6 +2981,9 @@ def create_endpoint_config(
29812981
tags=None,
29822982
kms_key=None,
29832983
data_capture_config_dict=None,
2984+
volume_size=None,
2985+
model_data_download_timeout=None,
2986+
container_startup_health_check_timeout=None,
29842987
):
29852988
"""Create an Amazon SageMaker endpoint configuration.
29862989
@@ -3004,6 +3007,16 @@ def create_endpoint_config(
30043007
attached to the instance hosting the endpoint.
30053008
data_capture_config_dict (dict): Specifies configuration related to Endpoint data
30063009
capture for use with Amazon SageMaker Model Monitoring. Default: None.
3010+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
3011+
inference instance associated with the production variant. Currently only Amazon EBS
3012+
gp2 storage volumes are supported.
3013+
model_data_download_timeout (int): The timeout value, in seconds, to download and
3014+
extract model data from Amazon S3 to the individual inference instance associated
3015+
with this production variant.
3016+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
3017+
inference container to pass health check by SageMaker Hosting. For more information
3018+
about health check see:
3019+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
30073020
30083021
Example:
30093022
>>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}]
@@ -3025,6 +3038,9 @@ def create_endpoint_config(
30253038
instance_type,
30263039
initial_instance_count,
30273040
accelerator_type=accelerator_type,
3041+
volume_size=volume_size,
3042+
model_data_download_timeout=model_data_download_timeout,
3043+
container_startup_health_check_timeout=container_startup_health_check_timeout,
30283044
)
30293045
],
30303046
}
@@ -4636,6 +4652,9 @@ def production_variant(
46364652
initial_weight=1,
46374653
accelerator_type=None,
46384654
serverless_inference_config=None,
4655+
volume_size=None,
4656+
model_data_download_timeout=None,
4657+
container_startup_health_check_timeout=None,
46394658
):
46404659
"""Create a production variant description suitable for use in a ``ProductionVariant`` list.
46414660
@@ -4657,7 +4676,16 @@ def production_variant(
46574676
serverless_inference_config (dict): Specifies configuration dict related to serverless
46584677
endpoint. The dict is converted from sagemaker.model_monitor.ServerlessInferenceConfig
46594678
object (default: None)
4660-
4679+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
4680+
inference instance associated with the production variant. Currently only Amazon EBS
4681+
gp2 storage volumes are supported.
4682+
model_data_download_timeout (int): The timeout value, in seconds, to download and extract
4683+
model data from Amazon S3 to the individual inference instance associated with this
4684+
production variant.
4685+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
4686+
inference container to pass health check by SageMaker Hosting. For more information
4687+
about health check see:
4688+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
46614689
Returns:
46624690
dict[str, str]: A SageMaker ``ProductionVariant`` description
46634691
"""
@@ -4676,6 +4704,12 @@ def production_variant(
46764704
initial_instance_count = initial_instance_count or 1
46774705
production_variant_configuration["InitialInstanceCount"] = initial_instance_count
46784706
production_variant_configuration["InstanceType"] = instance_type
4707+
update_args(
4708+
production_variant_configuration,
4709+
VolumeSizeInGB=volume_size,
4710+
ModelDataDownloadTimeoutInSeconds=model_data_download_timeout,
4711+
ContainerStartupHealthCheckTimeoutInSeconds=container_startup_health_check_timeout,
4712+
)
46794713

46804714
return production_variant_configuration
46814715

src/sagemaker/tensorflow/model.py

+6
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,9 @@ def deploy(
320320
update_endpoint=None,
321321
async_inference_config=None,
322322
serverless_inference_config=None,
323+
volume_size=None,
324+
model_data_download_timeout=None,
325+
container_startup_health_check_timeout=None,
323326
):
324327
"""Deploy a Tensorflow ``Model`` to a SageMaker ``Endpoint``."""
325328

@@ -340,6 +343,9 @@ def deploy(
340343
data_capture_config=data_capture_config,
341344
async_inference_config=async_inference_config,
342345
serverless_inference_config=serverless_inference_config,
346+
volume_size=volume_size,
347+
model_data_download_timeout=model_data_download_timeout,
348+
container_startup_health_check_timeout=container_startup_health_check_timeout,
343349
update_endpoint=update_endpoint,
344350
)
345351

tests/unit/sagemaker/automl/test_auto_ml.py

+3
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,9 @@ def test_deploy_optional_args(candidate_estimator, sagemaker_session, candidate_
596596
deserializer=None,
597597
endpoint_name=JOB_NAME,
598598
kms_key=OUTPUT_KMS_KEY,
599+
volume_size=None,
600+
model_data_download_timeout=None,
601+
container_startup_health_check_timeout=None,
599602
tags=TAGS,
600603
wait=False,
601604
)

0 commit comments

Comments
 (0)