Skip to content

Commit bb3d0d2

Browse files
committed
feat: support customized timeout for model data download and inference container startup health check for Hosting Endpoints
This change also enables customization of ML instance storage volume size for Hosting Endpoints.
1 parent 9713203 commit bb3d0d2

File tree

11 files changed

+143
-2
lines changed

11 files changed

+143
-2
lines changed

src/sagemaker/automl/automl.py

+16
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,9 @@ def deploy(
350350
model_kms_key=None,
351351
predictor_cls=None,
352352
inference_response_keys=None,
353+
volume_size=None,
354+
model_data_download_timeout=None,
355+
container_startup_health_check_timeout=None,
353356
):
354357
"""Deploy a candidate to a SageMaker Inference Pipeline.
355358
@@ -396,6 +399,16 @@ def deploy(
396399
function on the created endpoint name.
397400
inference_response_keys (list): List of keys for response content. The order of the
398401
keys will dictate the content order in the response.
402+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
403+
inference instance associated with the production variant. Currently only Amazon EBS
404+
gp2 storage volumes are supported.
405+
model_data_download_timeout (int): The timeout value, in seconds, to download and extract
406+
model data from Amazon S3 to the individual inference instance associated with this
407+
production variant.
408+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
409+
inference container to pass health check by SageMaker Hosting. For more information
410+
about health check see:
411+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
399412
400413
Returns:
401414
callable[string, sagemaker.session.Session] or ``None``:
@@ -421,6 +434,9 @@ def deploy(
421434
deserializer=deserializer,
422435
endpoint_name=endpoint_name,
423436
kms_key=model_kms_key,
437+
volume_size=volume_size,
438+
model_data_download_timeout=model_data_download_timeout,
439+
container_startup_health_check_timeout=container_startup_health_check_timeout,
424440
tags=tags,
425441
wait=wait,
426442
)

src/sagemaker/estimator.py

+16
Original file line numberDiff line numberDiff line change
@@ -1277,6 +1277,9 @@ def deploy(
12771277
tags=None,
12781278
serverless_inference_config=None,
12791279
async_inference_config=None,
1280+
volume_size=None,
1281+
model_data_download_timeout=None,
1282+
container_startup_health_check_timeout=None,
12801283
**kwargs,
12811284
):
12821285
"""Deploy the trained model to an Amazon SageMaker endpoint.
@@ -1344,6 +1347,16 @@ def deploy(
13441347
For more information about tags, see
13451348
https://boto3.amazonaws.com/v1/documentation\
13461349
/api/latest/reference/services/sagemaker.html#SageMaker.Client.add_tags
1350+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
1351+
inference instance associated with the production variant. Currently only Amazon EBS
1352+
gp2 storage volumes are supported.
1353+
model_data_download_timeout (int): The timeout value, in seconds, to download and extract
1354+
model data from Amazon S3 to the individual inference instance associated with this
1355+
production variant.
1356+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
1357+
inference container to pass health check by SageMaker Hosting. For more information
1358+
about health check see:
1359+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
13471360
**kwargs: Passed to invocation of ``create_model()``.
13481361
Implementations may customize ``create_model()`` to accept
13491362
``**kwargs`` to customize model creation during deploy.
@@ -1402,6 +1415,9 @@ def deploy(
14021415
data_capture_config=data_capture_config,
14031416
serverless_inference_config=serverless_inference_config,
14041417
async_inference_config=async_inference_config,
1418+
volume_size=volume_size,
1419+
model_data_download_timeout=model_data_download_timeout,
1420+
container_startup_health_check_timeout=container_startup_health_check_timeout,
14051421
)
14061422

14071423
def register(

src/sagemaker/huggingface/model.py

+16
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,9 @@ def deploy(
206206
data_capture_config=None,
207207
async_inference_config=None,
208208
serverless_inference_config=None,
209+
volume_size=None,
210+
model_data_download_timeout=None,
211+
container_startup_health_check_timeout=None,
209212
**kwargs,
210213
):
211214
"""Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.
@@ -269,6 +272,16 @@ def deploy(
269272
empty object passed through, will use pre-defined values in
270273
``ServerlessInferenceConfig`` class to deploy serverless endpoint. Deploy an
271274
instance based endpoint if it's None. (default: None)
275+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
276+
inference instance associated with the production variant. Currently only Amazon EBS
277+
gp2 storage volumes are supported.
278+
model_data_download_timeout (int): The timeout value, in seconds, to download and extract
279+
model data from Amazon S3 to the individual inference instance associated with this
280+
production variant.
281+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
282+
inference container to pass health check by SageMaker Hosting. For more information
283+
about health check see:
284+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
272285
Raises:
273286
ValueError: If arguments combination check failed in these circumstances:
274287
- If no role is specified or
@@ -301,6 +314,9 @@ def deploy(
301314
data_capture_config,
302315
async_inference_config,
303316
serverless_inference_config,
317+
volume_size=volume_size,
318+
model_data_download_timeout=model_data_download_timeout,
319+
container_startup_health_check_timeout=container_startup_health_check_timeout,
304320
)
305321

306322
def register(

src/sagemaker/model.py

+16
Original file line numberDiff line numberDiff line change
@@ -1029,6 +1029,9 @@ def deploy(
10291029
data_capture_config=None,
10301030
async_inference_config=None,
10311031
serverless_inference_config=None,
1032+
volume_size=None,
1033+
model_data_download_timeout=None,
1034+
container_startup_health_check_timeout=None,
10321035
**kwargs,
10331036
):
10341037
"""Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.
@@ -1092,6 +1095,16 @@ def deploy(
10921095
empty object passed through, will use pre-defined values in
10931096
``ServerlessInferenceConfig`` class to deploy serverless endpoint. Deploy an
10941097
instance based endpoint if it's None. (default: None)
1098+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
1099+
inference instance associated with the production variant. Currently only Amazon EBS
1100+
gp2 storage volumes are supported.
1101+
model_data_download_timeout (int): The timeout value, in seconds, to download and extract
1102+
model data from Amazon S3 to the individual inference instance associated with this
1103+
production variant.
1104+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
1105+
inference container to pass health check by SageMaker Hosting. For more information
1106+
about health check see:
1107+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
10951108
Raises:
10961109
ValueError: If arguments combination check failed in these circumstances:
10971110
- If no role is specified or
@@ -1155,6 +1168,9 @@ def deploy(
11551168
initial_instance_count,
11561169
accelerator_type=accelerator_type,
11571170
serverless_inference_config=serverless_inference_config_dict,
1171+
volume_size=volume_size,
1172+
model_data_download_timeout=model_data_download_timeout,
1173+
container_startup_health_check_timeout=container_startup_health_check_timeout,
11581174
)
11591175
if endpoint_name:
11601176
self.endpoint_name = endpoint_name

src/sagemaker/pipeline.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,9 @@ def deploy(
122122
update_endpoint=False,
123123
data_capture_config=None,
124124
kms_key=None,
125+
volume_size=None,
126+
model_data_download_timeout=None,
127+
container_startup_health_check_timeout=None,
125128
):
126129
"""Deploy the ``Model`` to an ``Endpoint``.
127130
@@ -170,6 +173,16 @@ def deploy(
170173
kms_key (str): The ARN, Key ID or Alias of the KMS key that is used to
171174
encrypt the data on the storage volume attached to the instance hosting
172175
the endpoint.
176+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
177+
inference instance associated with the production variant. Currently only Amazon EBS
178+
gp2 storage volumes are supported.
179+
model_data_download_timeout (int): The timeout value, in seconds, to download and extract
180+
model data from Amazon S3 to the individual inference instance associated with this
181+
production variant.
182+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
183+
inference container to pass health check by SageMaker Hosting. For more information
184+
about health check see:
185+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
173186
174187
Returns:
175188
callable[string, sagemaker.session.Session] or None: Invocation of
@@ -191,7 +204,10 @@ def deploy(
191204
)
192205

193206
production_variant = sagemaker.production_variant(
194-
self.name, instance_type, initial_instance_count
207+
self.name, instance_type, initial_instance_count,
208+
volume_size=volume_size,
209+
model_data_download_timeout=model_data_download_timeout,
210+
container_startup_health_check_timeout=container_startup_health_check_timeout,
195211
)
196212
self.endpoint_name = endpoint_name or self.name
197213

@@ -208,6 +224,9 @@ def deploy(
208224
tags=tags,
209225
kms_key=kms_key,
210226
data_capture_config_dict=data_capture_config_dict,
227+
volume_size=volume_size,
228+
model_data_download_timeout=model_data_download_timeout,
229+
container_startup_health_check_timeout=container_startup_health_check_timeout,
211230
)
212231
self.sagemaker_session.update_endpoint(
213232
self.endpoint_name, endpoint_config_name, wait=wait

src/sagemaker/session.py

+35-1
Original file line numberDiff line numberDiff line change
@@ -2981,6 +2981,9 @@ def create_endpoint_config(
29812981
tags=None,
29822982
kms_key=None,
29832983
data_capture_config_dict=None,
2984+
volume_size=None,
2985+
model_data_download_timeout=None,
2986+
container_startup_health_check_timeout=None,
29842987
):
29852988
"""Create an Amazon SageMaker endpoint configuration.
29862989
@@ -3004,6 +3007,16 @@ def create_endpoint_config(
30043007
attached to the instance hosting the endpoint.
30053008
data_capture_config_dict (dict): Specifies configuration related to Endpoint data
30063009
capture for use with Amazon SageMaker Model Monitoring. Default: None.
3010+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
3011+
inference instance associated with the production variant. Currently only Amazon EBS
3012+
gp2 storage volumes are supported.
3013+
model_data_download_timeout (int): The timeout value, in seconds, to download and extract
3014+
model data from Amazon S3 to the individual inference instance associated with this
3015+
production variant.
3016+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
3017+
inference container to pass health check by SageMaker Hosting. For more information
3018+
about health check see:
3019+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
30073020
30083021
Example:
30093022
>>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}]
@@ -3025,6 +3038,9 @@ def create_endpoint_config(
30253038
instance_type,
30263039
initial_instance_count,
30273040
accelerator_type=accelerator_type,
3041+
volume_size=volume_size,
3042+
model_data_download_timeout=model_data_download_timeout,
3043+
container_startup_health_check_timeout=container_startup_health_check_timeout,
30283044
)
30293045
],
30303046
}
@@ -4636,6 +4652,9 @@ def production_variant(
46364652
initial_weight=1,
46374653
accelerator_type=None,
46384654
serverless_inference_config=None,
4655+
volume_size=None,
4656+
model_data_download_timeout=None,
4657+
container_startup_health_check_timeout=None,
46394658
):
46404659
"""Create a production variant description suitable for use in a ``ProductionVariant`` list.
46414660
@@ -4657,7 +4676,16 @@ def production_variant(
46574676
serverless_inference_config (dict): Specifies configuration dict related to serverless
46584677
endpoint. The dict is converted from sagemaker.model_monitor.ServerlessInferenceConfig
46594678
object (default: None)
4660-
4679+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
4680+
inference instance associated with the production variant. Currently only Amazon EBS
4681+
gp2 storage volumes are supported.
4682+
model_data_download_timeout (int): The timeout value, in seconds, to download and extract
4683+
model data from Amazon S3 to the individual inference instance associated with this
4684+
production variant.
4685+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
4686+
inference container to pass health check by SageMaker Hosting. For more information
4687+
about health check see:
4688+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
46614689
Returns:
46624690
dict[str, str]: A SageMaker ``ProductionVariant`` description
46634691
"""
@@ -4676,6 +4704,12 @@ def production_variant(
46764704
initial_instance_count = initial_instance_count or 1
46774705
production_variant_configuration["InitialInstanceCount"] = initial_instance_count
46784706
production_variant_configuration["InstanceType"] = instance_type
4707+
update_args(
4708+
production_variant_configuration,
4709+
VolumeSizeInGB=volume_size,
4710+
ModelDataDownloadTimeoutInSeconds=model_data_download_timeout,
4711+
ContainerStartupHealthCheckTimeoutInSeconds=container_startup_health_check_timeout,
4712+
)
46794713

46804714
return production_variant_configuration
46814715

src/sagemaker/tensorflow/model.py

+6
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,9 @@ def deploy(
320320
update_endpoint=None,
321321
async_inference_config=None,
322322
serverless_inference_config=None,
323+
volume_size=None,
324+
model_data_download_timeout=None,
325+
container_startup_health_check_timeout=None,
323326
):
324327
"""Deploy a Tensorflow ``Model`` to a SageMaker ``Endpoint``."""
325328

@@ -340,6 +343,9 @@ def deploy(
340343
data_capture_config=data_capture_config,
341344
async_inference_config=async_inference_config,
342345
serverless_inference_config=serverless_inference_config,
346+
volume_size=volume_size,
347+
model_data_download_timeout=model_data_download_timeout,
348+
container_startup_health_check_timeout=container_startup_health_check_timeout,
343349
update_endpoint=update_endpoint,
344350
)
345351

tests/unit/sagemaker/automl/test_auto_ml.py

+3
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,9 @@ def test_deploy_optional_args(candidate_estimator, sagemaker_session, candidate_
596596
deserializer=None,
597597
endpoint_name=JOB_NAME,
598598
kms_key=OUTPUT_KMS_KEY,
599+
volume_size=None,
600+
model_data_download_timeout=None,
601+
container_startup_health_check_timeout=None,
599602
tags=TAGS,
600603
wait=False,
601604
)

tests/unit/sagemaker/model/test_deploy.py

+9
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ def test_deploy(name_from_base, prepare_container_def, production_variant, sagem
7171
INSTANCE_COUNT,
7272
accelerator_type=None,
7373
serverless_inference_config=None,
74+
volume_size=None,
75+
model_data_download_timeout=None,
76+
container_startup_health_check_timeout=None,
7477
)
7578

7679
sagemaker_session.create_model.assert_called_with(
@@ -120,6 +123,9 @@ def test_deploy_accelerator_type(
120123
INSTANCE_COUNT,
121124
accelerator_type=ACCELERATOR_TYPE,
122125
serverless_inference_config=None,
126+
volume_size=None,
127+
model_data_download_timeout=None,
128+
container_startup_health_check_timeout=None,
123129
)
124130

125131
sagemaker_session.endpoint_from_production_variants.assert_called_with(
@@ -363,6 +369,9 @@ def test_deploy_serverless_inference(production_variant, create_sagemaker_model,
363369
None,
364370
accelerator_type=None,
365371
serverless_inference_config=serverless_inference_config_dict,
372+
volume_size=None,
373+
model_data_download_timeout=None,
374+
container_startup_health_check_timeout=None,
366375
)
367376

368377
sagemaker_session.endpoint_from_production_variants.assert_called_with(

tests/unit/test_estimator.py

+3
Original file line numberDiff line numberDiff line change
@@ -3169,6 +3169,9 @@ def test_generic_to_deploy_kms(create_model, sagemaker_session):
31693169
data_capture_config=None,
31703170
async_inference_config=None,
31713171
serverless_inference_config=None,
3172+
volume_size=None,
3173+
model_data_download_timeout=None,
3174+
container_startup_health_check_timeout=None,
31723175
)
31733176

31743177

tests/unit/test_pipeline_model.py

+3
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,9 @@ def test_deploy_update_endpoint(tfo, time, sagemaker_session):
188188
tags=None,
189189
kms_key=None,
190190
data_capture_config_dict=None,
191+
volume_size=None,
192+
model_data_download_timeout=None,
193+
container_startup_health_check_timeout=None,
191194
)
192195
config_name = sagemaker_session.create_endpoint_config(
193196
name=model.name,

0 commit comments

Comments
 (0)