Skip to content

Commit 9cf6e7f

Browse files
chiui0x18 authored and claytonparnell committed
feat: support customized timeout for model data download and inference container startup health check for Hosting Endpoints
This change also enables customization of ML instance storage volume size for Hosting Endpoints.
1 parent f2d5e41 commit 9cf6e7f

File tree

11 files changed

+143
-2
lines changed

11 files changed

+143
-2
lines changed

src/sagemaker/automl/automl.py

+16
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,9 @@ def deploy(
350350
model_kms_key=None,
351351
predictor_cls=None,
352352
inference_response_keys=None,
353+
volume_size=None,
354+
model_data_download_timeout=None,
355+
container_startup_health_check_timeout=None,
353356
):
354357
"""Deploy a candidate to a SageMaker Inference Pipeline.
355358
@@ -396,6 +399,16 @@ def deploy(
396399
function on the created endpoint name.
397400
inference_response_keys (list): List of keys for response content. The order of the
398401
keys will dictate the content order in the response.
402+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
403+
inference instance associated with the production variant. Currently only Amazon EBS
404+
gp2 storage volumes are supported.
405+
model_data_download_timeout (int): The timeout value, in seconds, to download and extract
406+
model data from Amazon S3 to the individual inference instance associated with this
407+
production variant.
408+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
409+
inference container to pass health check by SageMaker Hosting. For more information
410+
about health check see:
411+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
399412
400413
Returns:
401414
callable[string, sagemaker.session.Session] or ``None``:
@@ -421,6 +434,9 @@ def deploy(
421434
deserializer=deserializer,
422435
endpoint_name=endpoint_name,
423436
kms_key=model_kms_key,
437+
volume_size=volume_size,
438+
model_data_download_timeout=model_data_download_timeout,
439+
container_startup_health_check_timeout=container_startup_health_check_timeout,
424440
tags=tags,
425441
wait=wait,
426442
)

src/sagemaker/estimator.py

+16
Original file line numberDiff line numberDiff line change
@@ -1281,6 +1281,9 @@ def deploy(
12811281
tags=None,
12821282
serverless_inference_config=None,
12831283
async_inference_config=None,
1284+
volume_size=None,
1285+
model_data_download_timeout=None,
1286+
container_startup_health_check_timeout=None,
12841287
**kwargs,
12851288
):
12861289
"""Deploy the trained model to an Amazon SageMaker endpoint.
@@ -1348,6 +1351,16 @@ def deploy(
13481351
For more information about tags, see
13491352
https://boto3.amazonaws.com/v1/documentation\
13501353
/api/latest/reference/services/sagemaker.html#SageMaker.Client.add_tags
1354+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
1355+
inference instance associated with the production variant. Currently only Amazon EBS
1356+
gp2 storage volumes are supported.
1357+
model_data_download_timeout (int): The timeout value, in seconds, to download and extract
1358+
model data from Amazon S3 to the individual inference instance associated with this
1359+
production variant.
1360+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
1361+
inference container to pass health check by SageMaker Hosting. For more information
1362+
about health check see:
1363+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
13511364
**kwargs: Passed to invocation of ``create_model()``.
13521365
Implementations may customize ``create_model()`` to accept
13531366
``**kwargs`` to customize model creation during deploy.
@@ -1406,6 +1419,9 @@ def deploy(
14061419
data_capture_config=data_capture_config,
14071420
serverless_inference_config=serverless_inference_config,
14081421
async_inference_config=async_inference_config,
1422+
volume_size=volume_size,
1423+
model_data_download_timeout=model_data_download_timeout,
1424+
container_startup_health_check_timeout=container_startup_health_check_timeout,
14091425
)
14101426

14111427
def register(

src/sagemaker/huggingface/model.py

+16
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,9 @@ def deploy(
206206
data_capture_config=None,
207207
async_inference_config=None,
208208
serverless_inference_config=None,
209+
volume_size=None,
210+
model_data_download_timeout=None,
211+
container_startup_health_check_timeout=None,
209212
**kwargs,
210213
):
211214
"""Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.
@@ -269,6 +272,16 @@ def deploy(
269272
empty object passed through, will use pre-defined values in
270273
``ServerlessInferenceConfig`` class to deploy serverless endpoint. Deploy an
271274
instance based endpoint if it's None. (default: None)
275+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
276+
inference instance associated with the production variant. Currently only Amazon EBS
277+
gp2 storage volumes are supported.
278+
model_data_download_timeout (int): The timeout value, in seconds, to download and extract
279+
model data from Amazon S3 to the individual inference instance associated with this
280+
production variant.
281+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
282+
inference container to pass health check by SageMaker Hosting. For more information
283+
about health check see:
284+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
272285
Raises:
273286
ValueError: If arguments combination check failed in these circumstances:
274287
- If no role is specified or
@@ -301,6 +314,9 @@ def deploy(
301314
data_capture_config,
302315
async_inference_config,
303316
serverless_inference_config,
317+
volume_size=volume_size,
318+
model_data_download_timeout=model_data_download_timeout,
319+
container_startup_health_check_timeout=container_startup_health_check_timeout,
304320
)
305321

306322
def register(

src/sagemaker/model.py

+16
Original file line numberDiff line numberDiff line change
@@ -1029,6 +1029,9 @@ def deploy(
10291029
data_capture_config=None,
10301030
async_inference_config=None,
10311031
serverless_inference_config=None,
1032+
volume_size=None,
1033+
model_data_download_timeout=None,
1034+
container_startup_health_check_timeout=None,
10321035
**kwargs,
10331036
):
10341037
"""Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.
@@ -1092,6 +1095,16 @@ def deploy(
10921095
empty object passed through, will use pre-defined values in
10931096
``ServerlessInferenceConfig`` class to deploy serverless endpoint. Deploy an
10941097
instance based endpoint if it's None. (default: None)
1098+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
1099+
inference instance associated with the production variant. Currently only Amazon EBS
1100+
gp2 storage volumes are supported.
1101+
model_data_download_timeout (int): The timeout value, in seconds, to download and extract
1102+
model data from Amazon S3 to the individual inference instance associated with this
1103+
production variant.
1104+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
1105+
inference container to pass health check by SageMaker Hosting. For more information
1106+
about health check see:
1107+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
10951108
Raises:
10961109
ValueError: If arguments combination check failed in these circumstances:
10971110
- If no role is specified or
@@ -1155,6 +1168,9 @@ def deploy(
11551168
initial_instance_count,
11561169
accelerator_type=accelerator_type,
11571170
serverless_inference_config=serverless_inference_config_dict,
1171+
volume_size=volume_size,
1172+
model_data_download_timeout=model_data_download_timeout,
1173+
container_startup_health_check_timeout=container_startup_health_check_timeout,
11581174
)
11591175
if endpoint_name:
11601176
self.endpoint_name = endpoint_name

src/sagemaker/pipeline.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,9 @@ def deploy(
122122
update_endpoint=False,
123123
data_capture_config=None,
124124
kms_key=None,
125+
volume_size=None,
126+
model_data_download_timeout=None,
127+
container_startup_health_check_timeout=None,
125128
):
126129
"""Deploy the ``Model`` to an ``Endpoint``.
127130
@@ -170,6 +173,16 @@ def deploy(
170173
kms_key (str): The ARN, Key ID or Alias of the KMS key that is used to
171174
encrypt the data on the storage volume attached to the instance hosting
172175
the endpoint.
176+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
177+
inference instance associated with the production variant. Currently only Amazon EBS
178+
gp2 storage volumes are supported.
179+
model_data_download_timeout (int): The timeout value, in seconds, to download and extract
180+
model data from Amazon S3 to the individual inference instance associated with this
181+
production variant.
182+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
183+
inference container to pass health check by SageMaker Hosting. For more information
184+
about health check see:
185+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
173186
174187
Returns:
175188
callable[string, sagemaker.session.Session] or None: Invocation of
@@ -191,7 +204,10 @@ def deploy(
191204
)
192205

193206
production_variant = sagemaker.production_variant(
194-
self.name, instance_type, initial_instance_count
207+
self.name, instance_type, initial_instance_count,
208+
volume_size=volume_size,
209+
model_data_download_timeout=model_data_download_timeout,
210+
container_startup_health_check_timeout=container_startup_health_check_timeout,
195211
)
196212
self.endpoint_name = endpoint_name or self.name
197213

@@ -208,6 +224,9 @@ def deploy(
208224
tags=tags,
209225
kms_key=kms_key,
210226
data_capture_config_dict=data_capture_config_dict,
227+
volume_size=volume_size,
228+
model_data_download_timeout=model_data_download_timeout,
229+
container_startup_health_check_timeout=container_startup_health_check_timeout,
211230
)
212231
self.sagemaker_session.update_endpoint(
213232
self.endpoint_name, endpoint_config_name, wait=wait

src/sagemaker/session.py

+35-1
Original file line numberDiff line numberDiff line change
@@ -2970,6 +2970,9 @@ def create_endpoint_config(
29702970
tags=None,
29712971
kms_key=None,
29722972
data_capture_config_dict=None,
2973+
volume_size=None,
2974+
model_data_download_timeout=None,
2975+
container_startup_health_check_timeout=None,
29732976
):
29742977
"""Create an Amazon SageMaker endpoint configuration.
29752978
@@ -2993,6 +2996,16 @@ def create_endpoint_config(
29932996
attached to the instance hosting the endpoint.
29942997
data_capture_config_dict (dict): Specifies configuration related to Endpoint data
29952998
capture for use with Amazon SageMaker Model Monitoring. Default: None.
2999+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
3000+
inference instance associated with the production variant. Currently only Amazon EBS
3001+
gp2 storage volumes are supported.
3002+
model_data_download_timeout (int): The timeout value, in seconds, to download and extract
3003+
model data from Amazon S3 to the individual inference instance associated with this
3004+
production variant.
3005+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
3006+
inference container to pass health check by SageMaker Hosting. For more information
3007+
about health check see:
3008+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
29963009
29973010
Example:
29983011
>>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}]
@@ -3014,6 +3027,9 @@ def create_endpoint_config(
30143027
instance_type,
30153028
initial_instance_count,
30163029
accelerator_type=accelerator_type,
3030+
volume_size=volume_size,
3031+
model_data_download_timeout=model_data_download_timeout,
3032+
container_startup_health_check_timeout=container_startup_health_check_timeout,
30173033
)
30183034
],
30193035
}
@@ -4625,6 +4641,9 @@ def production_variant(
46254641
initial_weight=1,
46264642
accelerator_type=None,
46274643
serverless_inference_config=None,
4644+
volume_size=None,
4645+
model_data_download_timeout=None,
4646+
container_startup_health_check_timeout=None,
46284647
):
46294648
"""Create a production variant description suitable for use in a ``ProductionVariant`` list.
46304649
@@ -4646,7 +4665,16 @@ def production_variant(
46464665
serverless_inference_config (dict): Specifies configuration dict related to serverless
46474666
endpoint. The dict is converted from sagemaker.model_monitor.ServerlessInferenceConfig
46484667
object (default: None)
4649-
4668+
volume_size (int): The size, in GB, of the ML storage volume attached to individual
4669+
inference instance associated with the production variant. Currently only Amazon EBS
4670+
gp2 storage volumes are supported.
4671+
model_data_download_timeout (int): The timeout value, in seconds, to download and extract
4672+
model data from Amazon S3 to the individual inference instance associated with this
4673+
production variant.
4674+
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
4675+
inference container to pass health check by SageMaker Hosting. For more information
4676+
about health check see:
4677+
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
46504678
Returns:
46514679
dict[str, str]: A SageMaker ``ProductionVariant`` description
46524680
"""
@@ -4665,6 +4693,12 @@ def production_variant(
46654693
initial_instance_count = initial_instance_count or 1
46664694
production_variant_configuration["InitialInstanceCount"] = initial_instance_count
46674695
production_variant_configuration["InstanceType"] = instance_type
4696+
update_args(
4697+
production_variant_configuration,
4698+
VolumeSizeInGB=volume_size,
4699+
ModelDataDownloadTimeoutInSeconds=model_data_download_timeout,
4700+
ContainerStartupHealthCheckTimeoutInSeconds=container_startup_health_check_timeout,
4701+
)
46684702

46694703
return production_variant_configuration
46704704

src/sagemaker/tensorflow/model.py

+6
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,9 @@ def deploy(
320320
update_endpoint=None,
321321
async_inference_config=None,
322322
serverless_inference_config=None,
323+
volume_size=None,
324+
model_data_download_timeout=None,
325+
container_startup_health_check_timeout=None,
323326
):
324327
"""Deploy a Tensorflow ``Model`` to a SageMaker ``Endpoint``."""
325328

@@ -340,6 +343,9 @@ def deploy(
340343
data_capture_config=data_capture_config,
341344
async_inference_config=async_inference_config,
342345
serverless_inference_config=serverless_inference_config,
346+
volume_size=volume_size,
347+
model_data_download_timeout=model_data_download_timeout,
348+
container_startup_health_check_timeout=container_startup_health_check_timeout,
343349
update_endpoint=update_endpoint,
344350
)
345351

tests/unit/sagemaker/automl/test_auto_ml.py

+3
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,9 @@ def test_deploy_optional_args(candidate_estimator, sagemaker_session, candidate_
596596
deserializer=None,
597597
endpoint_name=JOB_NAME,
598598
kms_key=OUTPUT_KMS_KEY,
599+
volume_size=None,
600+
model_data_download_timeout=None,
601+
container_startup_health_check_timeout=None,
599602
tags=TAGS,
600603
wait=False,
601604
)

tests/unit/sagemaker/model/test_deploy.py

+9
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ def test_deploy(name_from_base, prepare_container_def, production_variant, sagem
7171
INSTANCE_COUNT,
7272
accelerator_type=None,
7373
serverless_inference_config=None,
74+
volume_size=None,
75+
model_data_download_timeout=None,
76+
container_startup_health_check_timeout=None,
7477
)
7578

7679
sagemaker_session.create_model.assert_called_with(
@@ -120,6 +123,9 @@ def test_deploy_accelerator_type(
120123
INSTANCE_COUNT,
121124
accelerator_type=ACCELERATOR_TYPE,
122125
serverless_inference_config=None,
126+
volume_size=None,
127+
model_data_download_timeout=None,
128+
container_startup_health_check_timeout=None,
123129
)
124130

125131
sagemaker_session.endpoint_from_production_variants.assert_called_with(
@@ -363,6 +369,9 @@ def test_deploy_serverless_inference(production_variant, create_sagemaker_model,
363369
None,
364370
accelerator_type=None,
365371
serverless_inference_config=serverless_inference_config_dict,
372+
volume_size=None,
373+
model_data_download_timeout=None,
374+
container_startup_health_check_timeout=None,
366375
)
367376

368377
sagemaker_session.endpoint_from_production_variants.assert_called_with(

tests/unit/test_estimator.py

+3
Original file line numberDiff line numberDiff line change
@@ -3169,6 +3169,9 @@ def test_generic_to_deploy_kms(create_model, sagemaker_session):
31693169
data_capture_config=None,
31703170
async_inference_config=None,
31713171
serverless_inference_config=None,
3172+
volume_size=None,
3173+
model_data_download_timeout=None,
3174+
container_startup_health_check_timeout=None,
31723175
)
31733176

31743177

tests/unit/test_pipeline_model.py

+3
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,9 @@ def test_deploy_update_endpoint(tfo, time, sagemaker_session):
188188
tags=None,
189189
kms_key=None,
190190
data_capture_config_dict=None,
191+
volume_size=None,
192+
model_data_download_timeout=None,
193+
container_startup_health_check_timeout=None,
191194
)
192195
config_name = sagemaker_session.create_endpoint_config(
193196
name=model.name,

0 commit comments

Comments
 (0)