Skip to content

feature: support customized timeout for model data download and inference container startup health check for Hosting Endpoints #3388

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions src/sagemaker/automl/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,9 @@ def deploy(
model_kms_key=None,
predictor_cls=None,
inference_response_keys=None,
volume_size=None,
model_data_download_timeout=None,
container_startup_health_check_timeout=None,
):
"""Deploy a candidate to a SageMaker Inference Pipeline.

Expand Down Expand Up @@ -396,6 +399,16 @@ def deploy(
function on the created endpoint name.
inference_response_keys (list): List of keys for response content. The order of the
keys will dictate the content order in the response.
volume_size (int): The size, in GB, of the ML storage volume attached to individual
inference instance associated with the production variant. Currently only Amazon
EBS gp2 storage volumes are supported.
model_data_download_timeout (int): The timeout value, in seconds, to download and
extract model data from Amazon S3 to the individual inference instance associated
with this production variant.
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
inference container to pass health check by SageMaker Hosting. For more information
about health check see:
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests

Returns:
callable[string, sagemaker.session.Session] or ``None``:
Expand Down Expand Up @@ -423,6 +436,9 @@ def deploy(
kms_key=model_kms_key,
tags=tags,
wait=wait,
volume_size=volume_size,
model_data_download_timeout=model_data_download_timeout,
container_startup_health_check_timeout=container_startup_health_check_timeout,
)

def _check_problem_type_and_job_objective(self, problem_type, job_objective):
Expand Down
16 changes: 16 additions & 0 deletions src/sagemaker/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -1304,6 +1304,9 @@ def deploy(
tags=None,
serverless_inference_config=None,
async_inference_config=None,
volume_size=None,
model_data_download_timeout=None,
container_startup_health_check_timeout=None,
**kwargs,
):
"""Deploy the trained model to an Amazon SageMaker endpoint.
Expand Down Expand Up @@ -1371,6 +1374,16 @@ def deploy(
For more information about tags, see
https://boto3.amazonaws.com/v1/documentation\
/api/latest/reference/services/sagemaker.html#SageMaker.Client.add_tags
volume_size (int): The size, in GB, of the ML storage volume attached to individual
inference instance associated with the production variant. Currently only Amazon
EBS gp2 storage volumes are supported.
model_data_download_timeout (int): The timeout value, in seconds, to download and
extract model data from Amazon S3 to the individual inference instance associated
with this production variant.
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
inference container to pass health check by SageMaker Hosting. For more information
about health check see:
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
**kwargs: Passed to invocation of ``create_model()``.
Implementations may customize ``create_model()`` to accept
``**kwargs`` to customize model creation during deploy.
Expand Down Expand Up @@ -1429,6 +1442,9 @@ def deploy(
data_capture_config=data_capture_config,
serverless_inference_config=serverless_inference_config,
async_inference_config=async_inference_config,
volume_size=volume_size,
model_data_download_timeout=model_data_download_timeout,
container_startup_health_check_timeout=container_startup_health_check_timeout,
)

def register(
Expand Down
16 changes: 16 additions & 0 deletions src/sagemaker/huggingface/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,9 @@ def deploy(
data_capture_config=None,
async_inference_config=None,
serverless_inference_config=None,
volume_size=None,
model_data_download_timeout=None,
container_startup_health_check_timeout=None,
**kwargs,
):
"""Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.
Expand Down Expand Up @@ -269,6 +272,16 @@ def deploy(
empty object passed through, will use pre-defined values in
``ServerlessInferenceConfig`` class to deploy serverless endpoint. Deploy an
instance based endpoint if it's None. (default: None)
volume_size (int): The size, in GB, of the ML storage volume attached to individual
inference instance associated with the production variant. Currently only Amazon
EBS gp2 storage volumes are supported.
model_data_download_timeout (int): The timeout value, in seconds, to download and
extract model data from Amazon S3 to the individual inference instance associated
with this production variant.
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
inference container to pass health check by SageMaker Hosting. For more information
about health check see:
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
Raises:
ValueError: If arguments combination check failed in these circumstances:
- If no role is specified or
Expand Down Expand Up @@ -301,6 +314,9 @@ def deploy(
data_capture_config,
async_inference_config,
serverless_inference_config,
volume_size=volume_size,
model_data_download_timeout=model_data_download_timeout,
container_startup_health_check_timeout=container_startup_health_check_timeout,
)

def register(
Expand Down
16 changes: 16 additions & 0 deletions src/sagemaker/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1029,6 +1029,9 @@ def deploy(
data_capture_config=None,
async_inference_config=None,
serverless_inference_config=None,
volume_size=None,
model_data_download_timeout=None,
container_startup_health_check_timeout=None,
**kwargs,
):
"""Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.
Expand Down Expand Up @@ -1092,6 +1095,16 @@ def deploy(
empty object passed through, will use pre-defined values in
``ServerlessInferenceConfig`` class to deploy serverless endpoint. Deploy an
instance based endpoint if it's None. (default: None)
volume_size (int): The size, in GB, of the ML storage volume attached to individual
inference instance associated with the production variant. Currently only Amazon
EBS gp2 storage volumes are supported.
model_data_download_timeout (int): The timeout value, in seconds, to download and
extract model data from Amazon S3 to the individual inference instance associated
with this production variant.
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
inference container to pass health check by SageMaker Hosting. For more information
about health check see:
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
Raises:
ValueError: If arguments combination check failed in these circumstances:
- If no role is specified or
Expand Down Expand Up @@ -1155,6 +1168,9 @@ def deploy(
initial_instance_count,
accelerator_type=accelerator_type,
serverless_inference_config=serverless_inference_config_dict,
volume_size=volume_size,
model_data_download_timeout=model_data_download_timeout,
container_startup_health_check_timeout=container_startup_health_check_timeout,
)
if endpoint_name:
self.endpoint_name = endpoint_name
Expand Down
23 changes: 22 additions & 1 deletion src/sagemaker/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ def deploy(
update_endpoint=False,
data_capture_config=None,
kms_key=None,
volume_size=None,
model_data_download_timeout=None,
container_startup_health_check_timeout=None,
):
"""Deploy the ``Model`` to an ``Endpoint``.

Expand Down Expand Up @@ -170,6 +173,16 @@ def deploy(
kms_key (str): The ARN, Key ID or Alias of the KMS key that is used to
encrypt the data on the storage volume attached to the instance hosting
the endpoint.
volume_size (int): The size, in GB, of the ML storage volume attached to individual
inference instance associated with the production variant. Currently only Amazon
EBS gp2 storage volumes are supported.
model_data_download_timeout (int): The timeout value, in seconds, to download and
extract model data from Amazon S3 to the individual inference instance associated
with this production variant.
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
inference container to pass health check by SageMaker Hosting. For more information
about health check see:
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests

Returns:
callable[string, sagemaker.session.Session] or None: Invocation of
Expand All @@ -191,7 +204,12 @@ def deploy(
)

production_variant = sagemaker.production_variant(
self.name, instance_type, initial_instance_count
self.name,
instance_type,
initial_instance_count,
volume_size=volume_size,
model_data_download_timeout=model_data_download_timeout,
container_startup_health_check_timeout=container_startup_health_check_timeout,
)
self.endpoint_name = endpoint_name or self.name

Expand All @@ -208,6 +226,9 @@ def deploy(
tags=tags,
kms_key=kms_key,
data_capture_config_dict=data_capture_config_dict,
volume_size=volume_size,
model_data_download_timeout=model_data_download_timeout,
container_startup_health_check_timeout=container_startup_health_check_timeout,
)
self.sagemaker_session.update_endpoint(
self.endpoint_name, endpoint_config_name, wait=wait
Expand Down
36 changes: 35 additions & 1 deletion src/sagemaker/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -2981,6 +2981,9 @@ def create_endpoint_config(
tags=None,
kms_key=None,
data_capture_config_dict=None,
volume_size=None,
model_data_download_timeout=None,
container_startup_health_check_timeout=None,
):
"""Create an Amazon SageMaker endpoint configuration.

Expand All @@ -3004,6 +3007,16 @@ def create_endpoint_config(
attached to the instance hosting the endpoint.
data_capture_config_dict (dict): Specifies configuration related to Endpoint data
capture for use with Amazon SageMaker Model Monitoring. Default: None.
volume_size (int): The size, in GB, of the ML storage volume attached to individual
inference instance associated with the production variant. Currently only Amazon
EBS gp2 storage volumes are supported.
model_data_download_timeout (int): The timeout value, in seconds, to download and
extract model data from Amazon S3 to the individual inference instance associated
with this production variant.
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
inference container to pass health check by SageMaker Hosting. For more information
about health check see:
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests

Example:
>>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}]
Expand All @@ -3025,6 +3038,9 @@ def create_endpoint_config(
instance_type,
initial_instance_count,
accelerator_type=accelerator_type,
volume_size=volume_size,
model_data_download_timeout=model_data_download_timeout,
container_startup_health_check_timeout=container_startup_health_check_timeout,
)
],
}
Expand Down Expand Up @@ -4636,6 +4652,9 @@ def production_variant(
initial_weight=1,
accelerator_type=None,
serverless_inference_config=None,
volume_size=None,
model_data_download_timeout=None,
container_startup_health_check_timeout=None,
):
"""Create a production variant description suitable for use in a ``ProductionVariant`` list.

Expand All @@ -4657,7 +4676,16 @@ def production_variant(
serverless_inference_config (dict): Specifies configuration dict related to serverless
endpoint. The dict is converted from sagemaker.model_monitor.ServerlessInferenceConfig
object (default: None)

volume_size (int): The size, in GB, of the ML storage volume attached to individual
inference instance associated with the production variant. Currently only Amazon
EBS gp2 storage volumes are supported.
model_data_download_timeout (int): The timeout value, in seconds, to download and extract
model data from Amazon S3 to the individual inference instance associated with this
production variant.
container_startup_health_check_timeout (int): The timeout value, in seconds, for your
inference container to pass health check by SageMaker Hosting. For more information
about health check see:
https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
Returns:
dict[str, str]: A SageMaker ``ProductionVariant`` description
"""
Expand All @@ -4676,6 +4704,12 @@ def production_variant(
initial_instance_count = initial_instance_count or 1
production_variant_configuration["InitialInstanceCount"] = initial_instance_count
production_variant_configuration["InstanceType"] = instance_type
update_args(
production_variant_configuration,
VolumeSizeInGB=volume_size,
ModelDataDownloadTimeoutInSeconds=model_data_download_timeout,
ContainerStartupHealthCheckTimeoutInSeconds=container_startup_health_check_timeout,
)

return production_variant_configuration

Expand Down
6 changes: 6 additions & 0 deletions src/sagemaker/tensorflow/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,9 @@ def deploy(
update_endpoint=None,
async_inference_config=None,
serverless_inference_config=None,
volume_size=None,
model_data_download_timeout=None,
container_startup_health_check_timeout=None,
):
"""Deploy a Tensorflow ``Model`` to a SageMaker ``Endpoint``."""

Expand All @@ -340,6 +343,9 @@ def deploy(
data_capture_config=data_capture_config,
async_inference_config=async_inference_config,
serverless_inference_config=serverless_inference_config,
volume_size=volume_size,
model_data_download_timeout=model_data_download_timeout,
container_startup_health_check_timeout=container_startup_health_check_timeout,
update_endpoint=update_endpoint,
)

Expand Down
3 changes: 3 additions & 0 deletions tests/unit/sagemaker/automl/test_auto_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,9 @@ def test_deploy_optional_args(candidate_estimator, sagemaker_session, candidate_
deserializer=None,
endpoint_name=JOB_NAME,
kms_key=OUTPUT_KMS_KEY,
volume_size=None,
model_data_download_timeout=None,
container_startup_health_check_timeout=None,
tags=TAGS,
wait=False,
)
Expand Down
Loading