@@ -2981,6 +2981,9 @@ def create_endpoint_config(
2981
2981
tags = None ,
2982
2982
kms_key = None ,
2983
2983
data_capture_config_dict = None ,
2984
+ volume_size = None ,
2985
+ model_data_download_timeout = None ,
2986
+ container_startup_health_check_timeout = None ,
2984
2987
):
2985
2988
"""Create an Amazon SageMaker endpoint configuration.
2986
2989
@@ -3004,6 +3007,16 @@ def create_endpoint_config(
3004
3007
attached to the instance hosting the endpoint.
3005
3008
data_capture_config_dict (dict): Specifies configuration related to Endpoint data
3006
3009
capture for use with Amazon SageMaker Model Monitoring. Default: None.
3010
+ volume_size (int): The size, in GB, of the ML storage volume attached to individual
3011
+ inference instance associated with the production variant. Currently only Amazon EBS
3012
+ gp2 storage volumes are supported.
3013
+ model_data_download_timeout (int): The timeout value, in seconds, to download and
3014
+ extract model data from Amazon S3 to the individual inference instance associated
3015
+ with this production variant.
3016
+ container_startup_health_check_timeout (int): The timeout value, in seconds, for your
3017
+ inference container to pass health check by SageMaker Hosting. For more information
3018
+ about health check see:
3019
+ https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
3007
3020
3008
3021
Example:
3009
3022
>>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}]
@@ -3025,6 +3038,9 @@ def create_endpoint_config(
3025
3038
instance_type ,
3026
3039
initial_instance_count ,
3027
3040
accelerator_type = accelerator_type ,
3041
+ volume_size = volume_size ,
3042
+ model_data_download_timeout = model_data_download_timeout ,
3043
+ container_startup_health_check_timeout = container_startup_health_check_timeout ,
3028
3044
)
3029
3045
],
3030
3046
}
@@ -4636,6 +4652,9 @@ def production_variant(
4636
4652
initial_weight = 1 ,
4637
4653
accelerator_type = None ,
4638
4654
serverless_inference_config = None ,
4655
+ volume_size = None ,
4656
+ model_data_download_timeout = None ,
4657
+ container_startup_health_check_timeout = None ,
4639
4658
):
4640
4659
"""Create a production variant description suitable for use in a ``ProductionVariant`` list.
4641
4660
@@ -4657,7 +4676,16 @@ def production_variant(
4657
4676
serverless_inference_config (dict): Specifies configuration dict related to serverless
4658
4677
endpoint. The dict is converted from sagemaker.model_monitor.ServerlessInferenceConfig
4659
4678
object (default: None)
4660
-
4679
+ volume_size (int): The size, in GB, of the ML storage volume attached to individual
4680
+ inference instance associated with the production variant. Currently only Amazon EBS
4681
+ gp2 storage volumes are supported.
4682
+ model_data_download_timeout (int): The timeout value, in seconds, to download and extract
4683
+ model data from Amazon S3 to the individual inference instance associated with this
4684
+ production variant.
4685
+ container_startup_health_check_timeout (int): The timeout value, in seconds, for your
4686
+ inference container to pass health check by SageMaker Hosting. For more information
4687
+ about health check see:
4688
+ https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
4661
4689
Returns:
4662
4690
dict[str, str]: A SageMaker ``ProductionVariant`` description
4663
4691
"""
@@ -4676,6 +4704,12 @@ def production_variant(
4676
4704
initial_instance_count = initial_instance_count or 1
4677
4705
production_variant_configuration ["InitialInstanceCount" ] = initial_instance_count
4678
4706
production_variant_configuration ["InstanceType" ] = instance_type
4707
+ update_args (
4708
+ production_variant_configuration ,
4709
+ VolumeSizeInGB = volume_size ,
4710
+ ModelDataDownloadTimeoutInSeconds = model_data_download_timeout ,
4711
+ ContainerStartupHealthCheckTimeoutInSeconds = container_startup_health_check_timeout ,
4712
+ )
4679
4713
4680
4714
return production_variant_configuration
4681
4715
0 commit comments