@@ -2970,6 +2970,9 @@ def create_endpoint_config(
2970
2970
tags = None ,
2971
2971
kms_key = None ,
2972
2972
data_capture_config_dict = None ,
2973
+ volume_size = None ,
2974
+ model_data_download_timeout = None ,
2975
+ container_startup_health_check_timeout = None ,
2973
2976
):
2974
2977
"""Create an Amazon SageMaker endpoint configuration.
2975
2978
@@ -2993,6 +2996,16 @@ def create_endpoint_config(
2993
2996
attached to the instance hosting the endpoint.
2994
2997
data_capture_config_dict (dict): Specifies configuration related to Endpoint data
2995
2998
capture for use with Amazon SageMaker Model Monitoring. Default: None.
2999
+ volume_size (int): The size, in GB, of the ML storage volume attached to individual
3000
+ inference instance associated with the production variant. Currently only Amazon EBS
3001
+ gp2 storage volumes are supported.
3002
+ model_data_download_timeout (int): The timeout value, in seconds, to download and extract
3003
+ model data from Amazon S3 to the individual inference instance associated with this
3004
+ production variant.
3005
+ container_startup_health_check_timeout (int): The timeout value, in seconds, for your
3006
+ inference container to pass health check by SageMaker Hosting. For more information
3007
+ about health check see:
3008
+ https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
2996
3009
2997
3010
Example:
2998
3011
>>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}]
@@ -3014,6 +3027,9 @@ def create_endpoint_config(
3014
3027
instance_type ,
3015
3028
initial_instance_count ,
3016
3029
accelerator_type = accelerator_type ,
3030
+ volume_size = volume_size ,
3031
+ model_data_download_timeout = model_data_download_timeout ,
3032
+ container_startup_health_check_timeout = container_startup_health_check_timeout ,
3017
3033
)
3018
3034
],
3019
3035
}
@@ -4625,6 +4641,9 @@ def production_variant(
4625
4641
initial_weight = 1 ,
4626
4642
accelerator_type = None ,
4627
4643
serverless_inference_config = None ,
4644
+ volume_size = None ,
4645
+ model_data_download_timeout = None ,
4646
+ container_startup_health_check_timeout = None ,
4628
4647
):
4629
4648
"""Create a production variant description suitable for use in a ``ProductionVariant`` list.
4630
4649
@@ -4646,7 +4665,16 @@ def production_variant(
4646
4665
serverless_inference_config (dict): Specifies configuration dict related to serverless
4647
4666
endpoint. The dict is converted from sagemaker.model_monitor.ServerlessInferenceConfig
4648
4667
object (default: None)
4649
-
4668
+ volume_size (int): The size, in GB, of the ML storage volume attached to individual
4669
+ inference instance associated with the production variant. Currently only Amazon EBS
4670
+ gp2 storage volumes are supported.
4671
+ model_data_download_timeout (int): The timeout value, in seconds, to download and extract
4672
+ model data from Amazon S3 to the individual inference instance associated with this
4673
+ production variant.
4674
+ container_startup_health_check_timeout (int): The timeout value, in seconds, for your
4675
+ inference container to pass health check by SageMaker Hosting. For more information
4676
+ about health check see:
4677
+ https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
4650
4678
Returns:
4651
4679
dict[str, str]: A SageMaker ``ProductionVariant`` description
4652
4680
"""
@@ -4665,6 +4693,12 @@ def production_variant(
4665
4693
initial_instance_count = initial_instance_count or 1
4666
4694
production_variant_configuration ["InitialInstanceCount" ] = initial_instance_count
4667
4695
production_variant_configuration ["InstanceType" ] = instance_type
4696
+ update_args (
4697
+ production_variant_configuration ,
4698
+ VolumeSizeInGB = volume_size ,
4699
+ ModelDataDownloadTimeoutInSeconds = model_data_download_timeout ,
4700
+ ContainerStartupHealthCheckTimeoutInSeconds = container_startup_health_check_timeout ,
4701
+ )
4668
4702
4669
4703
return production_variant_configuration
4670
4704
0 commit comments