Skip to content

Commit 9ead9c8

Browse files
rsareddy0329 (Roja Reddy Sareddy)
and
Roja Reddy Sareddy
authored
feature: Enabled update_endpoint through model_builder (aws#5085)
* feature: Enabled update_endpoint through model_builder * fix: fix unit test, black-check, pylint errors * fix: fix black-check, pylint errors --------- Co-authored-by: Roja Reddy Sareddy <[email protected]>
1 parent 65482fa commit 9ead9c8

File tree

8 files changed

+330
-18
lines changed

8 files changed

+330
-18
lines changed

src/sagemaker/huggingface/model.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ def deploy(
218218
container_startup_health_check_timeout=None,
219219
inference_recommendation_id=None,
220220
explainer_config=None,
221+
update_endpoint: Optional[bool] = False,
221222
**kwargs,
222223
):
223224
"""Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.
@@ -296,6 +297,11 @@ def deploy(
296297
would like to deploy the model and endpoint with recommended parameters.
297298
explainer_config (sagemaker.explainer.ExplainerConfig): Specifies online explainability
298299
configuration for use with Amazon SageMaker Clarify. (default: None)
300+
update_endpoint (Optional[bool]):
301+
Flag to update the model in an existing Amazon SageMaker endpoint.
302+
If True, this will deploy a new EndpointConfig to an already existing endpoint
303+
and delete resources corresponding to the previous EndpointConfig. Default: False
304+
Note: Currently this is supported for single model endpoints
299305
Raises:
300306
ValueError: If arguments combination check failed in these circumstances:
301307
- If no role is specified or
@@ -335,6 +341,7 @@ def deploy(
335341
container_startup_health_check_timeout=container_startup_health_check_timeout,
336342
inference_recommendation_id=inference_recommendation_id,
337343
explainer_config=explainer_config,
344+
update_endpoint=update_endpoint,
338345
**kwargs,
339346
)
340347

src/sagemaker/model.py

Lines changed: 42 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
from sagemaker.model_card.schema_constraints import ModelApprovalStatusEnum
5454
from sagemaker.session import Session
5555
from sagemaker.model_metrics import ModelMetrics
56-
from sagemaker.deprecations import removed_kwargs
5756
from sagemaker.drift_check_baselines import DriftCheckBaselines
5857
from sagemaker.explainer import ExplainerConfig
5958
from sagemaker.metadata_properties import MetadataProperties
@@ -1386,6 +1385,7 @@ def deploy(
13861385
routing_config: Optional[Dict[str, Any]] = None,
13871386
model_reference_arn: Optional[str] = None,
13881387
inference_ami_version: Optional[str] = None,
1388+
update_endpoint: Optional[bool] = False,
13891389
**kwargs,
13901390
):
13911391
"""Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.
@@ -1497,6 +1497,11 @@ def deploy(
14971497
inference_ami_version (Optional [str]): Specifies an option from a collection of preconfigured
14981498
Amazon Machine Image (AMI) images. For a full list of options, see:
14991499
https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_ProductionVariant.html
1500+
update_endpoint (Optional[bool]):
1501+
Flag to update the model in an existing Amazon SageMaker endpoint.
1502+
If True, this will deploy a new EndpointConfig to an already existing endpoint
1503+
and delete resources corresponding to the previous EndpointConfig. Default: False
1504+
Note: Currently this is supported for single model endpoints
15001505
Raises:
15011506
ValueError: If arguments combination check failed in these circumstances:
15021507
- If no role is specified or
@@ -1512,8 +1517,6 @@ def deploy(
15121517
"""
15131518
self.accept_eula = accept_eula
15141519

1515-
removed_kwargs("update_endpoint", kwargs)
1516-
15171520
self._init_sagemaker_session_if_does_not_exist(instance_type)
15181521
# Depending on the instance type, a local session (or) a session is initialized.
15191522
self.role = resolve_value_from_config(
@@ -1628,6 +1631,10 @@ def deploy(
16281631

16291632
# Support multiple models on same endpoint
16301633
if endpoint_type == EndpointType.INFERENCE_COMPONENT_BASED:
1634+
if update_endpoint:
1635+
raise ValueError(
1636+
"Currently update_endpoint is supported for single model endpoints"
1637+
)
16311638
if endpoint_name:
16321639
self.endpoint_name = endpoint_name
16331640
else:
@@ -1783,17 +1790,38 @@ def deploy(
17831790
if is_explainer_enabled:
17841791
explainer_config_dict = explainer_config._to_request_dict()
17851792

1786-
self.sagemaker_session.endpoint_from_production_variants(
1787-
name=self.endpoint_name,
1788-
production_variants=[production_variant],
1789-
tags=tags,
1790-
kms_key=kms_key,
1791-
wait=wait,
1792-
data_capture_config_dict=data_capture_config_dict,
1793-
explainer_config_dict=explainer_config_dict,
1794-
async_inference_config_dict=async_inference_config_dict,
1795-
live_logging=endpoint_logging,
1796-
)
1793+
if update_endpoint:
1794+
endpoint_config_name = self.sagemaker_session.create_endpoint_config(
1795+
name=self.name,
1796+
model_name=self.name,
1797+
initial_instance_count=initial_instance_count,
1798+
instance_type=instance_type,
1799+
accelerator_type=accelerator_type,
1800+
tags=tags,
1801+
kms_key=kms_key,
1802+
data_capture_config_dict=data_capture_config_dict,
1803+
volume_size=volume_size,
1804+
model_data_download_timeout=model_data_download_timeout,
1805+
container_startup_health_check_timeout=container_startup_health_check_timeout,
1806+
explainer_config_dict=explainer_config_dict,
1807+
async_inference_config_dict=async_inference_config_dict,
1808+
serverless_inference_config=serverless_inference_config_dict,
1809+
routing_config=routing_config,
1810+
inference_ami_version=inference_ami_version,
1811+
)
1812+
self.sagemaker_session.update_endpoint(self.endpoint_name, endpoint_config_name)
1813+
else:
1814+
self.sagemaker_session.endpoint_from_production_variants(
1815+
name=self.endpoint_name,
1816+
production_variants=[production_variant],
1817+
tags=tags,
1818+
kms_key=kms_key,
1819+
wait=wait,
1820+
data_capture_config_dict=data_capture_config_dict,
1821+
explainer_config_dict=explainer_config_dict,
1822+
async_inference_config_dict=async_inference_config_dict,
1823+
live_logging=endpoint_logging,
1824+
)
17971825

17981826
if self.predictor_cls:
17991827
predictor = self.predictor_cls(self.endpoint_name, self.sagemaker_session)

src/sagemaker/serve/builder/model_builder.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1602,6 +1602,7 @@ def deploy(
16021602
ResourceRequirements,
16031603
]
16041604
] = None,
1605+
update_endpoint: Optional[bool] = False,
16051606
) -> Union[Predictor, Transformer]:
16061607
"""Deploys the built Model.
16071608
@@ -1615,24 +1616,33 @@ def deploy(
16151616
AsyncInferenceConfig, BatchTransformInferenceConfig, ResourceRequirements]]) :
16161617
Additional Config for different deployment types such as
16171618
serverless, async, batch and multi-model/container
1619+
update_endpoint (Optional[bool]):
1620+
Flag to update the model in an existing Amazon SageMaker endpoint.
1621+
If True, this will deploy a new EndpointConfig to an already existing endpoint
1622+
and delete resources corresponding to the previous EndpointConfig. Default: False
1623+
Note: Currently this is supported for single model endpoints
16181624
Returns:
16191625
Transformer for Batch Deployments
16201626
Predictors for all others
16211627
"""
16221628
if not hasattr(self, "built_model"):
16231629
raise ValueError("Model Needs to be built before deploying")
1624-
endpoint_name = unique_name_from_base(endpoint_name)
1630+
if not update_endpoint:
1631+
endpoint_name = unique_name_from_base(endpoint_name)
1632+
16251633
if not inference_config: # Real-time Deployment
16261634
return self.built_model.deploy(
16271635
instance_type=self.instance_type,
16281636
initial_instance_count=initial_instance_count,
16291637
endpoint_name=endpoint_name,
1638+
update_endpoint=update_endpoint,
16301639
)
16311640

16321641
if isinstance(inference_config, ServerlessInferenceConfig):
16331642
return self.built_model.deploy(
16341643
serverless_inference_config=inference_config,
16351644
endpoint_name=endpoint_name,
1645+
update_endpoint=update_endpoint,
16361646
)
16371647

16381648
if isinstance(inference_config, AsyncInferenceConfig):
@@ -1641,6 +1651,7 @@ def deploy(
16411651
initial_instance_count=initial_instance_count,
16421652
async_inference_config=inference_config,
16431653
endpoint_name=endpoint_name,
1654+
update_endpoint=update_endpoint,
16441655
)
16451656

16461657
if isinstance(inference_config, BatchTransformInferenceConfig):
@@ -1652,6 +1663,10 @@ def deploy(
16521663
return transformer
16531664

16541665
if isinstance(inference_config, ResourceRequirements):
1666+
if update_endpoint:
1667+
raise ValueError(
1668+
"Currently update_endpoint is supported for single model endpoints"
1669+
)
16551670
# Multi Model and MultiContainer endpoints with Inference Component
16561671
return self.built_model.deploy(
16571672
instance_type=self.instance_type,
@@ -1660,6 +1675,7 @@ def deploy(
16601675
resources=inference_config,
16611676
initial_instance_count=initial_instance_count,
16621677
role=self.role_arn,
1678+
update_endpoint=update_endpoint,
16631679
)
16641680

16651681
raise ValueError("Deployment Options not supported")

src/sagemaker/session.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4488,6 +4488,10 @@ def create_endpoint_config(
44884488
model_data_download_timeout=None,
44894489
container_startup_health_check_timeout=None,
44904490
explainer_config_dict=None,
4491+
async_inference_config_dict=None,
4492+
serverless_inference_config_dict=None,
4493+
routing_config: Optional[Dict[str, Any]] = None,
4494+
inference_ami_version: Optional[str] = None,
44914495
):
44924496
"""Create an Amazon SageMaker endpoint configuration.
44934497
@@ -4525,6 +4529,30 @@ def create_endpoint_config(
45254529
-inference-algo-ping-requests
45264530
explainer_config_dict (dict): Specifies configuration to enable explainers.
45274531
Default: None.
4532+
async_inference_config_dict (dict): Specifies
4533+
configuration related to async endpoint. Use this configuration when trying
4534+
to create async endpoint and make async inference. If empty config object
4535+
passed through, will use default config to deploy async endpoint. Deploy a
4536+
real-time endpoint if it's None. (default: None).
4537+
serverless_inference_config_dict (dict):
4538+
Specifies configuration related to serverless endpoint. Use this configuration
4539+
when trying to create serverless endpoint and make serverless inference. If
4540+
empty object passed through, will use pre-defined values in
4541+
``ServerlessInferenceConfig`` class to deploy serverless endpoint. Deploy an
4542+
instance based endpoint if it's None. (default: None).
4543+
routing_config (Optional[Dict[str, Any]): Settings the control how the endpoint routes
4544+
incoming traffic to the instances that the endpoint hosts.
4545+
Currently, support dictionary key ``RoutingStrategy``.
4546+
4547+
.. code:: python
4548+
4549+
{
4550+
"RoutingStrategy": sagemaker.enums.RoutingStrategy.RANDOM
4551+
}
4552+
inference_ami_version (Optional [str]):
4553+
Specifies an option from a collection of preconfigured
4554+
Amazon Machine Image (AMI) images. For a full list of options, see:
4555+
https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_ProductionVariant.html
45284556
45294557
Example:
45304558
>>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}]
@@ -4544,9 +4572,12 @@ def create_endpoint_config(
45444572
instance_type,
45454573
initial_instance_count,
45464574
accelerator_type=accelerator_type,
4575+
serverless_inference_config=serverless_inference_config_dict,
45474576
volume_size=volume_size,
45484577
model_data_download_timeout=model_data_download_timeout,
45494578
container_startup_health_check_timeout=container_startup_health_check_timeout,
4579+
routing_config=routing_config,
4580+
inference_ami_version=inference_ami_version,
45504581
)
45514582
production_variants = [provided_production_variant]
45524583
# Currently we just inject CoreDumpConfig.KmsKeyId from the config for production variant.
@@ -4586,6 +4617,14 @@ def create_endpoint_config(
45864617
)
45874618
request["DataCaptureConfig"] = inferred_data_capture_config_dict
45884619

4620+
if async_inference_config_dict is not None:
4621+
inferred_async_inference_config_dict = update_nested_dictionary_with_values_from_config(
4622+
async_inference_config_dict,
4623+
ENDPOINT_CONFIG_ASYNC_INFERENCE_PATH,
4624+
sagemaker_session=self,
4625+
)
4626+
request["AsyncInferenceConfig"] = inferred_async_inference_config_dict
4627+
45894628
if explainer_config_dict is not None:
45904629
request["ExplainerConfig"] = explainer_config_dict
45914630

src/sagemaker/tensorflow/model.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ def deploy(
358358
container_startup_health_check_timeout=None,
359359
inference_recommendation_id=None,
360360
explainer_config=None,
361+
update_endpoint: Optional[bool] = False,
361362
**kwargs,
362363
):
363364
"""Deploy a Tensorflow ``Model`` to a SageMaker ``Endpoint``."""
@@ -383,6 +384,7 @@ def deploy(
383384
container_startup_health_check_timeout=container_startup_health_check_timeout,
384385
inference_recommendation_id=inference_recommendation_id,
385386
explainer_config=explainer_config,
387+
update_endpoint=update_endpoint,
386388
**kwargs,
387389
)
388390

tests/unit/sagemaker/jumpstart/model/test_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -794,7 +794,7 @@ def test_jumpstart_model_kwargs_match_parent_class(self):
794794
and reach out to JumpStart team."""
795795

796796
init_args_to_skip: Set[str] = set(["model_reference_arn"])
797-
deploy_args_to_skip: Set[str] = set(["kwargs", "model_reference_arn"])
797+
deploy_args_to_skip: Set[str] = set(["kwargs", "model_reference_arn", "update_endpoint"])
798798
deploy_args_removed_at_deploy_time: Set[str] = set(["model_access_configs"])
799799

800800
parent_class_init = Model.__init__

0 commit comments

Comments (0)