aws · knakad · Aug 14, 2019 · Aug 7, 2019 · Aug 7, 2019 · Aug 14, 2019
@@ -83,7 +83,13 @@ def pipeline_container_def(self, instance_type):
         return sagemaker.pipeline_container_def(self.models, instance_type)
 
     def deploy(
-        self, initial_instance_count, instance_type, endpoint_name=None, tags=None, wait=True
+        self,
+        initial_instance_count,
+        instance_type,
+        endpoint_name=None,
+        tags=None,
+        wait=True,
+        update_endpoint=False,
     ):
         """Deploy this ``Model`` to an ``Endpoint`` and optionally return a
         ``Predictor``.
@@ -110,6 +116,11 @@ def deploy(
                 specific endpoint.
             wait (bool): Whether the call should wait until the deployment of
                 model completes (default: True).
+            update_endpoint (bool): Flag to update the model in an existing
+                Amazon SageMaker endpoint. If True, this will deploy a new
+                EndpointConfig to an already existing endpoint and delete
+                resources corresponding to the previous EndpointConfig. If
+                False, a new endpoint will be created (default: False).
 
         Returns:
             callable[string, sagemaker.session.Session] or None: Invocation of
@@ -130,9 +141,21 @@ def deploy(
             self.name, instance_type, initial_instance_count
         )
         self.endpoint_name = endpoint_name or self.name
-        self.sagemaker_session.endpoint_from_production_variants(
-            self.endpoint_name, [production_variant], tags, wait=wait
-        )
+
+        if update_endpoint:
+            endpoint_config_name = self.sagemaker_session.create_endpoint_config(
+                name=self.name,
+                model_name=self.name,
+                initial_instance_count=initial_instance_count,
+                instance_type=instance_type,
+                tags=tags,
+            )
+            self.sagemaker_session.update_endpoint(self.endpoint_name, endpoint_config_name)
+        else:
+            self.sagemaker_session.endpoint_from_production_variants(
+                self.endpoint_name, [production_variant], tags, wait=wait
+            )
+
         if self.predictor_cls:
             return self.predictor_cls(self.endpoint_name, self.sagemaker_session)
         return None

@@ -14,6 +14,7 @@
 
 import json
 import os
+import time
 
 import pytest
 from tests.integ import DATA_DIR, TRANSFORM_DEFAULT_TIMEOUT_MINUTES
@@ -148,3 +149,68 @@ def test_inference_pipeline_model_deploy(sagemaker_session, cpu_instance_type):
     with pytest.raises(Exception) as exception:
         sagemaker_session.sagemaker_client.describe_model(ModelName=model.name)
         assert "Could not find model" in str(exception.value)
+
+
+def test_inference_pipeline_model_deploy_with_update_endpoint(sagemaker_session):
+    """Deploy a pipeline model, then redeploy it onto the same endpoint.
+
+    The first ``deploy`` call creates the endpoint. The second call passes
+    ``update_endpoint=True`` with a different instance type. The test polls
+    until the endpoint reports ``InService`` again and then checks that the
+    endpoint references a different endpoint config whose first production
+    variant carries the new instance type. Finally the model is deleted and
+    ``describe_model`` is expected to fail.
+    """
+    sparkml_data_path = os.path.join(DATA_DIR, "sparkml_model")
+    xgboost_data_path = os.path.join(DATA_DIR, "xgboost_model")
+    endpoint_name = "test-inference-pipeline-deploy-{}".format(sagemaker_timestamp())
+    sparkml_model_data = sagemaker_session.upload_data(
+        path=os.path.join(sparkml_data_path, "mleap_model.tar.gz"),
+        key_prefix="integ-test-data/sparkml/model",
+    )
+    xgb_model_data = sagemaker_session.upload_data(
+        path=os.path.join(xgboost_data_path, "xgb_model.tar.gz"),
+        key_prefix="integ-test-data/xgboost/model",
+    )
+
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
+        sparkml_model = SparkMLModel(
+            model_data=sparkml_model_data,
+            env={"SAGEMAKER_SPARKML_SCHEMA": SCHEMA},
+            sagemaker_session=sagemaker_session,
+        )
+        xgb_image = get_image_uri(sagemaker_session.boto_region_name, "xgboost")
+        xgb_model = Model(
+            model_data=xgb_model_data, image=xgb_image, sagemaker_session=sagemaker_session
+        )
+        model = PipelineModel(
+            models=[sparkml_model, xgb_model],
+            role="SageMakerRole",
+            sagemaker_session=sagemaker_session,
+        )
+        model.deploy(1, "ml.t2.medium", endpoint_name=endpoint_name)
+        old_endpoint = sagemaker_session.sagemaker_client.describe_endpoint(
+            EndpointName=endpoint_name
+        )
+        old_config_name = old_endpoint["EndpointConfigName"]
+
+        model.deploy(1, "ml.m4.xlarge", update_endpoint=True, endpoint_name=endpoint_name)
+
+        # Wait for the endpoint to finish updating. An update takes roughly
+        # 7 minutes; 40 polls with 30s sleeps give a 20 minute timeout.
+        max_retry_count = 40
+        for _ in range(max_retry_count):
+            time.sleep(30)
+            new_endpoint = sagemaker_session.sagemaker_client.describe_endpoint(
+                EndpointName=endpoint_name
+            )
+            if new_endpoint["EndpointStatus"] == "InService":
+                break
+        else:
+            # Reached only when no poll observed "InService".
+            raise Exception("Endpoint status not 'InService' within expected timeout.")
+
+        new_config_name = new_endpoint["EndpointConfigName"]
+        new_config = sagemaker_session.sagemaker_client.describe_endpoint_config(
+            EndpointConfigName=new_config_name
+        )
+
+        assert old_config_name != new_config_name
+        assert new_config["ProductionVariants"][0]["InstanceType"] == "ml.m4.xlarge"
+        assert new_config["ProductionVariants"][0]["InitialInstanceCount"] == 1
+
+    model.delete_model()
+    with pytest.raises(Exception) as exception:
+        sagemaker_session.sagemaker_client.describe_model(ModelName=model.name)
+    # Checked after the ``with`` block: a statement placed after the raising
+    # call inside the block would never execute.
+    assert "Could not find model" in str(exception.value)
@@ -159,6 +159,41 @@ def test_deploy_endpoint_name(tfo, time, sagemaker_session):
     )
 
 
+@patch("tarfile.open")
+@patch("time.strftime", return_value=TIMESTAMP)
+def test_deploy_update_endpoint(tfo, time, sagemaker_session):
+    """Check that ``deploy(update_endpoint=True)`` updates an existing endpoint.
+
+    Expects a new endpoint config to be created from the model name, instance
+    count and instance type, the endpoint to be updated with that config's
+    name, and ``create_endpoint`` never to be called.
+    """
+    # NOTE: patch decorators inject their mocks bottom-up, so ``tfo`` receives
+    # the ``time.strftime`` mock and ``time`` the ``tarfile.open`` mock.
+    # Neither is used in the body, so the swapped names are harmless.
+    framework_model = DummyFrameworkModel(sagemaker_session)
+    endpoint_name = "endpoint-name"
+    sparkml_model = SparkMLModel(
+        model_data=MODEL_DATA_2, role=ROLE, sagemaker_session=sagemaker_session
+    )
+    model = PipelineModel(
+        models=[framework_model, sparkml_model], role=ROLE, sagemaker_session=sagemaker_session
+    )
+    model.deploy(
+        instance_type=INSTANCE_TYPE,
+        initial_instance_count=INSTANCE_COUNT,
+        endpoint_name=endpoint_name,
+        update_endpoint=True,
+    )
+
+    sagemaker_session.create_endpoint_config.assert_called_with(
+        name=model.name,
+        model_name=model.name,
+        initial_instance_count=INSTANCE_COUNT,
+        instance_type=INSTANCE_TYPE,
+        tags=None,
+    )
+    # Read the stubbed return value directly instead of invoking the mock
+    # again, which would record an extra call unrelated to ``deploy``.
+    config_name = sagemaker_session.create_endpoint_config.return_value
+    sagemaker_session.update_endpoint.assert_called_with(endpoint_name, config_name)
+    sagemaker_session.create_endpoint.assert_not_called()
+
+
 @patch("tarfile.open")
 @patch("time.strftime", return_value=TIMESTAMP)
 def test_transformer(tfo, time, sagemaker_session):