From 7f59e14ec1da64210bd86a57a5ce433b72468b27 Mon Sep 17 00:00:00 2001
From: Chuyang <chuyangdeng@gmail.com>
Date: Thu, 11 Jun 2020 00:30:13 -0700
Subject: [PATCH] Revert "feature: Support for multi variant endpoint
 invocation with target variant param (#1571)"

This reverts commit b481c36be38d08a72d23b15251d465331a1b7d86.
---
 src/sagemaker/local/local_session.py       |   4 -
 src/sagemaker/predictor.py                 |  13 +-
 tests/integ/test_multi_variant_endpoint.py | 309 ---------------------
 tests/unit/test_predictor.py               |  26 --
 4 files changed, 3 insertions(+), 349 deletions(-)
 delete mode 100644 tests/integ/test_multi_variant_endpoint.py

diff --git a/src/sagemaker/local/local_session.py b/src/sagemaker/local/local_session.py
index 6c23cea45a..1e80f6e1b4 100644
--- a/src/sagemaker/local/local_session.py
+++ b/src/sagemaker/local/local_session.py
@@ -343,7 +343,6 @@ def invoke_endpoint(
         Accept=None,
         CustomAttributes=None,
         TargetModel=None,
-        TargetVariant=None,
     ):
         """
 
@@ -371,9 +370,6 @@ def invoke_endpoint(
         if TargetModel is not None:
             headers["X-Amzn-SageMaker-Target-Model"] = TargetModel
 
-        if TargetVariant is not None:
-            headers["X-Amzn-SageMaker-Target-Variant"] = TargetVariant
-
         r = self.http.request("POST", url, body=Body, preload_content=False, headers=headers)
 
         return {"Body": r, "ContentType": Accept}
diff --git a/src/sagemaker/predictor.py b/src/sagemaker/predictor.py
index 0f1efb4e18..80da8a551c 100644
--- a/src/sagemaker/predictor.py
+++ b/src/sagemaker/predictor.py
@@ -83,7 +83,7 @@ def __init__(
         self._endpoint_config_name = self._get_endpoint_config_name()
         self._model_names = self._get_model_names()
 
-    def predict(self, data, initial_args=None, target_model=None, target_variant=None):
+    def predict(self, data, initial_args=None, target_model=None):
         """Return the inference from the specified endpoint.
 
         Args:
@@ -98,9 +98,6 @@ def predict(self, data, initial_args=None, target_model=None, target_variant=Non
             target_model (str): S3 model artifact path to run an inference request on,
                 in case of a multi model endpoint. Does not apply to endpoints hosting
                 single model (Default: None)
-            target_variant (str): The name of the production variant to run an inference
-            request on (Default: None). Note that the ProductionVariant identifies the model
-            you want to host and the resources you want to deploy for hosting it.
 
         Returns:
             object: Inference for the given input. If a deserializer was specified when creating
@@ -109,7 +106,7 @@ def predict(self, data, initial_args=None, target_model=None, target_variant=Non
                 as is.
         """
 
-        request_args = self._create_request_args(data, initial_args, target_model, target_variant)
+        request_args = self._create_request_args(data, initial_args, target_model)
         response = self.sagemaker_session.sagemaker_runtime_client.invoke_endpoint(**request_args)
         return self._handle_response(response)
 
@@ -126,13 +123,12 @@ def _handle_response(self, response):
         response_body.close()
         return data
 
-    def _create_request_args(self, data, initial_args=None, target_model=None, target_variant=None):
+    def _create_request_args(self, data, initial_args=None, target_model=None):
         """
         Args:
             data:
             initial_args:
             target_model:
-            target_variant:
         """
         args = dict(initial_args) if initial_args else {}
 
@@ -148,9 +144,6 @@ def _create_request_args(self, data, initial_args=None, target_model=None, targe
         if target_model:
             args["TargetModel"] = target_model
 
-        if target_variant:
-            args["TargetVariant"] = target_variant
-
         if self.serializer is not None:
             data = self.serializer(data)
 
diff --git a/tests/integ/test_multi_variant_endpoint.py b/tests/integ/test_multi_variant_endpoint.py
deleted file mode 100644
index 7d1fab7645..0000000000
--- a/tests/integ/test_multi_variant_endpoint.py
+++ /dev/null
@@ -1,309 +0,0 @@
-# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You
-# may not use this file except in compliance with the License. A copy of
-# the License is located at
-#
-#     http://aws.amazon.com/apache2.0/
-#
-# or in the "license" file accompanying this file. This file is
-# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
-# ANY KIND, either express or implied. See the License for the specific
-# language governing permissions and limitations under the License.
-from __future__ import absolute_import
-
-import json
-import os
-import math
-import pytest
-import scipy.stats as st
-
-from sagemaker.s3 import S3Uploader
-from sagemaker.session import production_variant
-from sagemaker.sparkml import SparkMLModel
-from sagemaker.utils import sagemaker_timestamp
-from sagemaker.content_types import CONTENT_TYPE_CSV
-from sagemaker.utils import unique_name_from_base
-from sagemaker.amazon.amazon_estimator import get_image_uri
-from sagemaker.predictor import csv_serializer, RealTimePredictor
-
-
-import tests.integ
-
-
-ROLE = "SageMakerRole"
-MODEL_NAME = "test-xgboost-model-{}".format(sagemaker_timestamp())
-ENDPOINT_NAME = unique_name_from_base("integ-test-multi-variant-endpoint")
-DEFAULT_REGION = "us-west-2"
-DEFAULT_INSTANCE_TYPE = "ml.m5.xlarge"
-DEFAULT_INSTANCE_COUNT = 1
-XG_BOOST_MODEL_LOCAL_PATH = os.path.join(tests.integ.DATA_DIR, "xgboost_model", "xgb_model.tar.gz")
-
-TEST_VARIANT_1 = "Variant1"
-TEST_VARIANT_1_WEIGHT = 0.3
-
-TEST_VARIANT_2 = "Variant2"
-TEST_VARIANT_2_WEIGHT = 0.7
-
-VARIANT_TRAFFIC_SAMPLING_COUNT = 100
-DESIRED_CONFIDENCE_FOR_VARIANT_TRAFFIC_DISTRIBUTION = 0.999
-
-TEST_CSV_DATA = "42,42,42,42,42,42,42"
-
-SPARK_ML_MODEL_LOCAL_PATH = os.path.join(
-    tests.integ.DATA_DIR, "sparkml_model", "mleap_model.tar.gz"
-)
-SPARK_ML_MODEL_ENDPOINT_NAME = unique_name_from_base("integ-test-target-variant-sparkml")
-SPARK_ML_DEFAULT_VARIANT_NAME = (
-    "AllTraffic"
-)  # default defined in src/sagemaker/session.py def production_variant
-SPARK_ML_WRONG_VARIANT_NAME = "WRONG_VARIANT"
-SPARK_ML_TEST_DATA = "1.0,C,38.0,71.5,1.0,female"
-SPARK_ML_MODEL_SCHEMA = json.dumps(
-    {
-        "input": [
-            {"name": "Pclass", "type": "float"},
-            {"name": "Embarked", "type": "string"},
-            {"name": "Age", "type": "float"},
-            {"name": "Fare", "type": "float"},
-            {"name": "SibSp", "type": "float"},
-            {"name": "Sex", "type": "string"},
-        ],
-        "output": {"name": "features", "struct": "vector", "type": "double"},
-    }
-)
-
-
-@pytest.fixture(scope="module")
-def multi_variant_endpoint(sagemaker_session):
-    """
-    Sets up the multi variant endpoint before the integration tests run.
-    Cleans up the multi variant endpoint after the integration tests run.
-    """
-
-    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(
-        endpoint_name=ENDPOINT_NAME, sagemaker_session=sagemaker_session, hours=2
-    ):
-
-        # Creating a model
-        bucket = sagemaker_session.default_bucket()
-        prefix = "sagemaker/DEMO-VariantTargeting"
-        model_url = S3Uploader.upload(
-            local_path=XG_BOOST_MODEL_LOCAL_PATH,
-            desired_s3_uri="s3://" + bucket + "/" + prefix,
-            session=sagemaker_session,
-        )
-
-        image_uri = get_image_uri(sagemaker_session.boto_session.region_name, "xgboost", "0.90-1")
-
-        multi_variant_endpoint_model = sagemaker_session.create_model(
-            name=MODEL_NAME,
-            role=ROLE,
-            container_defs={"Image": image_uri, "ModelDataUrl": model_url},
-        )
-
-        # Creating a multi variant endpoint
-        variant1 = production_variant(
-            model_name=MODEL_NAME,
-            instance_type=DEFAULT_INSTANCE_TYPE,
-            initial_instance_count=DEFAULT_INSTANCE_COUNT,
-            variant_name=TEST_VARIANT_1,
-            initial_weight=TEST_VARIANT_1_WEIGHT,
-        )
-        variant2 = production_variant(
-            model_name=MODEL_NAME,
-            instance_type=DEFAULT_INSTANCE_TYPE,
-            initial_instance_count=DEFAULT_INSTANCE_COUNT,
-            variant_name=TEST_VARIANT_2,
-            initial_weight=TEST_VARIANT_2_WEIGHT,
-        )
-        sagemaker_session.endpoint_from_production_variants(
-            name=ENDPOINT_NAME, production_variants=[variant1, variant2]
-        )
-
-        # Yield to run the integration tests
-        yield multi_variant_endpoint
-
-        # Cleanup resources
-        sagemaker_session.delete_model(multi_variant_endpoint_model)
-        sagemaker_session.sagemaker_client.delete_endpoint_config(EndpointConfigName=ENDPOINT_NAME)
-
-    # Validate resource cleanup
-    with pytest.raises(Exception) as exception:
-        sagemaker_session.sagemaker_client.describe_model(
-            ModelName=multi_variant_endpoint_model.name
-        )
-        assert "Could not find model" in str(exception.value)
-        sagemaker_session.sagemaker_client.describe_endpoint_config(name=ENDPOINT_NAME)
-        assert "Could not find endpoint" in str(exception.value)
-
-
-def test_target_variant_invocation(sagemaker_session, multi_variant_endpoint):
-
-    response = sagemaker_session.sagemaker_runtime_client.invoke_endpoint(
-        EndpointName=ENDPOINT_NAME,
-        Body=TEST_CSV_DATA,
-        ContentType=CONTENT_TYPE_CSV,
-        Accept=CONTENT_TYPE_CSV,
-        TargetVariant=TEST_VARIANT_1,
-    )
-    assert response["InvokedProductionVariant"] == TEST_VARIANT_1
-
-    response = sagemaker_session.sagemaker_runtime_client.invoke_endpoint(
-        EndpointName=ENDPOINT_NAME,
-        Body=TEST_CSV_DATA,
-        ContentType=CONTENT_TYPE_CSV,
-        Accept=CONTENT_TYPE_CSV,
-        TargetVariant=TEST_VARIANT_2,
-    )
-    assert response["InvokedProductionVariant"] == TEST_VARIANT_2
-
-
-def test_predict_invocation_with_target_variant(sagemaker_session, multi_variant_endpoint):
-    predictor = RealTimePredictor(
-        endpoint=ENDPOINT_NAME,
-        sagemaker_session=sagemaker_session,
-        serializer=csv_serializer,
-        content_type=CONTENT_TYPE_CSV,
-        accept=CONTENT_TYPE_CSV,
-    )
-
-    # Validate that no exception is raised when the target_variant is specified.
-    predictor.predict(TEST_CSV_DATA, target_variant=TEST_VARIANT_1)
-    predictor.predict(TEST_CSV_DATA, target_variant=TEST_VARIANT_2)
-
-
-def test_variant_traffic_distribution(sagemaker_session, multi_variant_endpoint):
-    variant_1_invocation_count = 0
-    variant_2_invocation_count = 0
-
-    for i in range(0, VARIANT_TRAFFIC_SAMPLING_COUNT):
-        response = sagemaker_session.sagemaker_runtime_client.invoke_endpoint(
-            EndpointName=ENDPOINT_NAME,
-            Body=TEST_CSV_DATA,
-            ContentType=CONTENT_TYPE_CSV,
-            Accept=CONTENT_TYPE_CSV,
-        )
-        if response["InvokedProductionVariant"] == TEST_VARIANT_1:
-            variant_1_invocation_count += 1
-        elif response["InvokedProductionVariant"] == TEST_VARIANT_2:
-            variant_2_invocation_count += 1
-
-    assert variant_1_invocation_count + variant_2_invocation_count == VARIANT_TRAFFIC_SAMPLING_COUNT
-
-    variant_1_invocation_percentage = float(variant_1_invocation_count) / float(
-        VARIANT_TRAFFIC_SAMPLING_COUNT
-    )
-    variant_1_margin_of_error = _compute_and_retrieve_margin_of_error(TEST_VARIANT_1_WEIGHT)
-    assert variant_1_invocation_percentage < TEST_VARIANT_1_WEIGHT + variant_1_margin_of_error
-    assert variant_1_invocation_percentage > TEST_VARIANT_1_WEIGHT - variant_1_margin_of_error
-
-    variant_2_invocation_percentage = float(variant_2_invocation_count) / float(
-        VARIANT_TRAFFIC_SAMPLING_COUNT
-    )
-    variant_2_margin_of_error = _compute_and_retrieve_margin_of_error(TEST_VARIANT_2_WEIGHT)
-    assert variant_2_invocation_percentage < TEST_VARIANT_2_WEIGHT + variant_2_margin_of_error
-    assert variant_2_invocation_percentage > TEST_VARIANT_2_WEIGHT - variant_2_margin_of_error
-
-
-def test_spark_ml_predict_invocation_with_target_variant(sagemaker_session):
-    model_data = sagemaker_session.upload_data(
-        path=SPARK_ML_MODEL_LOCAL_PATH, key_prefix="integ-test-data/sparkml/model"
-    )
-
-    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(
-        SPARK_ML_MODEL_ENDPOINT_NAME, sagemaker_session
-    ):
-        spark_ml_model = SparkMLModel(
-            model_data=model_data,
-            role=ROLE,
-            sagemaker_session=sagemaker_session,
-            env={"SAGEMAKER_SPARKML_SCHEMA": SPARK_ML_MODEL_SCHEMA},
-        )
-
-        predictor = spark_ml_model.deploy(
-            DEFAULT_INSTANCE_COUNT,
-            DEFAULT_INSTANCE_TYPE,
-            endpoint_name=SPARK_ML_MODEL_ENDPOINT_NAME,
-        )
-
-        # Validate that no exception is raised when the target_variant is specified.
-        predictor.predict(SPARK_ML_TEST_DATA, target_variant=SPARK_ML_DEFAULT_VARIANT_NAME)
-
-        with pytest.raises(Exception) as exception_info:
-            predictor.predict(SPARK_ML_TEST_DATA, target_variant=SPARK_ML_WRONG_VARIANT_NAME)
-
-        assert "ValidationError" in str(exception_info.value)
-        assert SPARK_ML_WRONG_VARIANT_NAME in str(exception_info.value)
-
-        # cleanup resources
-        spark_ml_model.delete_model()
-        sagemaker_session.sagemaker_client.delete_endpoint_config(
-            EndpointConfigName=SPARK_ML_MODEL_ENDPOINT_NAME
-        )
-
-    # Validate resource cleanup
-    with pytest.raises(Exception) as exception:
-        sagemaker_session.sagemaker_client.describe_model(ModelName=spark_ml_model.name)
-        assert "Could not find model" in str(exception.value)
-        sagemaker_session.sagemaker_client.describe_endpoint_config(
-            name=SPARK_ML_MODEL_ENDPOINT_NAME
-        )
-        assert "Could not find endpoint" in str(exception.value)
-
-
-@pytest.mark.local_mode
-def test_target_variant_invocation_local_mode(sagemaker_session, multi_variant_endpoint):
-
-    if sagemaker_session._region_name is None:
-        sagemaker_session._region_name = DEFAULT_REGION
-
-    response = sagemaker_session.sagemaker_runtime_client.invoke_endpoint(
-        EndpointName=ENDPOINT_NAME,
-        Body=TEST_CSV_DATA,
-        ContentType=CONTENT_TYPE_CSV,
-        Accept=CONTENT_TYPE_CSV,
-        TargetVariant=TEST_VARIANT_1,
-    )
-    assert response["InvokedProductionVariant"] == TEST_VARIANT_1
-
-    response = sagemaker_session.sagemaker_runtime_client.invoke_endpoint(
-        EndpointName=ENDPOINT_NAME,
-        Body=TEST_CSV_DATA,
-        ContentType=CONTENT_TYPE_CSV,
-        Accept=CONTENT_TYPE_CSV,
-        TargetVariant=TEST_VARIANT_2,
-    )
-    assert response["InvokedProductionVariant"] == TEST_VARIANT_2
-
-
-@pytest.mark.local_mode
-def test_predict_invocation_with_target_variant_local_mode(
-    sagemaker_session, multi_variant_endpoint
-):
-
-    if sagemaker_session._region_name is None:
-        sagemaker_session._region_name = DEFAULT_REGION
-
-    predictor = RealTimePredictor(
-        endpoint=ENDPOINT_NAME,
-        sagemaker_session=sagemaker_session,
-        serializer=csv_serializer,
-        content_type=CONTENT_TYPE_CSV,
-        accept=CONTENT_TYPE_CSV,
-    )
-
-    # Validate that no exception is raised when the target_variant is specified.
-    predictor.predict(TEST_CSV_DATA, target_variant=TEST_VARIANT_1)
-    predictor.predict(TEST_CSV_DATA, target_variant=TEST_VARIANT_2)
-
-
-def _compute_and_retrieve_margin_of_error(variant_weight):
-    """
-    Computes the margin of error using the Wald method for computing the confidence
-    intervals of a binomial distribution.
-    """
-    z_value = st.norm.ppf(DESIRED_CONFIDENCE_FOR_VARIANT_TRAFFIC_DISTRIBUTION)
-    margin_of_error = (variant_weight * (1 - variant_weight)) / VARIANT_TRAFFIC_SAMPLING_COUNT
-    margin_of_error = z_value * math.sqrt(margin_of_error)
-    return margin_of_error
diff --git a/tests/unit/test_predictor.py b/tests/unit/test_predictor.py
index b74f36e2b3..9c7feb0a83 100644
--- a/tests/unit/test_predictor.py
+++ b/tests/unit/test_predictor.py
@@ -346,7 +346,6 @@ def test_numpy_deser_from_npy_object_array():
 CSV_CONTENT_TYPE = "text/csv"
 RETURN_VALUE = 0
 CSV_RETURN_VALUE = "1,2,3\r\n"
-PRODUCTION_VARIANT_1 = "PRODUCTION_VARIANT_1"
 
 ENDPOINT_DESC = {"EndpointConfigName": ENDPOINT}
 
@@ -408,31 +407,6 @@ def test_predict_call_with_headers():
     assert result == RETURN_VALUE
 
 
-def test_predict_call_with_target_variant():
-    sagemaker_session = empty_sagemaker_session()
-    predictor = RealTimePredictor(
-        ENDPOINT, sagemaker_session, content_type=DEFAULT_CONTENT_TYPE, accept=DEFAULT_CONTENT_TYPE
-    )
-
-    data = "untouched"
-    result = predictor.predict(data, target_variant=PRODUCTION_VARIANT_1)
-
-    assert sagemaker_session.sagemaker_runtime_client.invoke_endpoint.called
-
-    expected_request_args = {
-        "Accept": DEFAULT_CONTENT_TYPE,
-        "Body": data,
-        "ContentType": DEFAULT_CONTENT_TYPE,
-        "EndpointName": ENDPOINT,
-        "TargetVariant": PRODUCTION_VARIANT_1,
-    }
-
-    call_args, kwargs = sagemaker_session.sagemaker_runtime_client.invoke_endpoint.call_args
-    assert kwargs == expected_request_args
-
-    assert result == RETURN_VALUE
-
-
 def test_multi_model_predict_call_with_headers():
     sagemaker_session = empty_sagemaker_session()
     predictor = RealTimePredictor(