From 69657587d2b3d831e969e502050038b58219ba98 Mon Sep 17 00:00:00 2001
From: pialidas <pialidas@amazon.com>
Date: Wed, 10 Jun 2020 11:59:16 -0400
Subject: [PATCH 1/9] Adding selectable inference for automl generated model

1. Added attach() method to AutoML to allow attaching an AutoML object
to an existing AutoMLJob
2. Added create_model() method to AutoML that returns a PipelineModel
that can be used by deploy() or for batch transform
3. Added a util function validate_and_update_inference_response() to
update the inference container list based on the requested
inference response keys
4. Updated and added unit tests
---
 src/sagemaker/automl/automl.py              | 356 ++++++++++++++------
 tests/data/automl/data/iris_transform.csv   |  15 +
 tests/integ/test_auto_ml.py                 |  65 ++++
 tests/unit/sagemaker/automl/test_auto_ml.py | 186 ++++++++--
 4 files changed, 492 insertions(+), 130 deletions(-)
 create mode 100644 tests/data/automl/data/iris_transform.csv

diff --git a/src/sagemaker/automl/automl.py b/src/sagemaker/automl/automl.py
index 794e03aee1..b060d7e369 100644
--- a/src/sagemaker/automl/automl.py
+++ b/src/sagemaker/automl/automl.py
@@ -100,6 +100,61 @@ def fit(self, inputs=None, wait=True, logs=True, job_name=None):
         if wait:
             self.latest_auto_ml_job.wait(logs=logs)
 
+    @classmethod
+    def attach(cls, job_name, sagemaker_session=None):
+        """Attach to an existing AutoML job and build an ``AutoML`` object from it.
+
+        Args:
+            job_name (str): AutoML job name
+            sagemaker_session (sagemaker.session.Session): A SageMaker Session
+                object, used for SageMaker interactions (default: None). If not
+                specified, a new ``Session()`` is created.
+
+        Returns:
+            AutoML: an instance configured from the description of ``job_name``.
+        """
+        sagemaker_session = sagemaker_session or Session()
+
+        job_desc = sagemaker_session.describe_auto_ml_job(job_name)
+        # boto3 clients accept keyword arguments only; the tag list is under "Tags".
+        automl_job_tags = sagemaker_session.sagemaker_client.list_tags(
+            ResourceArn=job_desc["AutoMLJobArn"]
+        )["Tags"]
+        # Read optional sections with .get() so their absence does not raise KeyError.
+        job_config = job_desc.get("AutoMLJobConfig", {})
+        security_config = job_config.get("SecurityConfig", {})
+        completion_criteria = job_config.get("CompletionCriteria", {})
+
+        amlj = AutoML(
+            role=job_desc["RoleArn"],
+            target_attribute_name=job_desc["InputDataConfig"][0]["TargetAttributeName"],
+            problem_type=job_desc.get("ProblemType"),
+            output_path=job_desc["OutputDataConfig"]["S3OutputPath"],
+            output_kms_key=job_desc["OutputDataConfig"].get("KmsKeyId"),
+            base_job_name=job_name,
+            sagemaker_session=sagemaker_session,
+            volume_kms_key=security_config.get("VolumeKmsKeyId"),
+            encrypt_inter_container_traffic=security_config.get(
+                "EnableInterContainerTrafficEncryption", False
+            ),
+            vpc_config=security_config.get("VpcConfig"),
+            max_candidates=completion_criteria.get("MaxCandidates"),
+            max_runtime_per_training_job_in_seconds=completion_criteria.get(
+                "MaxRuntimePerTrainingJobInSeconds"
+            ),
+            total_job_runtime_in_seconds=completion_criteria.get("MaxAutoMLJobRuntimeInSeconds"),
+            # job_objective takes the objective dict, not just its "MetricName" string.
+            job_objective=job_desc.get("AutoMLJobObjective"),
+            generate_candidate_definitions_only=job_desc.get(
+                "GenerateCandidateDefinitionsOnly", False
+            ),
+            tags=automl_job_tags,
+        )
+        # Bind the new object to the existing job and cache the fetched description.
+        amlj.current_job_name = job_name
+        amlj._auto_ml_job_desc = job_desc
+        return amlj
+
     def describe_auto_ml_job(self, job_name=None):
         """Returns the job description of an AutoML job for the given job name.
 
@@ -187,49 +242,29 @@ def list_candidates(
 
         return self.sagemaker_session.list_candidates(**list_candidates_args)["Candidates"]
 
-    def deploy(
+    def create_model(
         self,
-        initial_instance_count,
-        instance_type,
-        candidate=None,
+        name,
         sagemaker_session=None,
-        name=None,
-        endpoint_name=None,
-        tags=None,
-        wait=True,
-        update_endpoint=False,
+        candidate=None,
         vpc_config=None,
         enable_network_isolation=False,
         model_kms_key=None,
         predictor_cls=None,
+        inference_response_keys=None,
     ):
-        """Deploy a candidate to a SageMaker Inference Pipeline and return a Predictor
+        """Creates a model from a given candidate or the best candidate
+        from the automl job
 
         Args:
-            initial_instance_count (int): The initial number of instances to run
-                in the ``Endpoint`` created from this ``Model``.
-            instance_type (str): The EC2 instance type to deploy this Model to.
-                For example, 'ml.p2.xlarge'.
+            name (str): The pipeline model name.
+            sagemaker_session (sagemaker.session.Session): A SageMaker Session
+                object, used for SageMaker interactions (default: None). If not
+                specified, the one originally associated with the ``AutoML`` instance is used.
             candidate (CandidateEstimator or dict): a CandidateEstimator used for deploying
                 to a SageMaker Inference Pipeline. If None, the best candidate will
                 be used. If the candidate input is a dict, a CandidateEstimator will be
                 created from it.
-            sagemaker_session (sagemaker.session.Session): A SageMaker Session
-                object, used for SageMaker interactions (default: None). If not
-                specified, the one originally associated with the ``AutoML`` instance is used.
-            name (str): The pipeline model name. If None, a default model name will
-                be selected on each ``deploy``.
-            endpoint_name (str): The name of the endpoint to create (default:
-                None). If not specified, a unique endpoint name will be created.
-            tags (List[dict[str, str]]): The list of tags to attach to this
-                specific endpoint.
-            wait (bool): Whether the call should wait until the deployment of
-                model completes (default: True).
-            update_endpoint (bool): Flag to update the model in an existing
-                Amazon SageMaker endpoint. If True, this will deploy a new
-                EndpointConfig to an already existing endpoint and delete
-                resources corresponding to the previous EndpointConfig. If
-                False, a new endpoint will be created. Default: False
             vpc_config (dict): Specifies a VPC that your training jobs and hosted models have
                 access to. Contents include "SecurityGroupIds" and "Subnets".
             enable_network_isolation (bool): Isolates the training container. No inbound or
@@ -241,11 +276,12 @@ def deploy(
                 function to call to create a predictor (default: None). If
                 specified, ``deploy()``  returns the result of invoking this
                 function on the created endpoint name.
+            inference_response_keys (list): List of keys for response content. The order of the
+                keys will dictate the content order in the response.
 
         Returns:
-            callable[string, sagemaker.session.Session] or ``None``:
-                If ``predictor_cls`` is specified, the invocation of ``self.predictor_cls`` on
-                the created endpoint name. Otherwise, ``None``.
+            PipelineModel object
+
         """
         sagemaker_session = sagemaker_session or self.sagemaker_session
 
@@ -256,50 +292,46 @@ def deploy(
             candidate = CandidateEstimator(candidate, sagemaker_session=sagemaker_session)
 
         inference_containers = candidate.containers
-        endpoint_name = endpoint_name or self.current_job_name
-
-        return self._deploy_inference_pipeline(
-            inference_containers,
-            initial_instance_count=initial_instance_count,
-            instance_type=instance_type,
-            name=name,
-            sagemaker_session=sagemaker_session,
-            endpoint_name=endpoint_name,
-            tags=tags,
-            wait=wait,
-            update_endpoint=update_endpoint,
-            vpc_config=vpc_config,
-            enable_network_isolation=enable_network_isolation,
-            model_kms_key=model_kms_key,
-            predictor_cls=predictor_cls,
-        )
 
-    def _check_problem_type_and_job_objective(self, problem_type, job_objective):
-        """Validate if problem_type and job_objective are both None or are both provided.
+        self.validate_and_update_inference_response(inference_containers, inference_response_keys)
 
-        Args:
-            problem_type (str): The type of problem of this AutoMLJob. Valid values are
-                "Regression", "BinaryClassification", "MultiClassClassification".
-            job_objective (dict): AutoMLJob objective, contains "AutoMLJobObjectiveType" (optional),
-                "MetricName" and "Value".
+        # construct Model objects
+        models = []
 
-        Raises (ValueError): raises ValueError if one of problem_type and job_objective is provided
-            while the other is None.
+        for container in inference_containers:
+            image = container["Image"]
+            model_data = container["ModelDataUrl"]
+            env = container["Environment"]
 
-        """
-        if not (problem_type and job_objective) and (problem_type or job_objective):
-            raise ValueError(
-                "One of problem type and objective metric provided. "
-                "Either both of them should be provided or none of them should be provided."
+            model = Model(
+                image=image,
+                model_data=model_data,
+                role=self.role,
+                env=env,
+                vpc_config=vpc_config,
+                sagemaker_session=sagemaker_session or self.sagemaker_session,
+                enable_network_isolation=enable_network_isolation,
+                model_kms_key=model_kms_key,
             )
+            models.append(model)
+
+        pipeline = PipelineModel(
+            models=models,
+            role=self.role,
+            predictor_cls=predictor_cls,
+            name=name,
+            vpc_config=vpc_config,
+            sagemaker_session=sagemaker_session or self.sagemaker_session,
+        )
+        return pipeline
 
-    def _deploy_inference_pipeline(
+    def deploy(
         self,
-        inference_containers,
         initial_instance_count,
         instance_type,
-        name=None,
+        candidate=None,
         sagemaker_session=None,
+        name=None,
         endpoint_name=None,
         tags=None,
         wait=True,
@@ -308,21 +340,24 @@ def _deploy_inference_pipeline(
         enable_network_isolation=False,
         model_kms_key=None,
         predictor_cls=None,
+        inference_response_keys=None,
     ):
-        """Deploy a SageMaker Inference Pipeline.
+        """Deploy a candidate to a SageMaker Inference Pipeline and return a Predictor
 
         Args:
-            inference_containers (list): a list of inference container definitions
             initial_instance_count (int): The initial number of instances to run
                 in the ``Endpoint`` created from this ``Model``.
             instance_type (str): The EC2 instance type to deploy this Model to.
                 For example, 'ml.p2.xlarge'.
-            name (str): The pipeline model name. If None, a default model name will
-                be selected on each ``deploy``.
+            candidate (CandidateEstimator or dict): a CandidateEstimator used for deploying
+                to a SageMaker Inference Pipeline. If None, the best candidate will
+                be used. If the candidate input is a dict, a CandidateEstimator will be
+                created from it.
             sagemaker_session (sagemaker.session.Session): A SageMaker Session
                 object, used for SageMaker interactions (default: None). If not
-                specified, one is created using the default AWS configuration
-                chain.
+                specified, the one originally associated with the ``AutoML`` instance is used.
+            name (str): The pipeline model name. If None, a default model name will
+                be selected on each ``deploy``.
             endpoint_name (str): The name of the endpoint to create (default:
                 None). If not specified, a unique endpoint name will be created.
             tags (List[dict[str, str]]): The list of tags to attach to this
@@ -334,44 +369,38 @@ def _deploy_inference_pipeline(
                 EndpointConfig to an already existing endpoint and delete
                 resources corresponding to the previous EndpointConfig. If
                 False, a new endpoint will be created. Default: False
-            vpc_config (dict): information about vpc configuration, optionally
-                contains "SecurityGroupIds", "Subnets"
+            vpc_config (dict): Specifies a VPC that your training jobs and hosted models have
+                access to. Contents include "SecurityGroupIds" and "Subnets".
+            enable_network_isolation (bool): Isolates the training container. No inbound or
+                outbound network calls can be made, except for calls between peers within a
+                training cluster for distributed training. Default: False
             model_kms_key (str): KMS key ARN used to encrypt the repacked
                 model archive file if the model is repacked
             predictor_cls (callable[string, sagemaker.session.Session]): A
                 function to call to create a predictor (default: None). If
                 specified, ``deploy()``  returns the result of invoking this
                 function on the created endpoint name.
-        """
-        # construct Model objects
-        models = []
-        for container in inference_containers:
-            image = container["Image"]
-            model_data = container["ModelDataUrl"]
-            env = container["Environment"]
+            inference_response_keys (list): List of keys for response content. The order of the
+                keys will dictate the content order in the response.
 
-            model = Model(
-                image=image,
-                model_data=model_data,
-                role=self.role,
-                env=env,
-                vpc_config=vpc_config,
-                sagemaker_session=sagemaker_session or self.sagemaker_session,
-                enable_network_isolation=enable_network_isolation,
-                model_kms_key=model_kms_key,
-            )
-            models.append(model)
-
-        pipeline = PipelineModel(
-            models=models,
-            role=self.role,
-            predictor_cls=predictor_cls,
+        Returns:
+            callable[string, sagemaker.session.Session] or ``None``:
+                If ``predictor_cls`` is specified, the invocation of ``self.predictor_cls`` on
+                the created endpoint name. Otherwise, ``None``.
+        """
+        sagemaker_session = sagemaker_session or self.sagemaker_session
+        model = self.create_model(
             name=name,
+            sagemaker_session=sagemaker_session,
+            candidate=candidate,
+            inference_response_keys=inference_response_keys,
             vpc_config=vpc_config,
-            sagemaker_session=sagemaker_session or self.sagemaker_session,
+            enable_network_isolation=enable_network_isolation,
+            model_kms_key=model_kms_key,
+            predictor_cls=predictor_cls,
         )
 
-        return pipeline.deploy(
+        return model.deploy(
             initial_instance_count=initial_instance_count,
             instance_type=instance_type,
             endpoint_name=endpoint_name,
@@ -380,6 +409,25 @@ def _deploy_inference_pipeline(
             update_endpoint=update_endpoint,
         )
 
+    def _check_problem_type_and_job_objective(self, problem_type, job_objective):
+        """Ensure problem_type and job_objective are supplied together or not at all.
+
+        Args:
+            problem_type (str): The type of problem of this AutoMLJob. Valid values are
+                "Regression", "BinaryClassification", "MultiClassClassification".
+            job_objective (dict): AutoMLJob objective, contains "AutoMLJobObjectiveType"
+                (optional), "MetricName" and "Value".
+
+        Raises:
+            ValueError: if exactly one of problem_type and job_objective is truthy.
+        """
+        # The two settings are only valid as a pair, so compare their truthiness.
+        if bool(problem_type) != bool(job_objective):
+            raise ValueError(
+                "One of problem type and objective metric provided. "
+                "Either both of them should be provided or none of them should be provided."
+            )
+
     def _prepare_for_auto_ml_job(self, job_name=None):
         """Set any values in the AutoMLJob that need to be set before creating request.
 
@@ -400,6 +448,114 @@ def _prepare_for_auto_ml_job(self, job_name=None):
         if self.output_path is None:
             self.output_path = "s3://{}/".format(self.sagemaker_session.default_bucket())
 
+    @classmethod
+    def _get_supported_inference_keys(cls, container, default=None):
+        """Return the inference keys advertised by a container definition.
+
+        Keys come from the comma-separated ``SAGEMAKER_INFERENCE_SUPPORT`` environment entry.
+
+        Args:
+            container (dict): Dictionary representing container
+            default (object): The value to be returned if the container definition
+                has no marker environment variable
+
+        Returns:
+            list: whitespace-stripped keys the container supports, or ``default``.
+
+        Raises:
+            KeyError: if ``default`` is None and the container definition has
+                no marker environment variable.
+        """
+        try:
+            marker = container["Environment"]["SAGEMAKER_INFERENCE_SUPPORT"]
+        except KeyError:
+            if default is None:
+                raise
+            return default
+        return [key.strip() for key in marker.split(",")]
+
+    @classmethod
+    def _check_inference_keys(cls, inference_response_keys, containers):
+        """Check that the pipeline supports every requested inference response key.
+
+        The supported keys are taken from the last container of the pipeline.
+
+        Args:
+            inference_response_keys (list): List of keys for inference response content
+            containers (list): list of inference container
+
+        Raises:
+            ValueError: if the last container does not declare supported keys, or if one
+                or more keys in inference_response_keys are not supported by it.
+        """
+        if not inference_response_keys:
+            # Nothing was requested, so there is nothing to validate.
+            return
+
+        # Only the last container's supported keys are consulted.
+        try:
+            allowed_keys = cls._get_supported_inference_keys(container=containers[-1])
+        except KeyError:
+            raise ValueError(
+                "The inference model does not support selection of inference content beyond "
+                "it's default content. Please retry without setting "
+                "inference_response_keys key word argument."
+            )
+
+        unsupported = [key for key in inference_response_keys if key not in allowed_keys]
+        if unsupported:
+            raise ValueError(
+                "Requested inference output keys [{bad_keys_str}] are unsupported. "
+                "The supported inference keys are [{allowed_keys_str}]".format(
+                    bad_keys_str=", ".join(unsupported),
+                    allowed_keys_str=", ".join(allowed_keys),
+                )
+            )
+
+    @classmethod
+    def validate_and_update_inference_response(cls, inference_containers, inference_response_keys):
+        """Validate the requested inference keys and update the containers to emit them.
+
+        Each container ``Environment`` dict is modified in place:
+
+        * ``SAGEMAKER_INFERENCE_OUTPUT`` is set to the requested keys the container
+          supports, comma-separated and in the order they were requested.
+        * ``SAGEMAKER_INFERENCE_INPUT`` is set to the output selected for the container
+          directly before it, when that container selected any.
+
+        Containers without a ``SAGEMAKER_INFERENCE_SUPPORT`` entry are left untouched.
+
+        Args:
+            inference_containers (list): list of inference containers
+            inference_response_keys (list): list of inference response keys
+
+        Raises:
+            ValueError: if one or more of inference_response_keys are unsupported by the model
+        """
+        if not inference_response_keys:
+            return
+
+        cls._check_inference_keys(inference_response_keys, inference_containers)
+
+        previous_container_output = None
+        for container in inference_containers:
+            supported_keys = cls._get_supported_inference_keys(container, default=[])
+            if not supported_keys:
+                # No marker: skip it and forward no input selection to the next container.
+                previous_container_output = None
+                continue
+
+            # str.join keeps the caller's ordering and yields "" when nothing matches.
+            current_container_output = ",".join(
+                key for key in inference_response_keys if key in supported_keys
+            )
+            environment = container["Environment"]
+            if previous_container_output:
+                environment["SAGEMAKER_INFERENCE_INPUT"] = previous_container_output
+            if current_container_output:
+                environment["SAGEMAKER_INFERENCE_OUTPUT"] = current_container_output
+            previous_container_output = current_container_output
+
 
 class AutoMLInput(object):
     """Accepts parameters that specify an S3 input for an auto ml job and provides
diff --git a/tests/data/automl/data/iris_transform.csv b/tests/data/automl/data/iris_transform.csv
new file mode 100644
index 0000000000..114f823d28
--- /dev/null
+++ b/tests/data/automl/data/iris_transform.csv
@@ -0,0 +1,15 @@
+6.4,2.8,5.6,2.2
+5.0,2.3,3.3,1.0
+4.9,2.5,4.5,1.7
+4.9,3.1,1.5,0.1
+5.7,3.8,1.7,0.3
+4.4,3.2,1.3,0.2
+5.4,3.4,1.5,0.4
+6.9,3.1,5.1,2.3
+6.7,3.1,4.4,1.4
+5.1,3.7,1.5,0.4
+5.2,2.7,3.9,1.4
+6.9,3.1,4.9,1.5
+5.8,4.0,1.2,0.2
+5.4,3.9,1.7,0.4
+7.7,3.8,6.7,2.2
\ No newline at end of file
diff --git a/tests/integ/test_auto_ml.py b/tests/integ/test_auto_ml.py
index 26e90ecf74..ed27492f16 100644
--- a/tests/integ/test_auto_ml.py
+++ b/tests/integ/test_auto_ml.py
@@ -32,6 +32,7 @@
 DATA_DIR = os.path.join(DATA_DIR, "automl", "data")
 TRAINING_DATA = os.path.join(DATA_DIR, "iris_training.csv")
 TEST_DATA = os.path.join(DATA_DIR, "iris_test.csv")
+TRANSFORM_DATA = os.path.join(DATA_DIR, "iris_transform.csv")
 PROBLEM_TYPE = "MultiClassClassification"
 BASE_JOB_NAME = "auto-ml"
 
@@ -180,6 +181,42 @@ def test_auto_ml_describe_auto_ml_job(sagemaker_session):
     assert desc["OutputDataConfig"] == expected_default_output_config
 
 
+@pytest.mark.skipif(
+    tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS,
+    reason="AutoML is not supported in the region yet.",
+)
+def test_auto_ml_attach(sagemaker_session):
+    """Attach to the existing AutoML job and verify the description it reports."""
+    bucket = sagemaker_session.default_bucket()
+    training_data_uri = "s3://{}/{}/input/iris_training.csv".format(bucket, PREFIX)
+
+    # Expected input and output sections of the job description.
+    expected_input_config = [
+        {
+            "DataSource": {
+                "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": training_data_uri}
+            },
+            "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
+        }
+    ]
+    expected_output_config = {"S3OutputPath": "s3://{}/".format(bucket)}
+
+    # Helper is expected to create the job if it does not exist yet.
+    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
+
+    attached = AutoML.attach(job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session)
+    description = attached.describe_auto_ml_job()
+
+    # Identity and state of the attached job.
+    assert description["AutoMLJobName"] == AUTO_ML_JOB_NAME
+    assert description["AutoMLJobStatus"] == "Completed"
+    assert isinstance(description["BestCandidate"], dict)
+    # Configuration sections reported for the job.
+    assert description["InputDataConfig"] == expected_input_config
+    assert description["AutoMLJobConfig"] == EXPECTED_DEFAULT_JOB_CONFIG
+    assert description["OutputDataConfig"] == expected_output_config
+
+
 @pytest.mark.skipif(
     tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS,
     reason="AutoML is not supported in the region yet.",
@@ -240,6 +277,34 @@ def test_deploy_best_candidate(sagemaker_session, cpu_instance_type):
     sagemaker_session.sagemaker_client.delete_endpoint(EndpointName=endpoint_name)
 
 
+@pytest.mark.skipif(
+    tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS,
+    reason="AutoML is not supported in the region yet.",
+)
+@pytest.mark.canary_quick
+def test_create_model_best_candidate(sagemaker_session, cpu_instance_type):
+    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
+
+    auto_ml = AutoML.attach(job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session)
+    best_candidate = auto_ml.best_candidate()
+
+    with timeout(minutes=2):
+        # create_model() has no default for ``name``, so a model name must be passed.
+        pipeline_model = auto_ml.create_model(
+            name="python-sdk-automl", candidate=best_candidate, sagemaker_session=sagemaker_session
+        )
+    inputs = sagemaker_session.upload_data(
+        path=TRANSFORM_DATA, key_prefix=PREFIX + "/transform_input"
+    )
+    pipeline_model.transformer(
+        instance_count=1,
+        instance_type=cpu_instance_type,
+        assemble_with="Line",
+        output_path="s3://{}/{}".format(sagemaker_session.default_bucket(), "transform_test"),
+        accept="text/csv",
+    ).transform(data=inputs, content_type="text/csv", split_type="Line", join_source="Input")
+
+
 @pytest.mark.skipif(
     tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS,
     reason="AutoML is not supported in the region yet.",
diff --git a/tests/unit/sagemaker/automl/test_auto_ml.py b/tests/unit/sagemaker/automl/test_auto_ml.py
index 8ef0cd31da..5ff99e3b8d 100644
--- a/tests/unit/sagemaker/automl/test_auto_ml.py
+++ b/tests/unit/sagemaker/automl/test_auto_ml.py
@@ -14,7 +14,7 @@
 
 import pytest
 from mock import Mock, patch
-from sagemaker import AutoML, AutoMLJob, AutoMLInput, CandidateEstimator
+from sagemaker import AutoML, AutoMLJob, AutoMLInput, CandidateEstimator, PipelineModel
 from sagemaker.predictor import RealTimePredictor
 
 MODEL_DATA = "s3://bucket/model.tar.gz"
@@ -37,12 +37,14 @@
 
 JOB_NAME = "default-job-name"
 JOB_NAME_2 = "banana-auto-ml-job"
+JOB_NAME_3 = "descriptive-auto-ml-job"
 VOLUME_KMS_KEY = "volume-kms-key-id-string"
 OUTPUT_KMS_KEY = "output-kms-key-id-string"
 OUTPUT_PATH = "s3://my_other_bucket/"
 BASE_JOB_NAME = "banana"
 PROBLEM_TYPE = "BinaryClassification"
 BLACKLISTED_ALGORITHM = ["xgboost"]
+LIST_TAGS_RESULT = {"Tags": [{"Key": "key1", "Value": "value1"}]}
 MAX_CANDIDATES = 10
 MAX_RUNTIME_PER_TRAINING_JOB = 3600
 TOTAL_JOB_RUNTIME = 36000
@@ -57,6 +59,33 @@
 BEST_CANDIDATE_2 = {"best-candidate": "best-trial-2"}
 AUTO_ML_DESC = {"AutoMLJobName": JOB_NAME, "BestCandidate": BEST_CANDIDATE}
 AUTO_ML_DESC_2 = {"AutoMLJobName": JOB_NAME_2, "BestCandidate": BEST_CANDIDATE_2}
+AUTO_ML_DESC_3 = {
+    "AutoMLJobArn": "automl_job_arn",
+    "AutoMLJobConfig": {
+        "CompletionCriteria": {
+            "MaxAutoMLJobRuntimeInSeconds": 3000,
+            "MaxCandidates": 28,
+            "MaxRuntimePerTrainingJobInSeconds": 100,
+        },
+        "SecurityConfig": {"EnableInterContainerTrafficEncryption": True},
+    },
+    "AutoMLJobName": "mock_automl_job_name",
+    "AutoMLJobObjective": {"MetricName": "Auto"},
+    "AutoMLJobSecondaryStatus": "Completed",
+    "AutoMLJobStatus": "Completed",
+    "GenerateCandidateDefinitionsOnly": False,
+    "InputDataConfig": [
+        {
+            "DataSource": {
+                "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": "s3://input/prefix"}
+            },
+            "TargetAttributeName": "y",
+        }
+    ],
+    "OutputDataConfig": {"KmsKeyId": "string", "S3OutputPath": "s3://output_prefix"},
+    "ProblemType": "Auto",
+    "RoleArn": "mock_role_arn",
+}
 
 INFERENCE_CONTAINERS = [
     {
@@ -76,6 +105,33 @@
     },
 ]
 
+CLASSIFICATION_INFERENCE_CONTAINERS = [
+    {
+        "Environment": {"SAGEMAKER_PROGRAM": "sagemaker_serve"},
+        "Image": "account.dkr.ecr.us-west-2.amazonaws.com/sagemaker-auto-ml-data-processing:1.0-cpu-py3",
+        "ModelDataUrl": "s3://sagemaker-us-west-2-account/sagemaker-auto-ml-gamma/data-processing/output",
+    },
+    {
+        "Environment": {
+            "MAX_CONTENT_LENGTH": "20000000",
+            "SAGEMAKER_INFERENCE_SUPPORT": "probability,probabilities,predicted_label",
+            "SAGEMAKER_INFERENCE_OUTPUT": "predicted_label",
+        },
+        "Image": "account.dkr.ecr.us-west-2.amazonaws.com/sagemaker-auto-ml-training:1.0-cpu-py3",
+        "ModelDataUrl": "s3://sagemaker-us-west-2-account/sagemaker-auto-ml-gamma/training/output",
+    },
+    {
+        "Environment": {
+            "INVERSE_LABEL_TRANSFORM": "1",
+            "SAGEMAKER_INFERENCE_SUPPORT": "probability,probabilities,predicted_label,labels",
+            "SAGEMAKER_INFERENCE_OUTPUT": "predicted_label",
+            "SAGEMAKER_INFERENCE_INPUT": "predicted_label",
+        },
+        "Image": "account.dkr.ecr.us-west-2.amazonaws.com/sagemaker-auto-ml-transform:1.0-cpu-py3",
+        "ModelDataUrl": "s3://sagemaker-us-west-2-account/sagemaker-auto-ml-gamma/transform/output",
+    },
+]
+
 CANDIDATE_STEPS = [
     {
         "CandidateStepName": "training-job/sagemaker-auto-ml-gamma/data-processing",
@@ -97,6 +153,12 @@
     "CandidateSteps": CANDIDATE_STEPS,
 }
 
+CLASSIFICATION_CANDIDATE_DICT = {
+    "CandidateName": "candidate_mock",
+    "InferenceContainers": CLASSIFICATION_INFERENCE_CONTAINERS,
+    "CandidateSteps": CANDIDATE_STEPS,
+}
+
 TRAINING_JOB = {
     "AlgorithmSpecification": {
         "AlgorithmName": "string",
@@ -143,6 +205,8 @@ def describe_auto_ml_job_mock(job_name=None):
         return AUTO_ML_DESC
     elif job_name == JOB_NAME_2:
         return AUTO_ML_DESC_2
+    elif job_name == JOB_NAME_3:
+        return AUTO_ML_DESC_3
 
 
 @pytest.fixture()
@@ -168,7 +232,7 @@ def sagemaker_session():
         name="describe_transform_job", return_value=TRANSFORM_JOB
     )
     sms.list_candidates = Mock(name="list_candidates", return_value={"Candidates": []})
-
+    sms.sagemaker_client.list_tags = Mock(name="list_tags", return_value=LIST_TAGS_RESULT)
     return sms
 
 
@@ -452,29 +516,17 @@ def test_deploy(sagemaker_session, candidate_mock):
     auto_ml = AutoML(
         role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
     )
+    mock_pipeline = Mock(name="pipeline_model")
+    mock_pipeline.deploy = Mock(name="model_deploy")
     auto_ml.best_candidate = Mock(name="best_candidate", return_value=CANDIDATE_DICT)
-    auto_ml._deploy_inference_pipeline = Mock("_deploy_inference_pipeline", return_value=None)
+    auto_ml.create_model = Mock(name="create_model", return_value=mock_pipeline)
     auto_ml.deploy(
         initial_instance_count=INSTANCE_COUNT,
         instance_type=INSTANCE_TYPE,
         sagemaker_session=sagemaker_session,
     )
-    auto_ml._deploy_inference_pipeline.assert_called_once()
-    auto_ml._deploy_inference_pipeline.assert_called_with(
-        candidate_mock.containers,
-        initial_instance_count=INSTANCE_COUNT,
-        instance_type=INSTANCE_TYPE,
-        name=None,
-        sagemaker_session=sagemaker_session,
-        endpoint_name=None,
-        tags=None,
-        wait=True,
-        update_endpoint=False,
-        vpc_config=None,
-        enable_network_isolation=False,
-        model_kms_key=None,
-        predictor_cls=None,
-    )
+    auto_ml.create_model.assert_called_once()
+    mock_pipeline.deploy.assert_called_once()
 
 
 @patch("sagemaker.automl.automl.CandidateEstimator")
@@ -484,7 +536,10 @@ def test_deploy_optional_args(candidate_estimator, sagemaker_session, candidate_
     auto_ml = AutoML(
         role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
     )
-    auto_ml._deploy_inference_pipeline = Mock("_deploy_inference_pipeline", return_value=None)
+    mock_pipeline = Mock(name="pipeline_model")
+    mock_pipeline.deploy = Mock(name="model_deploy")
+    auto_ml.best_candidate = Mock(name="best_candidate", return_value=CANDIDATE_DICT)
+    auto_ml.create_model = Mock(name="create_model", return_value=mock_pipeline)
 
     auto_ml.deploy(
         initial_instance_count=INSTANCE_COUNT,
@@ -500,25 +555,31 @@ def test_deploy_optional_args(candidate_estimator, sagemaker_session, candidate_
         enable_network_isolation=True,
         model_kms_key=OUTPUT_KMS_KEY,
         predictor_cls=RealTimePredictor,
+        inference_response_keys=None,
     )
-    auto_ml._deploy_inference_pipeline.assert_called_once()
-    auto_ml._deploy_inference_pipeline.assert_called_with(
-        candidate_mock.containers,
-        initial_instance_count=INSTANCE_COUNT,
-        instance_type=INSTANCE_TYPE,
+
+    auto_ml.create_model.assert_called_once()
+    auto_ml.create_model.assert_called_with(
         name=JOB_NAME,
         sagemaker_session=sagemaker_session,
-        endpoint_name=JOB_NAME,
-        tags=TAGS,
-        wait=False,
-        update_endpoint=True,
+        candidate=CANDIDATE_DICT,
+        inference_response_keys=None,
         vpc_config=VPC_CONFIG,
         enable_network_isolation=True,
         model_kms_key=OUTPUT_KMS_KEY,
         predictor_cls=RealTimePredictor,
     )
 
-    candidate_estimator.assert_called_with(CANDIDATE_DICT, sagemaker_session=sagemaker_session)
+    mock_pipeline.deploy.assert_called_once()
+
+    mock_pipeline.deploy.assert_called_with(
+        initial_instance_count=INSTANCE_COUNT,
+        instance_type=INSTANCE_TYPE,
+        endpoint_name=JOB_NAME,
+        tags=TAGS,
+        wait=False,
+        update_endpoint=True,
+    )
 
 
 def test_candidate_estimator_get_steps(sagemaker_session):
@@ -536,3 +597,68 @@ def test_candidate_estimator_fit(sagemaker_session):
     candidate_estimator.fit(inputs)
     sagemaker_session.train.assert_called()
     sagemaker_session.transform.assert_called()
+
+
+def test_validate_and_update_inference_response():
+    cic = CLASSIFICATION_INFERENCE_CONTAINERS.copy()
+
+    AutoML.validate_and_update_inference_response(
+        inference_containers=cic,
+        inference_response_keys=["predicted_label", "labels", "probabilities", "probability"],
+    )
+
+    assert (
+        cic[2]["Environment"]["SAGEMAKER_INFERENCE_OUTPUT"]
+        == "predicted_label,labels,probabilities,probability"
+    )
+    assert (
+        cic[2]["Environment"]["SAGEMAKER_INFERENCE_INPUT"]
+        == "predicted_label,probabilities,probability"
+    )
+    assert (
+        cic[1]["Environment"]["SAGEMAKER_INFERENCE_OUTPUT"]
+        == "predicted_label,probabilities,probability"
+    )
+
+
+def test_validate_and_update_inference_response_wrong_input():
+    cic = CLASSIFICATION_INFERENCE_CONTAINERS.copy()
+
+    with pytest.raises(
+        ValueError,
+        message="Requested inference output keys [wrong_key, wrong_label] are unsupported. "
+        "The supported inference keys are [probability, probabilities, predicted_label, labels]",
+    ):
+        AutoML.validate_and_update_inference_response(
+            inference_containers=cic,
+            inference_response_keys=["wrong_key", "wrong_label", "probabilities", "probability"],
+        )
+
+
+def test_create_model(sagemaker_session):
+    auto_ml = AutoML(
+        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
+    )
+
+    pipeline_model = auto_ml.create_model(
+        name=JOB_NAME,
+        sagemaker_session=sagemaker_session,
+        candidate=CLASSIFICATION_CANDIDATE_DICT,
+        vpc_config=VPC_CONFIG,
+        enable_network_isolation=True,
+        model_kms_key=None,
+        predictor_cls=None,
+        inference_response_keys=None,
+    )
+
+    assert isinstance(pipeline_model, PipelineModel)
+
+
+def test_attach(sagemaker_session):
+    aml = AutoML.attach(job_name=JOB_NAME_3, sagemaker_session=sagemaker_session)
+    assert aml.current_job_name == JOB_NAME_3
+    assert aml.role == "mock_role_arn"
+    assert aml.target_attribute_name == "y"
+    assert aml.problem_type == "Auto"
+    assert aml.output_path == "s3://output_prefix"
+    assert aml.tags == LIST_TAGS_RESULT

From 333cdbf371571c9f831f3157bc6c5eb0349be458 Mon Sep 17 00:00:00 2001
From: pialidas <pialidas@amazon.com>
Date: Wed, 10 Jun 2020 12:14:26 -0400
Subject: [PATCH 2/9] Allow selecting inference response for Autopilot
 generated models

1. Added attach() method to AutoML to allow attaching an AutoML object
to an existing AutoMLJob
2. Added create_model() method to AutoML that returns a PipelineModel
that can be used by deploy()
3. Added a util function validate_and_update_inference_response() to
update the inference container list based on the requested
inference response keys
4. Updated and added unit tests
---
 src/sagemaker/automl/automl.py              | 2 +-
 tests/unit/sagemaker/automl/test_auto_ml.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/sagemaker/automl/automl.py b/src/sagemaker/automl/automl.py
index b060d7e369..334badfa9c 100644
--- a/src/sagemaker/automl/automl.py
+++ b/src/sagemaker/automl/automl.py
@@ -467,7 +467,7 @@ def _get_supported_inference_keys(cls, container, default=None):
         try:
             return [
                 x.strip()
-                for x in container["Environment"]["SAGEMAKER_INFERENCE_SUPPORT"].split(",")
+                for x in container["Environment"]["SAGEMAKER_INFERENCE_SUPPORTED"].split(",")
             ]
         except KeyError:
             if default is None:
diff --git a/tests/unit/sagemaker/automl/test_auto_ml.py b/tests/unit/sagemaker/automl/test_auto_ml.py
index 5ff99e3b8d..b7df53bfe3 100644
--- a/tests/unit/sagemaker/automl/test_auto_ml.py
+++ b/tests/unit/sagemaker/automl/test_auto_ml.py
@@ -114,7 +114,7 @@
     {
         "Environment": {
             "MAX_CONTENT_LENGTH": "20000000",
-            "SAGEMAKER_INFERENCE_SUPPORT": "probability,probabilities,predicted_label",
+            "SAGEMAKER_INFERENCE_SUPPORTED": "probability,probabilities,predicted_label",
             "SAGEMAKER_INFERENCE_OUTPUT": "predicted_label",
         },
         "Image": "account.dkr.ecr.us-west-2.amazonaws.com/sagemaker-auto-ml-training:1.0-cpu-py3",
@@ -123,7 +123,7 @@
     {
         "Environment": {
             "INVERSE_LABEL_TRANSFORM": "1",
-            "SAGEMAKER_INFERENCE_SUPPORT": "probability,probabilities,predicted_label,labels",
+            "SAGEMAKER_INFERENCE_SUPPORTED": "probability,probabilities,predicted_label,labels",
             "SAGEMAKER_INFERENCE_OUTPUT": "predicted_label",
             "SAGEMAKER_INFERENCE_INPUT": "predicted_label",
         },

From 5f21aa2e94c75c9f462f53805e2653293ba75b83 Mon Sep 17 00:00:00 2001
From: pialidas <pialidas@amazon.com>
Date: Wed, 10 Jun 2020 13:14:29 -0400
Subject: [PATCH 3/9] fixed unit tests for py27 and integ test

---
 src/sagemaker/automl/automl.py              | 4 ++--
 tests/integ/test_auto_ml.py                 | 5 ++++-
 tests/unit/sagemaker/automl/test_auto_ml.py | 8 +++++---
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/sagemaker/automl/automl.py b/src/sagemaker/automl/automl.py
index 334badfa9c..8b1a69173f 100644
--- a/src/sagemaker/automl/automl.py
+++ b/src/sagemaker/automl/automl.py
@@ -117,8 +117,8 @@ def attach(cls, job_name, sagemaker_session=None):
 
         _auto_ml_job_desc = sagemaker_session.describe_auto_ml_job(job_name)
         automl_job_tags = sagemaker_session.sagemaker_client.list_tags(
-            _auto_ml_job_desc["AutoMLJobArn"]
-        )
+            ResourceArn=_auto_ml_job_desc["AutoMLJobArn"])["Tags"]
+
         amlj = AutoML(
             role=_auto_ml_job_desc["RoleArn"],
             target_attribute_name=_auto_ml_job_desc["InputDataConfig"][0]["TargetAttributeName"],
diff --git a/tests/integ/test_auto_ml.py b/tests/integ/test_auto_ml.py
index ed27492f16..1dd25da0bf 100644
--- a/tests/integ/test_auto_ml.py
+++ b/tests/integ/test_auto_ml.py
@@ -277,7 +277,10 @@ def test_deploy_best_candidate(sagemaker_session, cpu_instance_type):
     sagemaker_session.sagemaker_client.delete_endpoint(EndpointName=endpoint_name)
 
 
-@pytest.mark.canary_quick
+@pytest.mark.skipif(
+    tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS,
+    reason="AutoML is not supported in the region yet.",
+)
 def test_create_model_best_candidate(sagemaker_session, cpu_instance_type):
     auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
 
diff --git a/tests/unit/sagemaker/automl/test_auto_ml.py b/tests/unit/sagemaker/automl/test_auto_ml.py
index b7df53bfe3..e62635bd18 100644
--- a/tests/unit/sagemaker/automl/test_auto_ml.py
+++ b/tests/unit/sagemaker/automl/test_auto_ml.py
@@ -12,6 +12,8 @@
 # language governing permissions and limitations under the License.
 from __future__ import absolute_import
 
+import copy
+
 import pytest
 from mock import Mock, patch
 from sagemaker import AutoML, AutoMLJob, AutoMLInput, CandidateEstimator, PipelineModel
@@ -600,7 +602,7 @@ def test_candidate_estimator_fit(sagemaker_session):
 
 
 def test_validate_and_update_inference_response():
-    cic = CLASSIFICATION_INFERENCE_CONTAINERS.copy()
+    cic = copy.copy(CLASSIFICATION_INFERENCE_CONTAINERS)
 
     AutoML.validate_and_update_inference_response(
         inference_containers=cic,
@@ -622,7 +624,7 @@ def test_validate_and_update_inference_response():
 
 
 def test_validate_and_update_inference_response_wrong_input():
-    cic = CLASSIFICATION_INFERENCE_CONTAINERS.copy()
+    cic = copy.copy(CLASSIFICATION_INFERENCE_CONTAINERS)
 
     with pytest.raises(
         ValueError,
@@ -661,4 +663,4 @@ def test_attach(sagemaker_session):
     assert aml.target_attribute_name == "y"
     assert aml.problem_type == "Auto"
     assert aml.output_path == "s3://output_prefix"
-    assert aml.tags == LIST_TAGS_RESULT
+    assert aml.tags == LIST_TAGS_RESULT["Tags"]

From 450ceb760980c75033bec12c19659b7290227419 Mon Sep 17 00:00:00 2001
From: pialidas <pialidas@amazon.com>
Date: Wed, 10 Jun 2020 14:02:32 -0400
Subject: [PATCH 4/9] black formatting

---
 src/sagemaker/automl/automl.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/sagemaker/automl/automl.py b/src/sagemaker/automl/automl.py
index 8b1a69173f..648281699e 100644
--- a/src/sagemaker/automl/automl.py
+++ b/src/sagemaker/automl/automl.py
@@ -117,7 +117,8 @@ def attach(cls, job_name, sagemaker_session=None):
 
         _auto_ml_job_desc = sagemaker_session.describe_auto_ml_job(job_name)
         automl_job_tags = sagemaker_session.sagemaker_client.list_tags(
-            ResourceArn=_auto_ml_job_desc["AutoMLJobArn"])["Tags"]
+            ResourceArn=_auto_ml_job_desc["AutoMLJobArn"]
+        )["Tags"]
 
         amlj = AutoML(
             role=_auto_ml_job_desc["RoleArn"],

From 68b38d053d6f7bbd691863ab5699cb65c742f71a Mon Sep 17 00:00:00 2001
From: pialidas <pialidas@amazon.com>
Date: Wed, 10 Jun 2020 19:10:56 -0400
Subject: [PATCH 5/9] fix the test

---
 src/sagemaker/automl/automl.py | 38 ++++++++++++++++++----------------
 tests/integ/test_auto_ml.py    |  2 +-
 2 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/src/sagemaker/automl/automl.py b/src/sagemaker/automl/automl.py
index 648281699e..3bd20e7fc1 100644
--- a/src/sagemaker/automl/automl.py
+++ b/src/sagemaker/automl/automl.py
@@ -1,3 +1,4 @@
+# %load /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/automl/automl.py
 # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
@@ -123,33 +124,34 @@ def attach(cls, job_name, sagemaker_session=None):
         amlj = AutoML(
             role=_auto_ml_job_desc["RoleArn"],
             target_attribute_name=_auto_ml_job_desc["InputDataConfig"][0]["TargetAttributeName"],
-            problem_type=_auto_ml_job_desc["ProblemType"],
-            output_path=_auto_ml_job_desc["OutputDataConfig"]["S3OutputPath"],
             output_kms_key=_auto_ml_job_desc["OutputDataConfig"].get("KmsKeyId"),
+            output_path=_auto_ml_job_desc["OutputDataConfig"]["S3OutputPath"],
             base_job_name=job_name,
+            compression_type=_auto_ml_job_desc["InputDataConfig"][0].get("CompressionType"),
             sagemaker_session=sagemaker_session,
-            volume_kms_key=_auto_ml_job_desc["AutoMLJobConfig"]
+            volume_kms_key=_auto_ml_job_desc.get("AutoMLJobConfig", {})
             .get("SecurityConfig", {})
             .get("VolumeKmsKeyId"),
-            encrypt_inter_container_traffic=_auto_ml_job_desc["AutoMLJobConfig"]
+            encrypt_inter_container_traffic=_auto_ml_job_desc.get("AutoMLJobConfig", {})
             .get("SecurityConfig", {})
             .get("EnableInterContainerTrafficEncryption", False),
-            vpc_config=_auto_ml_job_desc["AutoMLJobConfig"]
+            vpc_config=_auto_ml_job_desc.get("AutoMLJobConfig", {})
             .get("SecurityConfig", {})
             .get("VpcConfig"),
-            max_candidates=_auto_ml_job_desc["AutoMLJobConfig"]["CompletionCriteria"][
-                "MaxCandidates"
-            ],
-            max_runtime_per_training_job_in_seconds=_auto_ml_job_desc["AutoMLJobConfig"][
-                "CompletionCriteria"
-            ]["MaxRuntimePerTrainingJobInSeconds"],
-            total_job_runtime_in_seconds=_auto_ml_job_desc["AutoMLJobConfig"]["CompletionCriteria"][
-                "MaxAutoMLJobRuntimeInSeconds"
-            ],
-            job_objective=_auto_ml_job_desc["AutoMLJobObjective"]["MetricName"],
-            generate_candidate_definitions_only=_auto_ml_job_desc[
-                "GenerateCandidateDefinitionsOnly"
-            ],
+            problem_type=_auto_ml_job_desc.get("ProblemType"),
+            max_candidates=_auto_ml_job_desc.get("AutoMLJobConfig", {})
+            .get("CompletionCriteria", {})
+            .get("MaxCandidates"),
+            max_runtime_per_training_job_in_seconds=_auto_ml_job_desc.get("AutoMLJobConfig", {})
+            .get("CompletionCriteria", {})
+            .get("MaxRuntimePerTrainingJobInSeconds"),
+            total_job_runtime_in_seconds=_auto_ml_job_desc.get("AutoMLJobConfig", {})
+            .get("CompletionCriteria", {})
+            .get("MaxAutoMLJobRuntimeInSeconds"),
+            job_objective=_auto_ml_job_desc.get("AutoMLJobObjective", {}).get("MetricName"),
+            generate_candidate_definitions_only=_auto_ml_job_desc.get(
+                "GenerateCandidateDefinitionsOnly", False
+            ),
             tags=automl_job_tags,
         )
         amlj.current_job_name = job_name
diff --git a/tests/integ/test_auto_ml.py b/tests/integ/test_auto_ml.py
index 1dd25da0bf..45018307cd 100644
--- a/tests/integ/test_auto_ml.py
+++ b/tests/integ/test_auto_ml.py
@@ -284,7 +284,7 @@ def test_deploy_best_candidate(sagemaker_session, cpu_instance_type):
 def test_create_model_best_candidate(sagemaker_session, cpu_instance_type):
     auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
 
-    auto_ml = AutoML.attach(job_name=AUTO_ML_JOB_NAME)
+    auto_ml = AutoML.attach(job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session)
     best_candidate = auto_ml.best_candidate()
 
     with timeout(minutes=2):

From baad38ec318cf8d7ced27481c620f4d01e9f3123 Mon Sep 17 00:00:00 2001
From: pialidas <pialidas@amazon.com>
Date: Thu, 11 Jun 2020 00:44:14 -0400
Subject: [PATCH 6/9] fix the test

---
 tests/integ/test_auto_ml.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/integ/test_auto_ml.py b/tests/integ/test_auto_ml.py
index 45018307cd..5e1e7268ed 100644
--- a/tests/integ/test_auto_ml.py
+++ b/tests/integ/test_auto_ml.py
@@ -38,6 +38,8 @@
 
 # use a succeeded AutoML job to test describe and list candidates method, otherwise tests will run too long
 AUTO_ML_JOB_NAME = "python-sdk-integ-test-base-job"
+DEFAULT_MODEL_NAME = "python-sdk-automl"
+
 
 EXPECTED_DEFAULT_JOB_CONFIG = {
     "CompletionCriteria": {"MaxCandidates": 3},
@@ -287,8 +289,9 @@ def test_create_model_best_candidate(sagemaker_session, cpu_instance_type):
     auto_ml = AutoML.attach(job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session)
     best_candidate = auto_ml.best_candidate()
 
-    with timeout(minutes=2):
+    with timeout(minutes=5):
         pipeline_model = auto_ml.create_model(
+            name=DEFAULT_MODEL_NAME,
             candidate=best_candidate,
             sagemaker_session=sagemaker_session,
             vpc_config=None,

From 0324152599384ab0d50500640e21d2e94a3c76bd Mon Sep 17 00:00:00 2001
From: pialidas <pialidas@amazon.com>
Date: Thu, 11 Jun 2020 19:08:55 -0400
Subject: [PATCH 7/9] PR comments addressed

---
 src/sagemaker/automl/automl.py              | 49 +++++++++++----------
 tests/integ/test_auto_ml.py                 |  2 +-
 tests/unit/sagemaker/automl/test_auto_ml.py |  2 +-
 3 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/src/sagemaker/automl/automl.py b/src/sagemaker/automl/automl.py
index 3bd20e7fc1..536065c21a 100644
--- a/src/sagemaker/automl/automl.py
+++ b/src/sagemaker/automl/automl.py
@@ -1,4 +1,3 @@
-# %load /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/automl/automl.py
 # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
@@ -102,60 +101,64 @@ def fit(self, inputs=None, wait=True, logs=True, job_name=None):
             self.latest_auto_ml_job.wait(logs=logs)
 
     @classmethod
-    def attach(cls, job_name, sagemaker_session=None):
+    def attach(cls, auto_ml_job_name, sagemaker_session=None):
         """Attach to an existing AutoML job.
 
+        Creates and returns an AutoML instance bound to an existing AutoML job.
+
         Args:
-            job_name (str): AutoML job name
+            auto_ml_job_name (str): AutoML job name
             sagemaker_session (sagemaker.session.Session): A SageMaker Session
                 object, used for SageMaker interactions (default: None). If not
-                specified, the one originally associated with the ``AutoML`` instance is used.:
+                specified, a new ``Session`` is created.
 
         Returns:
+            sagemaker.automl.AutoML: An ``AutoML`` instance with the attached AutoML job.
 
         """
         sagemaker_session = sagemaker_session or Session()
 
-        _auto_ml_job_desc = sagemaker_session.describe_auto_ml_job(job_name)
+        auto_ml_job_desc = sagemaker_session.describe_auto_ml_job(auto_ml_job_name)
         automl_job_tags = sagemaker_session.sagemaker_client.list_tags(
-            ResourceArn=_auto_ml_job_desc["AutoMLJobArn"]
+            ResourceArn=auto_ml_job_desc["AutoMLJobArn"]
         )["Tags"]
 
         amlj = AutoML(
-            role=_auto_ml_job_desc["RoleArn"],
-            target_attribute_name=_auto_ml_job_desc["InputDataConfig"][0]["TargetAttributeName"],
-            output_kms_key=_auto_ml_job_desc["OutputDataConfig"].get("KmsKeyId"),
-            output_path=_auto_ml_job_desc["OutputDataConfig"]["S3OutputPath"],
-            base_job_name=job_name,
-            compression_type=_auto_ml_job_desc["InputDataConfig"][0].get("CompressionType"),
+            role=auto_ml_job_desc["RoleArn"],
+            target_attribute_name=auto_ml_job_desc["InputDataConfig"][0]["TargetAttributeName"],
+            output_kms_key=auto_ml_job_desc["OutputDataConfig"].get("KmsKeyId"),
+            output_path=auto_ml_job_desc["OutputDataConfig"]["S3OutputPath"],
+            base_job_name=auto_ml_job_name,
+            compression_type=auto_ml_job_desc["InputDataConfig"][0].get("CompressionType"),
             sagemaker_session=sagemaker_session,
-            volume_kms_key=_auto_ml_job_desc.get("AutoMLJobConfig", {})
+            volume_kms_key=auto_ml_job_desc.get("AutoMLJobConfig", {})
             .get("SecurityConfig", {})
             .get("VolumeKmsKeyId"),
-            encrypt_inter_container_traffic=_auto_ml_job_desc.get("AutoMLJobConfig", {})
+            encrypt_inter_container_traffic=auto_ml_job_desc.get("AutoMLJobConfig", {})
             .get("SecurityConfig", {})
             .get("EnableInterContainerTrafficEncryption", False),
-            vpc_config=_auto_ml_job_desc.get("AutoMLJobConfig", {})
+            vpc_config=auto_ml_job_desc.get("AutoMLJobConfig", {})
             .get("SecurityConfig", {})
             .get("VpcConfig"),
-            problem_type=_auto_ml_job_desc.get("ProblemType"),
-            max_candidates=_auto_ml_job_desc.get("AutoMLJobConfig", {})
+            problem_type=auto_ml_job_desc.get("ProblemType"),
+            max_candidates=auto_ml_job_desc.get("AutoMLJobConfig", {})
             .get("CompletionCriteria", {})
             .get("MaxCandidates"),
-            max_runtime_per_training_job_in_seconds=_auto_ml_job_desc.get("AutoMLJobConfig", {})
+            max_runtime_per_training_job_in_seconds=auto_ml_job_desc.get("AutoMLJobConfig", {})
             .get("CompletionCriteria", {})
             .get("MaxRuntimePerTrainingJobInSeconds"),
-            total_job_runtime_in_seconds=_auto_ml_job_desc.get("AutoMLJobConfig", {})
+            total_job_runtime_in_seconds=auto_ml_job_desc.get("AutoMLJobConfig", {})
             .get("CompletionCriteria", {})
             .get("MaxAutoMLJobRuntimeInSeconds"),
-            job_objective=_auto_ml_job_desc.get("AutoMLJobObjective", {}).get("MetricName"),
-            generate_candidate_definitions_only=_auto_ml_job_desc.get(
+            job_objective=auto_ml_job_desc.get("AutoMLJobObjective", {}).get("MetricName"),
+            generate_candidate_definitions_only=auto_ml_job_desc.get(
                 "GenerateCandidateDefinitionsOnly", False
             ),
             tags=automl_job_tags,
         )
-        amlj.current_job_name = job_name
-        amlj._auto_ml_job_desc = _auto_ml_job_desc
+        amlj.current_job_name = auto_ml_job_name
+        amlj.latest_auto_ml_job = auto_ml_job_name  # pylint: disable=W0201
+        amlj._auto_ml_job_desc = auto_ml_job_desc
         return amlj
 
     def describe_auto_ml_job(self, job_name=None):
diff --git a/tests/integ/test_auto_ml.py b/tests/integ/test_auto_ml.py
index 5e1e7268ed..0b86cf5551 100644
--- a/tests/integ/test_auto_ml.py
+++ b/tests/integ/test_auto_ml.py
@@ -286,7 +286,7 @@ def test_deploy_best_candidate(sagemaker_session, cpu_instance_type):
 def test_create_model_best_candidate(sagemaker_session, cpu_instance_type):
     auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
 
-    auto_ml = AutoML.attach(job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session)
+    auto_ml = AutoML.attach(auto_ml_job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session)
     best_candidate = auto_ml.best_candidate()
 
     with timeout(minutes=5):
diff --git a/tests/unit/sagemaker/automl/test_auto_ml.py b/tests/unit/sagemaker/automl/test_auto_ml.py
index e62635bd18..70adc840bc 100644
--- a/tests/unit/sagemaker/automl/test_auto_ml.py
+++ b/tests/unit/sagemaker/automl/test_auto_ml.py
@@ -657,7 +657,7 @@ def test_create_model(sagemaker_session):
 
 
 def test_attach(sagemaker_session):
-    aml = AutoML.attach(job_name=JOB_NAME_3, sagemaker_session=sagemaker_session)
+    aml = AutoML.attach(auto_ml_job_name=JOB_NAME_3, sagemaker_session=sagemaker_session)
     assert aml.current_job_name == JOB_NAME_3
     assert aml.role == "mock_role_arn"
     assert aml.target_attribute_name == "y"

From fdb858fd13d79a9aadd2eef900829da5126d90a6 Mon Sep 17 00:00:00 2001
From: pialidas <pialidas@amazon.com>
Date: Thu, 11 Jun 2020 20:03:26 -0400
Subject: [PATCH 8/9] Few minor fixes to the comments

---
 src/sagemaker/automl/automl.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/sagemaker/automl/automl.py b/src/sagemaker/automl/automl.py
index 536065c21a..c204356e5b 100644
--- a/src/sagemaker/automl/automl.py
+++ b/src/sagemaker/automl/automl.py
@@ -467,8 +467,8 @@ def _get_supported_inference_keys(cls, container, default=None):
             List of keys the container support or default
 
         Raises:
-            Value error if the default is None and the container definition has
-            no marker environment variable.
+            KeyError if the default is None and the container definition has
+            no marker environment variable SAGEMAKER_INFERENCE_SUPPORTED.
         """
         try:
             return [

From f263ae8013cc77509feca5be37916d67f7694667 Mon Sep 17 00:00:00 2001
From: pialidas <pialidas@amazon.com>
Date: Thu, 11 Jun 2020 22:00:03 -0400
Subject: [PATCH 9/9] fix the arg name

---
 tests/integ/test_auto_ml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integ/test_auto_ml.py b/tests/integ/test_auto_ml.py
index 0b86cf5551..fc547256c6 100644
--- a/tests/integ/test_auto_ml.py
+++ b/tests/integ/test_auto_ml.py
@@ -208,7 +208,7 @@ def test_auto_ml_attach(sagemaker_session):
     auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
 
     attached_automl_job = AutoML.attach(
-        job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session
+        auto_ml_job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session
     )
     attached_desc = attached_automl_job.describe_auto_ml_job()
     assert attached_desc["AutoMLJobName"] == AUTO_ML_JOB_NAME