Skip to content

Commit b40476e

Browse files
authored
Merge branch 'master' into mvs-kms-sse
2 parents 34bdbf7 + 1787f78 commit b40476e

15 files changed

+363
-109
lines changed

CHANGELOG.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,14 @@ CHANGELOG
66
==========
77

88
* bug-fix: pass kms id as parameter for uploading code with Server side encryption
9+
* feature: ``PipelineModel``: Create a Transformer from a PipelineModel
910

1011
1.18.4
1112
======
1213

1314
* doc-fix: Remove incorrect parameter for EI TFS Python README
1415
* feature: ``Predictor``: delete SageMaker model
15-
* feature: ``Pipeline``: delete SageMaker model
16+
* feature: ``PipelineModel``: delete SageMaker model
1617
* bug-fix: Estimator.attach works with training jobs without hyperparameters
1718
* doc-fix: remove duplicate content from mxnet/README.rst
1819
* doc-fix: move overview content in main README into Sphinx project

README.rst

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -965,8 +965,9 @@ the ML Pipeline.
965965
endpoint_name = 'inference-pipeline-endpoint'
966966
sm_model = PipelineModel(name=model_name, role=sagemaker_role, models=[sparkml_model, xgb_model])
967967
968-
This will define a ``PipelineModel`` consisting of SparkML model and an XGBoost model stacked sequentially. For more
969-
information about how to train an XGBoost model, please refer to the XGBoost notebook here_.
968+
This will define a ``PipelineModel`` consisting of SparkML model and an XGBoost model stacked sequentially.
969+
970+
For more information about how to train an XGBoost model, please refer to the XGBoost notebook here_.
970971
971972
.. _here: https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost.html#xgboost-sample-notebooks
972973
@@ -978,6 +979,37 @@ This returns a predictor the same way an ``Estimator`` does when ``deploy()`` is
978979
request using this predictor, you should pass the data that the first container expects and the predictor will return the
979980
output from the last container.
980981
982+
You can also use a ``PipelineModel`` to create Transform Jobs for batch transformations. Using the same ``PipelineModel`` ``sm_model`` as above:
983+
984+
.. code:: python
985+
986+
# Only instance_type and instance_count are required.
987+
transformer = sm_model.transformer(instance_type='ml.c5.xlarge',
988+
instance_count=1,
989+
strategy='MultiRecord',
990+
max_payload=6,
991+
max_concurrent_transforms=8,
992+
accept='text/csv',
993+
assemble_with='Line',
994+
output_path='s3://my-output-bucket/path/to/my/output/data/')
995+
# Only data is required.
996+
transformer.transform(data='s3://my-input-bucket/path/to/my/csv/data',
997+
content_type='text/csv',
998+
split_type='Line')
999+
# Waits for the Pipeline Transform Job to finish.
1000+
transformer.wait()
1001+
1002+
This runs a transform job against all the files under ``s3://my-input-bucket/path/to/my/csv/data``, transforming the input
1003+
data in order with each model container in the pipeline. For each input file that was successfully transformed, one output file in ``s3://my-output-bucket/path/to/my/output/data/``
1004+
will be created with the same name, appended with '.out'.
1005+
1006+
This transform job will split CSV files by newline separators, which is especially useful if the input files are large. The Transform Job will
1007+
assemble the outputs with line separators when writing each input file's corresponding output file.
1008+
1009+
Each payload entering the first model container will be up to six megabytes, and up to eight inference requests will be sent at the
1010+
same time to the first model container. Since each payload will consist of a mini-batch of multiple CSV records, the model
1011+
containers will transform each mini-batch of records.
1012+
9811013
For comprehensive examples on how to use Inference Pipelines please refer to the following notebooks:
9821014
9831015
- `inference_pipeline_sparkml_xgboost_abalone.ipynb <https://github.com/awslabs/amazon-sagemaker-examples/blob/master/advanced_functionality/inference_pipeline_sparkml_xgboost_abalone/inference_pipeline_sparkml_xgboost_abalone.ipynb>`__

src/sagemaker/model.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
from sagemaker import fw_utils, local, session, utils
2020
from sagemaker.transformer import Transformer
2121

22+
logging.basicConfig()
23+
LOGGER = logging.getLogger('sagemaker')
24+
LOGGER.setLevel(logging.INFO)
2225

2326
NEO_ALLOWED_TARGET_INSTANCE_FAMILY = set(['ml_c5', 'ml_m5', 'ml_c4', 'ml_m4', 'jetson_tx1', 'jetson_tx2', 'ml_p2',
2427
'ml_p3', 'deeplens', 'rasp3b'])
@@ -99,7 +102,9 @@ def _create_sagemaker_model(self, instance_type, accelerator_type=None):
99102
Args:
100103
instance_type (str): The EC2 instance type that this Model will be used for, this is only
101104
used to determine if the image needs GPU support or not.
102-
accelerator_type (str): <put docs here>
105+
accelerator_type (str): Type of Elastic Inference accelerator to attach to an endpoint for model loading
106+
and inference, for example, 'ml.eia1.medium'. If not specified, no Elastic Inference accelerator
107+
will be attached to the endpoint.
103108
"""
104109
container_def = self.prepare_container_def(instance_type, accelerator_type=accelerator_type)
105110
self.name = self.name or utils.name_from_image(container_def['Image'])
@@ -190,9 +195,13 @@ def compile(self, target_instance_family, input_shape, output_path, role,
190195
self.sagemaker_session.compile_model(**config)
191196
job_status = self.sagemaker_session.wait_for_compilation_job(job_name)
192197
self.model_data = job_status['ModelArtifacts']['S3ModelArtifacts']
193-
self.image = self._neo_image(self.sagemaker_session.boto_region_name, target_instance_family, framework,
194-
framework_version)
195-
self._is_compiled_model = True
198+
if target_instance_family.startswith('ml_'):
199+
self.image = self._neo_image(self.sagemaker_session.boto_region_name, target_instance_family, framework,
200+
framework_version)
201+
self._is_compiled_model = True
202+
else:
203+
LOGGER.warning("The instance type {} is not supported for deployment via SageMaker, "
204+
"please deploy the model on the device by yourself.".format(target_instance_family))
196205
return self
197206

198207
def deploy(self, initial_instance_count, instance_type, accelerator_type=None, endpoint_name=None,
@@ -285,10 +294,6 @@ def transformer(self, instance_count, instance_type, strategy=None, assemble_wit
285294
max_payload (int): Maximum size of the payload in a single HTTP request to the container in MB.
286295
tags (list[dict]): List of tags for labeling a transform job. If none specified, then the tags used for
287296
the training job are used for the transform job.
288-
role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during
289-
transform jobs. If not specified, the role from the Model will be used.
290-
model_server_workers (int): Optional. The number of worker processes used by the inference server.
291-
If None, server will use one worker per vCPU.
292297
volume_kms_key (str): Optional. KMS key ID for encrypting the volume attached to the ML
293298
compute instance (default: None).
294299
"""

src/sagemaker/pipeline.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import sagemaker
1616
from sagemaker.session import Session
1717
from sagemaker.utils import name_from_image
18+
from sagemaker.transformer import Transformer
1819

1920

2021
class PipelineModel(object):
@@ -104,6 +105,56 @@ def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags
104105
if self.predictor_cls:
105106
return self.predictor_cls(self.endpoint_name, self.sagemaker_session)
106107

108+
def _create_sagemaker_pipeline_model(self, instance_type):
109+
"""Create a SageMaker Model Entity
110+
111+
Args:
112+
instance_type (str): The EC2 instance type that this Model will be used for, this is only
113+
used to determine if the image needs GPU support or not.
114+
accelerator_type (str): Type of Elastic Inference accelerator to attach to an endpoint for model loading
115+
and inference, for example, 'ml.eia1.medium'. If not specified, no Elastic Inference accelerator
116+
will be attached to the endpoint.
117+
"""
118+
if not self.sagemaker_session:
119+
self.sagemaker_session = Session()
120+
121+
containers = self.pipeline_container_def(instance_type)
122+
123+
self.name = self.name or name_from_image(containers[0]['Image'])
124+
self.sagemaker_session.create_model(self.name, self.role, containers, vpc_config=self.vpc_config)
125+
126+
def transformer(self, instance_count, instance_type, strategy=None, assemble_with=None, output_path=None,
127+
output_kms_key=None, accept=None, env=None, max_concurrent_transforms=None,
128+
max_payload=None, tags=None, volume_kms_key=None):
129+
"""Return a ``Transformer`` that uses this Model.
130+
131+
Args:
132+
instance_count (int): Number of EC2 instances to use.
133+
instance_type (str): Type of EC2 instance to use, for example, 'ml.c4.xlarge'.
134+
strategy (str): The strategy used to decide how to batch records in a single request (default: None).
135+
Valid values: 'MULTI_RECORD' and 'SINGLE_RECORD'.
136+
assemble_with (str): How the output is assembled (default: None). Valid values: 'Line' or 'None'.
137+
output_path (str): S3 location for saving the transform result. If not specified, results are stored to
138+
a default bucket.
139+
output_kms_key (str): Optional. KMS key ID for encrypting the transform output (default: None).
140+
accept (str): The content type accepted by the endpoint deployed during the transform job.
141+
env (dict): Environment variables to be set for use during the transform job (default: None).
142+
max_concurrent_transforms (int): The maximum number of HTTP requests to be made to
143+
each individual transform container at one time.
144+
max_payload (int): Maximum size of the payload in a single HTTP request to the container in MB.
145+
tags (list[dict]): List of tags for labeling a transform job. If none specified, then the tags used for
146+
the training job are used for the transform job.
147+
volume_kms_key (str): Optional. KMS key ID for encrypting the volume attached to the ML
148+
compute instance (default: None).
149+
"""
150+
self._create_sagemaker_pipeline_model(instance_type)
151+
152+
return Transformer(self.name, instance_count, instance_type, strategy=strategy, assemble_with=assemble_with,
153+
output_path=output_path, output_kms_key=output_kms_key, accept=accept,
154+
max_concurrent_transforms=max_concurrent_transforms, max_payload=max_payload,
155+
env=env, tags=tags, base_transform_job_name=self.name,
156+
volume_kms_key=volume_kms_key, sagemaker_session=self.sagemaker_session)
157+
107158
def delete_model(self):
108159
"""Delete the SageMaker model backing this pipeline model. This does not delete the list of SageMaker models used
109160
in multiple containers to build the inference pipeline.

src/sagemaker/transformer.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
# language governing permissions and limitations under the License.
1313
from __future__ import absolute_import
1414

15+
from botocore import exceptions
16+
1517
from sagemaker.job import _Job
1618
from sagemaker.session import Session
1719
from sagemaker.utils import base_name_from_image, name_from_base
@@ -119,8 +121,14 @@ def delete_model(self):
119121
self.sagemaker_session.delete_model(self.model_name)
120122

121123
def _retrieve_image_name(self):
122-
model_desc = self.sagemaker_session.sagemaker_client.describe_model(ModelName=self.model_name)
123-
return model_desc['PrimaryContainer']['Image']
124+
try:
125+
model_desc = self.sagemaker_session.sagemaker_client.describe_model(ModelName=self.model_name)
126+
return model_desc['PrimaryContainer']['Image']
127+
except exceptions.ClientError:
128+
raise ValueError('Failed to fetch model information for %s. '
129+
'Please ensure that the model exists. '
130+
'Local instance types require locally created models.'
131+
% self.model_name)
124132

125133
def wait(self):
126134
self._ensure_last_transform_job()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
1.0,28.0,C,38.0,71.5,1.0
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
1.0,C,38.0,71.5,1.0,female

tests/integ/kms_utils.py

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
from botocore import exceptions
1616

17-
KEY_ALIAS = "SageMakerKmsKey"
17+
KEY_ALIAS = "SageMakerIntegTestKmsKey"
1818
KEY_POLICY = '''
1919
{{
2020
"Version": "2012-10-17",
@@ -28,39 +28,6 @@
2828
}},
2929
"Action": "kms:*",
3030
"Resource": "*"
31-
}},
32-
{{
33-
"Sid": "Allow use of the key",
34-
"Effect": "Allow",
35-
"Principal": {{
36-
"AWS": "{account_id}"
37-
}},
38-
"Action": [
39-
"kms:Encrypt",
40-
"kms:Decrypt",
41-
"kms:ReEncrypt*",
42-
"kms:GenerateDataKey*",
43-
"kms:DescribeKey"
44-
],
45-
"Resource": "*"
46-
}},
47-
{{
48-
"Sid": "Allow attachment of persistent resources",
49-
"Effect": "Allow",
50-
"Principal": {{
51-
"AWS": "{account_id}"
52-
}},
53-
"Action": [
54-
"kms:CreateGrant",
55-
"kms:ListGrants",
56-
"kms:RevokeGrant"
57-
],
58-
"Resource": "*",
59-
"Condition": {{
60-
"Bool": {{
61-
"kms:GrantIsForAWSResource": "true"
62-
}}
63-
}}
6431
}}
6532
]
6633
}}

tests/integ/marketplace_utils.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.
13+
from __future__ import absolute_import
14+
15+
REGION_ACCOUNT_MAP = {
16+
'us-east-1': '865070037744',
17+
'us-east-2': '057799348421',
18+
'us-west-2': '594846645681',
19+
'eu-west-1': '985815980388',
20+
'eu-central-1': '446921602837',
21+
'ap-northeast-1': '977537786026',
22+
'ap-northeast-2': '745090734665',
23+
'ap-southeast-2': '666831318237',
24+
'ap-southeast-1': '192199979996',
25+
'ap-south-1': '077584701553',
26+
'ca-central-1': '470592106596',
27+
'eu-west-2': '856760150666',
28+
'us-west-1': '382657785993'
29+
}

0 commit comments

Comments
 (0)