aws · chuyang-deng · Mar 6, 2020 · Mar 4, 2020 · Mar 4, 2020 · Mar 4, 2020
@@ -176,7 +176,7 @@ TensorFlow SageMaker Estimators
 
 By using TensorFlow SageMaker Estimators, you can train and host TensorFlow models on Amazon SageMaker.
 
-Supported versions of TensorFlow: ``1.4.1``, ``1.5.0``, ``1.6.0``, ``1.7.0``, ``1.8.0``, ``1.9.0``, ``1.10.0``, ``1.11.0``, ``1.12.0``, ``1.13.1``, ``1.14.0``, ``1.15.0``, ``2.0.0``.
+Supported versions of TensorFlow: ``1.4.1``, ``1.5.0``, ``1.6.0``, ``1.7.0``, ``1.8.0``, ``1.9.0``, ``1.10.0``, ``1.11.0``, ``1.12.0``, ``1.13.1``, ``1.14.0``, ``1.15.0``, ``1.15.2``, ``2.0.0``, ``2.0.1``, ``2.1.0``.
 
 Supported versions of TensorFlow for Elastic Inference: ``1.11.0``, ``1.12.0``, ``1.13.1``, ``1.14.0``.
 
@@ -208,7 +208,9 @@ PyTorch SageMaker Estimators
 
 With PyTorch SageMaker Estimators, you can train and host PyTorch models on Amazon SageMaker.
 
-Supported versions of PyTorch: ``0.4.0``, ``1.0.0``, ``1.1.0``, ``1.2.0``, ``1.3.1``.
+Supported versions of PyTorch: ``0.4.0``, ``1.0.0``, ``1.1.0``, ``1.2.0``, ``1.3.1``, ``1.4.0``.
+
+Supported versions of PyTorch for Elastic Inference: ``1.3.1``.
 
 We recommend that you use the latest supported version, because that's where we focus most of our development efforts.
 

@@ -6,6 +6,8 @@ With PyTorch Estimators and Models, you can train and host PyTorch models on Ama
 
 Supported versions of PyTorch: ``0.4.0``, ``1.0.0``, ``1.1.0``, ``1.2.0``, ``1.3.1``.
 
+Supported versions of PyTorch for Elastic Inference: ``1.3.1``.
+
 We recommend that you use the latest supported version, because that's where we focus most of our development efforts.
 
 You can visit the PyTorch repository at https://github.com/pytorch/pytorch.
@@ -250,6 +252,14 @@ You use the SageMaker PyTorch model server to host your PyTorch model when you c
 Estimator. The model server runs inside a SageMaker Endpoint, which your call to ``deploy`` creates.
 You can access the name of the Endpoint by the ``name`` property on the returned ``Predictor``.
 
+PyTorch on Amazon SageMaker has support for `Elastic Inference <https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html>`_, which allows for inference acceleration to a hosted endpoint for a fraction of the cost of using a full GPU instance.
+To attach an Elastic Inference accelerator to your endpoint, pass the accelerator type as the ``accelerator_type`` argument of your ``deploy`` call.
+
+.. code:: python
+
+  predictor = pytorch_estimator.deploy(instance_type='ml.m4.xlarge',
+                                       initial_instance_count=1,
+                                       accelerator_type='ml.eia2.medium')
 
 The SageMaker PyTorch Model Server
 ==================================
@@ -291,6 +301,11 @@ It loads the model parameters from a ``model.pth`` file in the SageMaker model d
             model.load_state_dict(torch.load(f))
         return model
 
+However, if you are using PyTorch Elastic Inference, you do not have to provide a ``model_fn`` since the PyTorch serving
+container has a default one for you. Note that if you rely on the default ``model_fn``, you must save
+your parameter file as ``model.pt`` instead of ``model.pth``. For more information on the inference script, refer to:
+`SageMaker PyTorch Default Inference Handler <https://github.com/aws/sagemaker-pytorch-serving-container/blob/master/src/sagemaker_pytorch_serving_container/default_inference_handler.py>`_.
+
 Serve a PyTorch Model
 ---------------------
 

@@ -53,7 +53,14 @@
 )
 
 VALID_PY_VERSIONS = ["py2", "py3"]
-VALID_EIA_FRAMEWORKS = ["tensorflow", "tensorflow-serving", "mxnet", "mxnet-serving"]
+VALID_EIA_FRAMEWORKS = [
+    "tensorflow",
+    "tensorflow-serving",
+    "mxnet",
+    "mxnet-serving",
+    "pytorch-serving",
+]
+PY2_RESTRICTED_EIA_FRAMEWORKS = ["pytorch-serving"]
 VALID_ACCOUNTS_BY_REGION = {"us-gov-west-1": "246785580436", "us-iso-east-1": "744548109606"}
 ASIMOV_VALID_ACCOUNTS_BY_REGION = {"us-iso-east-1": "886529160074"}
 OPT_IN_ACCOUNTS_BY_REGION = {"ap-east-1": "057415533634", "me-south-1": "724002660598"}
@@ -71,6 +78,7 @@
     "mxnet-serving-eia": "mxnet-inference-eia",
     "pytorch": "pytorch-training",
     "pytorch-serving": "pytorch-inference",
+    "pytorch-serving-eia": "pytorch-inference-eia",
 }
 
 MERGED_FRAMEWORKS_LOWEST_VERSIONS = {
@@ -82,6 +90,7 @@
     "mxnet-serving-eia": [1, 4, 1],
     "pytorch": [1, 2, 0],
     "pytorch-serving": [1, 2, 0],
+    "pytorch-serving-eia": [1, 3, 1],
 }
 
 DEBUGGER_UNSUPPORTED_REGIONS = ["us-gov-west-1", "us-iso-east-1"]
@@ -207,6 +216,7 @@ def create_image_uri(
 
     if _accelerator_type_valid_for_framework(
         framework=framework,
+        py_version=py_version,
         accelerator_type=accelerator_type,
         optimized_families=optimized_families,
     ):
@@ -259,21 +269,27 @@ def create_image_uri(
 
 
 def _accelerator_type_valid_for_framework(
-    framework, accelerator_type=None, optimized_families=None
+    framework, py_version, accelerator_type=None, optimized_families=None
 ):
     """
     Args:
         framework:
+        py_version:
         accelerator_type:
         optimized_families:
     """
     if accelerator_type is None:
         return False
 
+    if py_version == "py2" and framework in PY2_RESTRICTED_EIA_FRAMEWORKS:
+        raise ValueError(
+            "{} is not supported with Amazon Elastic Inference in Python 2.".format(framework)
+        )
+
     if framework not in VALID_EIA_FRAMEWORKS:
         raise ValueError(
             "{} is not supported with Amazon Elastic Inference. Currently only "
-            "Python-based TensorFlow and MXNet are supported.".format(framework)
+            "Python-based TensorFlow, MXNet, PyTorch are supported.".format(framework)
         )
 
     if optimized_families:

@@ -6,6 +6,8 @@ With PyTorch Estimators and Models, you can train and host PyTorch models on Ama
 
 Supported versions of PyTorch: ``0.4.0``, ``1.0.0``, ``1.1.0``, ``1.2.0``, ``1.3.1``, ``1.4.0``.
 
+Supported versions of PyTorch for Elastic Inference: ``1.3.1``.
+
 We recommend that you use the latest supported version, because that's where we focus most of our development efforts.
 
 You can visit the PyTorch repository at https://github.com/pytorch/pytorch.

@@ -136,7 +136,7 @@ def prepare_container_def(self, instance_type, accelerator_type=None):
                 For example, 'ml.p2.xlarge'.
             accelerator_type (str): The Elastic Inference accelerator type to
                 deploy to the instance for loading and making inferences to the
-                model. Currently unsupported with PyTorch.
+                model.
 
         Returns:
             dict[str, str]: A container definition object usable with the
@@ -169,7 +169,7 @@ def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
                 (cpu/gpu/family-specific optimized).
             accelerator_type (str): The Elastic Inference accelerator type to
                 deploy to the instance for loading and making inferences to the
-                model. Currently unsupported with PyTorch.
+                model.
 
         Returns:
             str: The appropriate image URI based on the given parameters.

@@ -0,0 +1,13 @@
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+# This file is intentionally left blank to invoke default model_fn and predict_fn
@@ -27,6 +27,10 @@
 MNIST_DIR = os.path.join(DATA_DIR, "pytorch_mnist")
 MNIST_SCRIPT = os.path.join(MNIST_DIR, "mnist.py")
 
+EIA_DIR = os.path.join(DATA_DIR, "pytorch_eia")
+EIA_MODEL = os.path.join(EIA_DIR, "model_mnist.tar.gz")
+EIA_SCRIPT = os.path.join(EIA_DIR, "empty_inference_script.py")
+
 
 @pytest.fixture(scope="module", name="pytorch_training_job")
 def fixture_training_job(sagemaker_session, pytorch_full_version, cpu_instance_type):
@@ -115,6 +119,32 @@ def test_deploy_model(pytorch_training_job, sagemaker_session, cpu_instance_type
         assert output.shape == (batch_size, 10)
 
 
+@pytest.mark.skipif(PYTHON_VERSION == "py2", reason="PyTorch EIA does not support Python 2.")
+def test_deploy_model_with_accelerator(sagemaker_session, cpu_instance_type):
+    endpoint_name = "test-pytorch-deploy-eia-{}".format(sagemaker_timestamp())
+    model_data = sagemaker_session.upload_data(path=EIA_MODEL)
+    pytorch = PyTorchModel(
+        model_data,
+        "SageMakerRole",
+        framework_version="1.3.1",
+        entry_point=EIA_SCRIPT,
+        sagemaker_session=sagemaker_session,
+    )
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
+        predictor = pytorch.deploy(
+            initial_instance_count=1,
+            instance_type=cpu_instance_type,
+            accelerator_type="ml.eia2.medium",
+            endpoint_name=endpoint_name,
+        )
+
+        batch_size = 100
+        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
+        output = predictor.predict(data)
+
+        assert output.shape == (batch_size, 10)
+
+
 def _upload_training_data(pytorch):
     return pytorch.sagemaker_session.upload_data(
         path=os.path.join(MNIST_DIR, "training"),

@@ -311,6 +311,37 @@ def test_mxnet_eia_images():
     )
 
 
+def test_pytorch_eia_images():
+    image_uri = fw_utils.create_image_uri(
+        "us-east-1",
+        "pytorch-serving",
+        "ml.c4.2xlarge",
+        "1.3.1",
+        "py3",
+        accelerator_type="ml.eia1.large",
+    )
+    assert (
+        image_uri
+        == "{}.dkr.ecr.us-east-1.amazonaws.com/pytorch-inference-eia:1.3.1-cpu-py3".format(
+            fw_utils.ASIMOV_PROD_ACCOUNT
+        )
+    )
+
+
+def test_pytorch_eia_py2_error():
+    error_message = "pytorch-serving is not supported with Amazon Elastic Inference in Python 2."
+    with pytest.raises(ValueError) as error:
+        fw_utils.create_image_uri(
+            "us-east-1",
+            "pytorch-serving",
+            "ml.c4.2xlarge",
+            "1.3.1",
+            "py2",
+            accelerator_type="ml.eia1.large",
+        )
+    assert error_message in str(error)
+
+
 def test_create_image_uri_override_account():
     image_uri = fw_utils.create_image_uri(
         "us-west-1", MOCK_FRAMEWORK, "ml.p3.2xlarge", "1.0rc", "py3", account="fake"

@@ -345,11 +345,19 @@ def test_non_mms_model(repack_model, sagemaker_session):
 
 @patch("sagemaker.fw_utils.tar_and_upload_dir", MagicMock())
 def test_model_image_accelerator(sagemaker_session):
-    model = PyTorchModel(
-        MODEL_DATA, role=ROLE, entry_point=SCRIPT_PATH, sagemaker_session=sagemaker_session
+    with pytest.raises(ValueError) as error:
+        model = PyTorchModel(
+            MODEL_DATA,
+            role=ROLE,
+            entry_point=SCRIPT_PATH,
+            sagemaker_session=sagemaker_session,
+            framework_version="1.3.1",
+            py_version="py2",
+        )
+        model.deploy(1, CPU, accelerator_type=ACCELERATOR_TYPE)
+    assert "pytorch-serving is not supported with Amazon Elastic Inference in Python 2." in str(
+        error
     )
-    with pytest.raises(ValueError):
-        model.prepare_container_def(INSTANCE_TYPE, accelerator_type=ACCELERATOR_TYPE)
 
 
 def test_train_image_default(sagemaker_session):