aws · chuyang-deng · Feb 28, 2020 · Feb 27, 2020 · Feb 27, 2020 · Feb 27, 2020
diff --git a/src/sagemaker_pytorch_serving_container/default_inference_handler.py b/src/sagemaker_pytorch_serving_container/default_inference_handler.py
@@ -12,12 +12,24 @@
 # language governing permissions and limitations under the License.
 from __future__ import absolute_import
 
+import logging
+import os
+import sys
 import textwrap
 
 import torch
-
 from sagemaker_inference import content_types, decoder, default_inference_handler, encoder
 
+INFERENCE_ACCELERATOR_PRESENT_ENV = 'SAGEMAKER_INFERENCE_ACCELERATOR_PRESENT'
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+logger.addHandler(logging.StreamHandler(sys.stdout))
+
+
+class FileNotFoundError(OSError):  # raised by default_model_fn when model.pt is not found
+    pass  # NOTE(review): same name as the Python 3 builtin FileNotFoundError; confirm this is intended
+
 
 class DefaultPytorchInferenceHandler(default_inference_handler.DefaultInferenceHandler):
     VALID_CONTENT_TYPES = (content_types.JSON, content_types.NPY)
@@ -31,10 +43,18 @@ def default_model_fn(self, model_dir):
 
         Returns: A PyTorch model.
         """
-        raise NotImplementedError(textwrap.dedent("""
-        Please provide a model_fn implementation.
-        See documentation for model_fn at https://github.com/aws/sagemaker-python-sdk
-        """))
+        if os.getenv(INFERENCE_ACCELERATOR_PRESENT_ENV) == 'true':
+            default_model_filename = "model.pt"
+            model_path = os.path.join(model_dir, default_model_filename)
+            if not os.path.exists(model_path):
+                raise FileNotFoundError("Cannot find model.pt.")
+            model = torch.jit.load(model_path)
+            return model
+        else:
+            raise NotImplementedError(textwrap.dedent("""
+            Please provide a model_fn implementation.
+            See documentation for model_fn at https://github.com/aws/sagemaker-python-sdk
+            """))
 
     def default_input_fn(self, input_data, content_type):
         """A default input_fn that can handle JSON, CSV and NPZ formats.
@@ -62,12 +82,22 @@ def default_predict_fn(self, data, model):
 
         Returns: a prediction
         """
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        model.to(device)
-        input_data = data.to(device)
-        model.eval()
         with torch.no_grad():
-            output = model(input_data)
+            if os.getenv(INFERENCE_ACCELERATOR_PRESENT_ENV) == 'true':
+                logger.info(
+                    'Performing EIA inference with Torch JIT context with input of size {}'.format(data.shape))
+                device = torch.device('cpu')
+                model = model.to(device)
+                input_data = data.to(device)
+                model.eval()
+                with torch.jit.optimized_execution(True, {'target_device': 'eia:0'}):
+                    output = model(input_data)
+            else:
+                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+                model = model.to(device)
+                input_data = data.to(device)
+                model.eval()
+                output = model(input_data)
 
         return output
 

diff --git a/test/resources/mnist/model_eia/mnist.py b/test/resources/mnist/model_eia/mnist.py
@@ -10,39 +10,4 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
-from __future__ import absolute_import
-import logging
-import os
-import sys
-
-import torch
-
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.DEBUG)
-logger.addHandler(logging.StreamHandler(sys.stdout))
-
-
-def predict_fn(input_data, model):
-    logger.info('Performing EIA inference with Torch JIT context with input of size {}'.format(input_data.shape))
-    # With EI, client instance should be CPU for cost-efficiency. Subgraphs with unsupported arguments run locally. Server runs with CUDA
-    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-    mdoel = model.to(device)
-    input_data = input_data.to(device)
-    with torch.no_grad():
-        # Set the target device to the accelerator ordinal
-        with torch.jit.optimized_execution(True, {'target_device': 'eia:0'}):
-            return model(input_data)
-
-
-def model_fn(model_dir):
-    logger.info('model_fn: Loading model with TorchScript from {}'.format(model_dir))
-    # Scripted model is serialized with torch.jit.save().
-    # No need to instantiate model definition then load state_dict
-    model = torch.jit.load('model.pth')
-    return model
-
-
-def save_model(model, model_dir):
-    logger.info("Saving the model to {}.".format(model_dir))
-    path = os.path.join(model_dir, 'model.pth')
-    torch.jit.save(model, path)
+# This file is intentionally left blank