Commit 79e49d2

Add new unit and integration tests
1 parent 1fbd3f7 commit 79e49d2

File tree

23 files changed: +860 -9 lines changed


CHANGELOG.md

Lines changed: 6 additions & 0 deletions
@@ -1,5 +1,11 @@
 # Changelog
 
+## v2.0.18 (2023-10-10)
+
+### Bug Fixes and Other Changes
+
+* Fix integration tests and update Python versions
+
 ## v2.0.17 (2023-08-07)
 
 ### Bug Fixes and Other Changes

VERSION

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-2.0.18.dev0
+2.0.19.dev0

buildspec.yml

Lines changed: 4 additions & 2 deletions
@@ -55,10 +55,10 @@ phases:
       # build DLC GPU image because the base DLC image is too big and takes too long to build as part of the test
       - python3 setup.py sdist
       - $(aws ecr get-login --registry-ids $DLC_ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
+      - create-key-pair
       - |
         for FRAMEWORK_VERSION in $FRAMEWORK_VERSIONS;
         do
-          create-key-pair;
           launch-ec2-instance --instance-type $instance_type --ami-name ami-03e3ef8c92fdb39ad;
           DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID";
           build_dir="test/container/$FRAMEWORK_VERSION";
@@ -71,8 +71,10 @@ phases:
           execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg";
           docker system prune --all --force;
           cleanup-gpu-instances;
-          cleanup-key-pairs;
+          rm ~/.instance_id;
+          rm ~/.ip_address;
         done
+      - cleanup-key-pairs;
 
       # run CPU sagemaker integration tests
       - |

test/integration/__init__.py

Lines changed: 34 additions & 1 deletion
@@ -19,22 +19,29 @@
 resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'resources'))
 mnist_path = os.path.join(resources_path, 'mnist')
 resnet18_path = os.path.join(resources_path, 'resnet18')
+mme_path = os.path.join(resources_path, 'mme')
+model_gpu_context_dir = os.path.join(resources_path, 'model_gpu_context')
 data_dir = os.path.join(mnist_path, 'data')
 training_dir = os.path.join(data_dir, 'training')
 cpu_sub_dir = 'model_cpu'
 gpu_sub_dir = 'model_gpu'
 eia_sub_dir = 'model_eia'
+inductor_sub_dir = 'model_inductor'
 code_sub_dir = 'code'
 default_sub_dir = 'default_model'
 default_sub_eia_dir = 'default_model_eia'
 default_sub_traced_resnet_dir = 'default_traced_resnet'
+resnet18_sub_dir = 'resnet18'
+traced_resnet18_sub_dir = 'traced_resnet18'
 
 model_cpu_dir = os.path.join(mnist_path, cpu_sub_dir)
 mnist_cpu_script = os.path.join(model_cpu_dir, code_sub_dir, 'mnist.py')
+mnist_cpu_requirements = os.path.join(model_cpu_dir, code_sub_dir, 'requirements.txt')
 model_cpu_tar = file_utils.make_tarfile(mnist_cpu_script,
                                         os.path.join(model_cpu_dir, "torch_model.pth"),
                                         model_cpu_dir,
-                                        script_path="code")
+                                        script_path="code",
+                                        requirements=mnist_cpu_requirements)
 
 model_cpu_1d_dir = os.path.join(model_cpu_dir, '1d')
 mnist_1d_script = os.path.join(model_cpu_1d_dir, code_sub_dir, 'mnist_1d.py')
@@ -56,6 +63,12 @@
                                         os.path.join(model_eia_dir, "torch_model.pth"),
                                         model_eia_dir)
 
+model_inductor_dir = os.path.join(mnist_path, inductor_sub_dir)
+mnist_inductor_script = os.path.join(model_inductor_dir, code_sub_dir, 'mnist.py')
+model_inductor_tar = file_utils.make_tarfile(mnist_inductor_script,
+                                             os.path.join(model_inductor_dir, "torch_model.pth"),
+                                             model_inductor_dir)
+
 call_model_fn_once_script = os.path.join(model_cpu_dir, code_sub_dir, 'call_model_fn_once.py')
 call_model_fn_once_tar = file_utils.make_tarfile(call_model_fn_once_script,
                                                  os.path.join(model_cpu_dir, "torch_model.pth"),
@@ -85,6 +98,26 @@
     default_model_eia_script, os.path.join(default_model_eia_dir, "model.pt"), default_model_eia_dir
 )
 
+resnet18_model_dir = os.path.join(mme_path, resnet18_sub_dir)
+resnet18_script = os.path.join(resnet18_model_dir, code_sub_dir, "inference.py")
+resnet18_tar = file_utils.make_tarfile(
+    resnet18_script,
+    os.path.join(resnet18_model_dir, "model.pt"),
+    resnet18_model_dir,
+    filename="resnet18.tar.gz",
+    script_path="code"
+)
+
+traced_resnet18_model_dir = os.path.join(mme_path, traced_resnet18_sub_dir)
+traced_resnet18_script = os.path.join(traced_resnet18_model_dir, code_sub_dir, "inference.py")
+traced_resnet18_tar = file_utils.make_tarfile(
+    traced_resnet18_script,
+    os.path.join(traced_resnet18_model_dir, "traced_resnet18.pt"),
+    traced_resnet18_model_dir,
+    filename="traced_resnet18.tar.gz",
+    script_path="code"
+)
+
 ROLE = 'dummy/unused-role'
 DEFAULT_TIMEOUT = 20
 PYTHON3 = 'py3'
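
Note: file_utils.make_tarfile itself is defined elsewhere in the repository and is not part of this diff. For orientation, a minimal sketch of a helper matching the call sites above (positional script, model artifact, and output directory; optional filename, script_path, and the requirements argument this commit starts passing) might look like the following; the body and the default filename are illustrative assumptions, not the toolkit's actual implementation:

import os
import tarfile


def make_tarfile(script, model, output_dir, filename="model.tar.gz",
                 script_path=None, requirements=None):
    # Pack the model artifact plus the inference script (and optionally a
    # requirements.txt) into output_dir/filename, with the model at the
    # archive root and scripts under script_path (e.g. "code/") when given.
    output_path = os.path.join(output_dir, filename)
    with tarfile.open(output_path, "w:gz") as tar:
        tar.add(model, arcname=os.path.basename(model))
        prefix = script_path + "/" if script_path else ""
        tar.add(script, arcname=prefix + os.path.basename(script))
        if requirements:
            tar.add(requirements, arcname=prefix + os.path.basename(requirements))
    return output_path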
Lines changed: 141 additions & 0 deletions
@@ -0,0 +1,141 @@
+# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+import os
+import subprocess
+import sys
+import time
+
+import pytest
+import requests
+import torch
+from concurrent.futures import ThreadPoolExecutor
+import csv
+
+from integration import model_gpu_context_dir
+
+BASE_URL = "http://0.0.0.0:8080/"
+PING_URL = BASE_URL + "ping"
+INVOCATION_URL = BASE_URL + "models/model/invoke"
+GPU_COUNT = torch.cuda.device_count()
+DEVICE_IDS_EXPECTED = [i for i in range(GPU_COUNT)]
+
+
+def send_request(input_data, headers):
+    requests.post(INVOCATION_URL, data=input_data, headers=headers)
+
+
+def read_csv(filename):
+    data = {}
+    with open(os.path.join(model_gpu_context_dir, 'code', filename), 'r') as csv_file:
+        csv_reader = csv.reader(csv_file)
+        for row in csv_reader:
+            device_id, pid, threadid = row
+            # keep only the first row recorded for each device
+            if int(device_id) in data:
+                continue
+            data[int(device_id)] = {'pid': pid, 'threadid': threadid}
+    return data
+
+
+@pytest.fixture(scope="module", autouse=True)
+def container(image_uri):
+    try:
+        if 'cpu' in image_uri:
+            pytest.skip("Skipping because tests running on CPU instance")
+
+        command = (
+            "docker run --gpus=all -p 8080:8080 "
+            "--name sagemaker-pytorch-inference-toolkit-context-test "
+            "-v {}:/opt/ml/model "
+            "{} serve"
+        ).format(model_gpu_context_dir, image_uri)
+
+        proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT)
+
+        attempts = 0
+        while attempts < 10:
+            time.sleep(3)
+            try:
+                requests.get(PING_URL)
+                break
+            except Exception:
+                attempts += 1
+        time.sleep(60)
+        yield proc.pid
+
+    finally:
+        if 'cpu' in image_uri:
+            pytest.skip("Skipping because tests running on CPU instance")
+        subprocess.check_call("docker rm -f sagemaker-pytorch-inference-toolkit-context-test".split())
+
+
+@pytest.fixture(scope="module", autouse=True)
+def inference_requests():
+    headers = {"Content-Type": "application/json"}
+    with ThreadPoolExecutor(max_workers=GPU_COUNT) as executor:
+        for i in range(32):
+            executor.submit(send_request, b'input', headers)
+    time.sleep(60)
+    yield
+
+
+@pytest.fixture(scope="module", name="model_fn_device_info")
+def model_fn_device_info():
+    return read_csv("model_fn_device_info.csv")
+
+
+@pytest.fixture(scope="module", name="input_fn_device_info")
+def input_fn_device_info():
+    return read_csv("input_fn_device_info.csv")
+
+
+@pytest.fixture(scope="module", name="predict_fn_device_info")
+def predict_fn_device_info():
+    return read_csv("predict_fn_device_info.csv")
+
+
+@pytest.fixture(scope="module", name="output_fn_device_info")
+def output_fn_device_info():
+    return read_csv("output_fn_device_info.csv")
+
+
+def test_context_all_device_ids(
+    model_fn_device_info, input_fn_device_info, predict_fn_device_info, output_fn_device_info
+):
+    for device_id in DEVICE_IDS_EXPECTED:
+        assert device_id in model_fn_device_info
+        assert device_id in input_fn_device_info
+        assert device_id in predict_fn_device_info
+        assert device_id in output_fn_device_info
+
+
+def test_same_pid_threadid(
+    model_fn_device_info, input_fn_device_info, predict_fn_device_info, output_fn_device_info
+):
+    for device_id in DEVICE_IDS_EXPECTED:
+        pid_model_fn = model_fn_device_info[device_id]['pid']
+        threadid_model_fn = model_fn_device_info[device_id]['threadid']
+
+        pid_input_fn = input_fn_device_info[device_id]['pid']
+        threadid_input_fn = input_fn_device_info[device_id]['threadid']
+
+        pid_predict_fn = predict_fn_device_info[device_id]['pid']
+        threadid_predict_fn = predict_fn_device_info[device_id]['threadid']
+
+        pid_output_fn = output_fn_device_info[device_id]['pid']
+        threadid_output_fn = output_fn_device_info[device_id]['threadid']
+
+        assert pid_model_fn == pid_input_fn == pid_output_fn == pid_predict_fn
+        assert threadid_model_fn == threadid_input_fn == threadid_output_fn == threadid_predict_fn
