Commit 79e49d2

Add new unit and integration tests
1 parent 1fbd3f7 commit 79e49d2

File tree

23 files changed: +860 -9 lines changed


CHANGELOG.md

Lines changed: 6 additions & 0 deletions
@@ -1,5 +1,11 @@
 # Changelog
 
+## v2.0.18 (2023-10-10)
+
+### Bug Fixes and Other Changes
+
+* Fix integration tests and update Python versions
+
 ## v2.0.17 (2023-08-07)
 
 ### Bug Fixes and Other Changes

VERSION

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-2.0.18.dev0
+2.0.19.dev0

buildspec.yml

Lines changed: 4 additions & 2 deletions
@@ -55,10 +55,10 @@ phases:
       # build DLC GPU image because the base DLC image is too big and takes too long to build as part of the test
       - python3 setup.py sdist
       - $(aws ecr get-login --registry-ids $DLC_ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
+      - create-key-pair
       - |
         for FRAMEWORK_VERSION in $FRAMEWORK_VERSIONS;
         do
-          create-key-pair;
           launch-ec2-instance --instance-type $instance_type --ami-name ami-03e3ef8c92fdb39ad;
           DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID";
           build_dir="test/container/$FRAMEWORK_VERSION";
@@ -71,8 +71,10 @@ phases:
           execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg";
           docker system prune --all --force;
           cleanup-gpu-instances;
-          cleanup-key-pairs;
+          rm ~/.instance_id;
+          rm ~/.ip_address;
         done
+      - cleanup-key-pairs;
 
       # run CPU sagemaker integration tests
       - |

test/integration/__init__.py

Lines changed: 34 additions & 1 deletion
@@ -19,22 +19,29 @@
 resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'resources'))
 mnist_path = os.path.join(resources_path, 'mnist')
 resnet18_path = os.path.join(resources_path, 'resnet18')
+mme_path = os.path.join(resources_path, 'mme')
+model_gpu_context_dir = os.path.join(resources_path, 'model_gpu_context')
 data_dir = os.path.join(mnist_path, 'data')
 training_dir = os.path.join(data_dir, 'training')
 cpu_sub_dir = 'model_cpu'
 gpu_sub_dir = 'model_gpu'
 eia_sub_dir = 'model_eia'
+inductor_sub_dir = 'model_inductor'
 code_sub_dir = 'code'
 default_sub_dir = 'default_model'
 default_sub_eia_dir = 'default_model_eia'
 default_sub_traced_resnet_dir = 'default_traced_resnet'
+resnet18_sub_dir = 'resnet18'
+traced_resnet18_sub_dir = 'traced_resnet18'
 
 model_cpu_dir = os.path.join(mnist_path, cpu_sub_dir)
 mnist_cpu_script = os.path.join(model_cpu_dir, code_sub_dir, 'mnist.py')
+mnist_cpu_requirements = os.path.join(model_cpu_dir, code_sub_dir, 'requirements.txt')
 model_cpu_tar = file_utils.make_tarfile(mnist_cpu_script,
                                         os.path.join(model_cpu_dir, "torch_model.pth"),
                                         model_cpu_dir,
-                                        script_path="code")
+                                        script_path="code",
+                                        requirements=mnist_cpu_requirements)
 
 model_cpu_1d_dir = os.path.join(model_cpu_dir, '1d')
 mnist_1d_script = os.path.join(model_cpu_1d_dir, code_sub_dir, 'mnist_1d.py')
@@ -56,6 +63,12 @@
                                         os.path.join(model_eia_dir, "torch_model.pth"),
                                         model_eia_dir)
 
+model_inductor_dir = os.path.join(mnist_path, inductor_sub_dir)
+mnist_inductor_script = os.path.join(model_inductor_dir, code_sub_dir, 'mnist.py')
+model_inductor_tar = file_utils.make_tarfile(mnist_inductor_script,
+                                             os.path.join(model_inductor_dir, "torch_model.pth"),
+                                             model_inductor_dir)
+
 call_model_fn_once_script = os.path.join(model_cpu_dir, code_sub_dir, 'call_model_fn_once.py')
 call_model_fn_once_tar = file_utils.make_tarfile(call_model_fn_once_script,
                                                  os.path.join(model_cpu_dir, "torch_model.pth"),
@@ -85,6 +98,26 @@
     default_model_eia_script, os.path.join(default_model_eia_dir, "model.pt"), default_model_eia_dir
 )
 
+resnet18_model_dir = os.path.join(mme_path, resnet18_sub_dir)
+resnet18_script = os.path.join(resnet18_model_dir, code_sub_dir, "inference.py")
+resnet18_tar = file_utils.make_tarfile(
+    resnet18_script,
+    os.path.join(resnet18_model_dir, "model.pt"),
+    resnet18_model_dir,
+    filename="resnet18.tar.gz",
+    script_path="code"
+)
+
+traced_resnet18_model_dir = os.path.join(mme_path, traced_resnet18_sub_dir)
+traced_resnet18_script = os.path.join(traced_resnet18_model_dir, code_sub_dir, "inference.py")
+traced_resnet18_tar = file_utils.make_tarfile(
+    traced_resnet18_script,
+    os.path.join(traced_resnet18_model_dir, "traced_resnet18.pt"),
+    traced_resnet18_model_dir,
+    filename="traced_resnet18.tar.gz",
+    script_path="code"
+)
+
 ROLE = 'dummy/unused-role'
 DEFAULT_TIMEOUT = 20
 PYTHON3 = 'py3'
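
Note: file_utils.make_tarfile itself is defined elsewhere in the repository and is not part of this diff. For orientation, a minimal sketch of a helper matching the call sites above (positional script, model artifact, and output directory; optional filename, script_path, and the requirements argument this commit starts passing) might look like the following; the body and the default filename are illustrative assumptions, not the toolkit's actual implementation:

import os
import tarfile


def make_tarfile(script, model, output_dir, filename="model.tar.gz",
                 script_path=None, requirements=None):
    # Pack the model artifact plus the inference script (and optionally a
    # requirements.txt) into output_dir/filename, with the model at the
    # archive root and scripts under script_path (e.g. "code/") when given.
    output_path = os.path.join(output_dir, filename)
    with tarfile.open(output_path, "w:gz") as tar:
        tar.add(model, arcname=os.path.basename(model))
        prefix = script_path + "/" if script_path else ""
        tar.add(script, arcname=prefix + os.path.basename(script))
        if requirements:
            tar.add(requirements, arcname=prefix + os.path.basename(requirements))
    return output_path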
Lines changed: 141 additions & 0 deletions
@@ -0,0 +1,141 @@
+# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+import os
+import subprocess
+import sys
+import time
+
+import pytest
+import requests
+import torch
+from concurrent.futures import ThreadPoolExecutor
+import csv
+
+from integration import model_gpu_context_dir
+
+BASE_URL = "http://0.0.0.0:8080/"
+PING_URL = BASE_URL + "ping"
+INVOCATION_URL = BASE_URL + "models/model/invoke"
+GPU_COUNT = torch.cuda.device_count()
+DEVICE_IDS_EXPECTED = [i for i in range(GPU_COUNT)]
+
+
+def send_request(input_data, headers):
+    requests.post(INVOCATION_URL, data=input_data, headers=headers)
+
+
+def read_csv(filename):
+    data = {}
+    with open(os.path.join(model_gpu_context_dir, 'code', filename), 'r') as csv_file:
+        csv_reader = csv.reader(csv_file)
+        for row in csv_reader:
+            device_id, pid, threadid = row
+            # keep only the first row recorded for each device
+            if int(device_id) in data:
+                continue
+            data[int(device_id)] = {'pid': pid, 'threadid': threadid}
+    return data
+
+
+@pytest.fixture(scope="module", autouse=True)
+def container(image_uri):
+    try:
+        if 'cpu' in image_uri:
+            pytest.skip("Skipping because tests running on CPU instance")
+
+        command = (
+            "docker run --gpus=all -p 8080:8080 "
+            "--name sagemaker-pytorch-inference-toolkit-context-test "
+            "-v {}:/opt/ml/model "
+            "{} serve"
+        ).format(model_gpu_context_dir, image_uri)
+
+        proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT)
+
+        attempts = 0
+        while attempts < 10:
+            time.sleep(3)
+            try:
+                requests.get(PING_URL)
+                break
+            except Exception:
+                attempts += 1
+        time.sleep(60)
+        yield proc.pid
+
+    finally:
+        if 'cpu' in image_uri:
+            pytest.skip("Skipping because tests running on CPU instance")
+        subprocess.check_call("docker rm -f sagemaker-pytorch-inference-toolkit-context-test".split())
+
+
+@pytest.fixture(scope="module", autouse=True)
+def inference_requests():
+    headers = {"Content-Type": "application/json"}
+    with ThreadPoolExecutor(max_workers=GPU_COUNT) as executor:
+        for i in range(32):
+            executor.submit(send_request, b'input', headers)
+    time.sleep(60)
+    yield
+
+
+@pytest.fixture(scope="module", name="model_fn_device_info")
+def model_fn_device_info():
+    return read_csv("model_fn_device_info.csv")
+
+
+@pytest.fixture(scope="module", name="input_fn_device_info")
+def input_fn_device_info():
+    return read_csv("input_fn_device_info.csv")
+
+
+@pytest.fixture(scope="module", name="predict_fn_device_info")
+def predict_fn_device_info():
+    return read_csv("predict_fn_device_info.csv")
+
+
+@pytest.fixture(scope="module", name="output_fn_device_info")
+def output_fn_device_info():
+    return read_csv("output_fn_device_info.csv")
+
+
+def test_context_all_device_ids(
+    model_fn_device_info, input_fn_device_info, predict_fn_device_info, output_fn_device_info
+):
+    for device_id in DEVICE_IDS_EXPECTED:
+        assert device_id in model_fn_device_info
+        assert device_id in input_fn_device_info
+        assert device_id in predict_fn_device_info
+        assert device_id in output_fn_device_info
+
+
+def test_same_pid_threadid(
+    model_fn_device_info, input_fn_device_info, predict_fn_device_info, output_fn_device_info
+):
+    for device_id in DEVICE_IDS_EXPECTED:
+        pid_model_fn = model_fn_device_info[device_id]['pid']
+        threadid_model_fn = model_fn_device_info[device_id]['threadid']
+
+        pid_input_fn = input_fn_device_info[device_id]['pid']
+        threadid_input_fn = input_fn_device_info[device_id]['threadid']
+
+        pid_predict_fn = predict_fn_device_info[device_id]['pid']
+        threadid_predict_fn = predict_fn_device_info[device_id]['threadid']
+
+        pid_output_fn = output_fn_device_info[device_id]['pid']
+        threadid_output_fn = output_fn_device_info[device_id]['threadid']
+
+        assert pid_model_fn == pid_input_fn == pid_output_fn == pid_predict_fn
+        assert threadid_model_fn == threadid_input_fn == threadid_output_fn == threadid_predict_fn
