
Commit 48d5d98

Add test to check that device, pid, and thread id are the same for input_fn, output_fn, and predict_fn
1 parent: 68bfa5d
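For context on what the new test asserts: a model server worker handles each invocation by calling the three handlers back to back on a single thread, so all three should observe the same device, process id, and thread id. A minimal sketch of that flow (illustrative only, not toolkit code):

import os
import threading

# Illustrative stand-ins for the real handlers: input_fn records which
# process and thread it runs on, and the value passes through unchanged.
def input_fn(data, content_type, context):
    return (os.getpid(), threading.current_thread().ident)

def predict_fn(data, model, context):
    return data

def output_fn(prediction, accept, context):
    return prediction

# One request flows through all three handlers on the calling thread,
# so the recorded (pid, thread_id) pair is identical throughout.
print(output_fn(predict_fn(input_fn(b"input", "application/json", None),
                           None, None),
                "application/json", None))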

File tree

2 files changed: +88 additions, −10 deletions


test/integration/local/test_model_fn_context.py renamed to test/integration/local/test_model_context.py

Lines changed: 48 additions & 6 deletions
@@ -20,11 +20,16 @@
 import pytest
 import requests
 import torch
+from concurrent.futures import ThreadPoolExecutor
+import csv
 
 from integration import model_gpu_context_dir
 
 BASE_URL = "http://0.0.0.0:8080/"
 PING_URL = BASE_URL + "ping"
+INVOCATION_URL = BASE_URL + "models/model/invoke"
+GPU_COUNT = torch.cuda.device_count()
+GPU_IDS_EXPECTED = [i for i in range(GPU_COUNT)]
 
 
 @pytest.fixture(scope="module", autouse=True)
@@ -34,7 +39,7 @@ def container(image_uri):
             pytest.skip("Skipping because tests running on CPU instance")
 
         command = (
-            "docker run --gpus=all "
+            "docker run --gpus=all -p 8080:8080 "
             "--name sagemaker-pytorch-inference-toolkit-context-test "
             "-v {}:/opt/ml/model "
             "{} serve"
@@ -60,16 +65,53 @@ def container(image_uri):
 
 
 def test_context_all_device_ids():
-    gpu_count = torch.cuda.device_count()
+    time.sleep(5)
 
-    gpu_ids_expected = [i for i in range(gpu_count)]
     gpu_ids_actual = []
-
-    with open(os.path.join(model_gpu_context_dir, 'code', 'device_info.txt'), 'r') as f:
+    with open(os.path.join(model_gpu_context_dir, 'code', 'model_fn_device_info.txt'), 'r') as f:
         for line in f:
             gpu_ids_actual.append(int(line))
 
     gpu_ids_actual = list(set(gpu_ids_actual))
     gpu_ids_actual.sort()
 
-    assert gpu_ids_actual == gpu_ids_expected
+    assert gpu_ids_actual == GPU_IDS_EXPECTED
+
+
+def test_same_pid_threadid():
+    time.sleep(5)
+    headers = {"Content-Type": "application/json"}
+    with ThreadPoolExecutor(max_workers=GPU_COUNT) as executor:
+        for i in range(GPU_COUNT):
+            executor.submit(send_request, b'input', headers)
+
+    input_fn_device_info = read_csv("input_fn_device_info.csv")
+    output_fn_device_info = read_csv("output_fn_device_info.csv")
+    predict_fn_device_info = read_csv("predict_fn_device_info.csv")
+
+    assert len(input_fn_device_info) == len(output_fn_device_info) == len(predict_fn_device_info)
+
+    for input_fn_row, output_fn_row, predict_fn_row in zip(
+        input_fn_device_info, output_fn_device_info, predict_fn_device_info
+    ):
+
+        device_id_input_fn, pid_input_fn, threadid_input_fn = input_fn_row
+        device_id_output_fn, pid_output_fn, threadid_output_fn = output_fn_row
+        device_id_predict_fn, pid_predict_fn, threadid_predict_fn = predict_fn_row
+
+        assert device_id_input_fn == device_id_output_fn == device_id_predict_fn
+        assert pid_input_fn == pid_output_fn == pid_predict_fn
+        assert threadid_input_fn == threadid_output_fn == threadid_predict_fn
+
+
+def send_request(input_data, headers):
+    requests.post(INVOCATION_URL, data=input_data, headers=headers)
+
+
+def read_csv(filename):
+    data = []
+    with open(os.path.join(model_gpu_context_dir, 'code', filename), 'r') as csv_file:
+        csv_reader = csv.reader(csv_file)
+        for row in csv_reader:
+            data.append(row)
+    return data
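One detail worth keeping in mind when reading the assertions above: csv.reader yields every field as a string, so the per-handler triples compare as string tuples. A small sketch with invented values:

import csv
import io

# Hypothetical CSV contents; the real files are written by inference.py.
rows = list(csv.reader(io.StringIO("0,4242,139876543210496\n")))
device_id, pid, threadid = rows[0]
assert (device_id, pid, threadid) == ("0", "4242", "139876543210496")
assert isinstance(pid, str)  # fields come back as strings, not ints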

test/resources/model_gpu_context/code/inference.py

Lines changed: 40 additions & 4 deletions
@@ -14,14 +14,50 @@
 
 import os
 import torch
+import csv
+import threading
 
-def model_fn(model_dir, context):
-    script_dir = os.path.dirname(os.path.abspath(__file__))
-    file_path = os.path.join(script_dir, "device_info.txt")
-
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def create_model_fn_device_list(filename, context):
+    # Append the GPU id assigned to this worker, one id per line.
+    file_path = os.path.join(SCRIPT_DIR, filename)
     device = torch.device("cuda:" + str(context.system_properties.get("gpu_id")))
     device_str = str(device)[-1]
     with open(file_path, "a") as file:
         file.write(device_str + "\n")
+
+
+def create_device_info_csv(filename, context):
+    # Record one (device_id, pid, thread_id) row per handler invocation.
+    pid = os.getpid()
+    threadid = threading.current_thread().ident
+
+    device = torch.device("cuda:" + str(context.system_properties.get("gpu_id")))
+    device_id = str(device)[-1]
+
+    file_path = os.path.join(SCRIPT_DIR, filename)
+    with open(file_path, "a", newline="") as csv_file:
+        writer = csv.writer(csv_file)
+        writer.writerow([device_id, pid, threadid])
 
+
+def model_fn(model_dir, context):
+    create_model_fn_device_list("model_fn_device_info.txt", context)
     return 'model'
+
+
+def input_fn(data, content_type, context):
+    create_device_info_csv("input_fn_device_info.csv", context)
+    return data
+
+
+def predict_fn(data, model, context):
+    create_device_info_csv("predict_fn_device_info.csv", context)
+    return b'output'
+
+
+def output_fn(prediction, accept, context):
+    create_device_info_csv("output_fn_device_info.csv", context)
+    return prediction
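The handlers only touch context.system_properties, so they can be smoke-tested off-server as well. A minimal sketch, assuming inference.py is importable; StubContext is a hypothetical stand-in for the model server's context object, not a toolkit class:

import inference  # the module shown above

class StubContext:
    # Hypothetical stand-in exposing only the attribute the handlers read.
    def __init__(self, gpu_id):
        self.system_properties = {"gpu_id": gpu_id}

ctx = StubContext(gpu_id=0)
model = inference.model_fn("/opt/ml/model", ctx)
data = inference.input_fn(b"input", "application/json", ctx)
prediction = inference.predict_fn(data, model, ctx)
print(inference.output_fn(prediction, "application/json", ctx))  # b'output'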
