
Commit 9a5db2c

add regression testing docs
update traffic split setup
update traffic split setup
update requirement
update regression testing doc
consolidate performance docs
add newline
add example yamls for multi lora deployment and regression lpg testing
fix qps range
fix typo
fix typo
fix typo
fix typo
fix typo
fix typo
fix typo
fix broken link
add instructions to build lpg image
update benchmark.yaml
update lpg yamls
update readme
update regression testing markdown to refine docker image creation for LPG
update regression yamls
refine regression doc
1 parent cea06e2 commit 9a5db2c

File tree

8 files changed (+913, -13 lines)

Lines changed: 237 additions & 0 deletions
@@ -0,0 +1,237 @@
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: adapter-0
spec:
  modelName: adapter-0
  criticality: Critical
  poolRef:
    name: vllm-llama3-8b-instruct
  targetModels:
  - name: adapter-0
    weight: 100
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: adapter-1
spec:
  modelName: adapter-1
  criticality: Critical
  poolRef:
    name: vllm-llama3-8b-instruct
  targetModels:
  - name: adapter-1
    weight: 100
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: adapter-2
spec:
  modelName: adapter-2
  criticality: Critical
  poolRef:
    name: vllm-llama3-8b-instruct
  targetModels:
  - name: adapter-2
    weight: 100
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: adapter-3
spec:
  modelName: adapter-3
  criticality: Critical
  poolRef:
    name: vllm-llama3-8b-instruct
  targetModels:
  - name: adapter-3
    weight: 100
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: adapter-4
spec:
  modelName: adapter-4
  criticality: Critical
  poolRef:
    name: vllm-llama3-8b-instruct
  targetModels:
  - name: adapter-4
    weight: 100
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: adapter-5
spec:
  modelName: adapter-5
  criticality: Critical
  poolRef:
    name: vllm-llama3-8b-instruct
  targetModels:
  - name: adapter-5
    weight: 100
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: adapter-6
spec:
  modelName: adapter-6
  criticality: Critical
  poolRef:
    name: vllm-llama3-8b-instruct
  targetModels:
  - name: adapter-6
    weight: 100
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: adapter-7
spec:
  modelName: adapter-7
  criticality: Critical
  poolRef:
    name: vllm-llama3-8b-instruct
  targetModels:
  - name: adapter-7
    weight: 100
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: adapter-8
spec:
  modelName: adapter-8
  criticality: Critical
  poolRef:
    name: vllm-llama3-8b-instruct
  targetModels:
  - name: adapter-8
    weight: 100
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: adapter-9
spec:
  modelName: adapter-9
  criticality: Critical
  poolRef:
    name: vllm-llama3-8b-instruct
  targetModels:
  - name: adapter-9
    weight: 100
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: adapter-10
spec:
  modelName: adapter-10
  criticality: Critical
  poolRef:
    name: vllm-llama3-8b-instruct
  targetModels:
  - name: adapter-10
    weight: 100
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: adapter-11
spec:
  modelName: adapter-11
  criticality: Critical
  poolRef:
    name: vllm-llama3-8b-instruct
  targetModels:
  - name: adapter-11
    weight: 100
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: adapter-12
spec:
  modelName: adapter-12
  criticality: Critical
  poolRef:
    name: vllm-llama3-8b-instruct
  targetModels:
  - name: adapter-12
    weight: 100
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: adapter-13
spec:
  modelName: adapter-13
  criticality: Critical
  poolRef:
    name: vllm-llama3-8b-instruct
  targetModels:
  - name: adapter-13
    weight: 100
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: adapter-14
spec:
  modelName: adapter-14
  criticality: Critical
  poolRef:
    name: vllm-llama3-8b-instruct
  targetModels:
  - name: adapter-14
    weight: 100
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: base-model
spec:
  modelName: meta-llama/Llama-3.1-8B-Instruct
  criticality: Critical
  poolRef:
    name: vllm-llama3-8b-instruct
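
The fifteen adapter entries above differ only by index, so the manifest can also be generated rather than maintained by hand. The following Python sketch (not part of this commit) reproduces the documents above with plain string templating; the constant names and printing to stdout are illustrative assumptions.

# Sketch: regenerate the InferenceModel manifest above (15 adapters + base model).
# Assumptions for illustration only: plain string templating, output to stdout.
POOL = "vllm-llama3-8b-instruct"
NUM_ADAPTERS = 15

TEMPLATE = """apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: {name}
spec:
  modelName: {model}
  criticality: Critical
  poolRef:
    name: {pool}"""

docs = []
for i in range(NUM_ADAPTERS):
    name = f"adapter-{i}"
    doc = TEMPLATE.format(name=name, model=name, pool=POOL)
    doc += f"\n  targetModels:\n  - name: {name}\n    weight: 100"
    docs.append(doc)

# The base model entry has no targetModels section, as in the manifest above.
docs.append(TEMPLATE.format(name="base-model",
                            model="meta-llama/Llama-3.1-8B-Instruct",
                            pool=POOL))

print("\n---\n".join(docs))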
Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: benchmark-tool
  name: benchmark-tool
spec:
  replicas: 1
  selector:
    matchLabels:
      app: benchmark-tool
  template:
    metadata:
      labels:
        app: benchmark-tool
    spec:
      containers:
      # Build image from this source https://github.com/AI-Hypercomputer/inference-benchmark/blob/1c92df607751a7ddb04e2152ed7f6aaf85bd9ca7
      - image: '<DOCKER_IMAGE>'
        imagePullPolicy: Always
        name: benchmark-tool
        command:
        - bash
        - -c
        - ./latency_throughput_curve.sh
        env:
        - name: IP
          value: '<target-ip>'
        - name: REQUEST_RATES
          value: '20,40,60,80,100,120,140,160,180,200'
        - name: BENCHMARK_TIME_SECONDS
          value: '300'
        - name: TOKENIZER
          value: 'meta-llama/Llama-3.1-8B-Instruct'
        - name: MODELS
          value: 'adapter-0,adapter-1,adapter-2,adapter-3,adapter-4,adapter-5,adapter-6,adapter-7,adapter-8,adapter-9,adapter-10,adapter-11,adapter-12,adapter-13,adapter-14'
        - name: TRAFFIC_SPLIT
          value: '0.12,0.12,0.12,0.12,0.12,0.06,0.06,0.06,0.06,0.06,0.02,0.02,0.02,0.02,0.02'
        - name: BACKEND
          value: vllm
        - name: PORT
          value: "80"
        - name: INPUT_LENGTH
          value: "1024"
        - name: OUTPUT_LENGTH
          value: '1024'
        - name: FILE_PREFIX
          value: benchmark
        - name: PROMPT_DATASET_FILE
          value: Infinity-Instruct_conversations.json
        - name: HF_TOKEN
          valueFrom:
            secretKeyRef:
              key: token
              name: hf-token
        resources:
          limits:
            cpu: "2"
            memory: 20Gi
          requests:
            cpu: "2"
            memory: 20Gi
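
The TRAFFIC_SPLIT weights are positional, one per entry in MODELS, and in this manifest they sum to 1.0 (5×0.12 + 5×0.06 + 5×0.02). A standalone sanity check such as the sketch below (not part of the commit) can catch a count or sum mismatch before the Deployment is applied.

# Sanity-check the MODELS / TRAFFIC_SPLIT pair from the Deployment above:
# one weight per model, and weights summing to 1.0.
models = ("adapter-0,adapter-1,adapter-2,adapter-3,adapter-4,"
          "adapter-5,adapter-6,adapter-7,adapter-8,adapter-9,"
          "adapter-10,adapter-11,adapter-12,adapter-13,adapter-14").split(",")
split = [float(w) for w in
         "0.12,0.12,0.12,0.12,0.12,0.06,0.06,0.06,0.06,0.06,0.02,0.02,0.02,0.02,0.02".split(",")]

assert len(models) == len(split), "need exactly one weight per model"
assert abs(sum(split) - 1.0) < 1e-9, f"weights sum to {sum(split)}, expected 1.0"
print(f"{len(models)} models, weights sum to {sum(split):.2f}")  # 15 models, weights sum to 1.00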
Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: benchmark-tool
  name: benchmark-tool
spec:
  replicas: 1
  selector:
    matchLabels:
      app: benchmark-tool
  template:
    metadata:
      labels:
        app: benchmark-tool
    spec:
      containers:
      # Build image from this source https://github.com/AI-Hypercomputer/inference-benchmark/blob/1c92df607751a7ddb04e2152ed7f6aaf85bd9ca7
      - image: '<DOCKER_IMAGE>'
        imagePullPolicy: Always
        name: benchmark-tool
        command:
        - bash
        - -c
        - ./latency_throughput_curve.sh
        env:
        - name: IP
          value: '<target-ip>'
        - name: REQUEST_RATES
          value: '300,310,320,330,340,350'
        - name: BENCHMARK_TIME_SECONDS
          value: '300'
        - name: TOKENIZER
          value: 'meta-llama/Llama-3.1-8B-Instruct'
        - name: MODELS
          value: 'meta-llama/Llama-3.1-8B-Instruct'
        - name: BACKEND
          value: vllm
        - name: PORT
          value: "80"
        - name: INPUT_LENGTH
          value: "1024"
        - name: OUTPUT_LENGTH
          value: '1024'
        - name: FILE_PREFIX
          value: benchmark
        - name: PROMPT_DATASET_FILE
          value: billsum_conversations.json
        - name: HF_TOKEN
          valueFrom:
            secretKeyRef:
              key: token
              name: hf-token
        resources:
          limits:
            cpu: "2"
            memory: 20Gi
          requests:
            cpu: "2"
            memory: 20Gi
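
REQUEST_RATES here sweeps a narrow QPS range for the base model. Assuming each listed rate is held for BENCHMARK_TIME_SECONDS (an assumption about how latency_throughput_curve.sh schedules the sweep, not something stated in this manifest), the overall runtime can be estimated up front with a quick calculation like the one below.

# Estimate the total runtime of the sweep configured above.
# Assumption: each request rate runs for BENCHMARK_TIME_SECONDS.
request_rates = [300, 310, 320, 330, 340, 350]
benchmark_time_seconds = 300

total = len(request_rates) * benchmark_time_seconds
print(f"{len(request_rates)} rates x {benchmark_time_seconds}s = {total}s (~{total // 60} min)")
# 6 rates x 300s = 1800s (~30 min), excluding startup and per-stage overhead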
