5 files changed +13 -13 lines changed

File 1 of 5:

@@ -44,11 +44,11 @@ apiVersion: inference.networking.x-k8s.io/v1alpha2
 kind: InferencePool
 metadata:
   labels:
-  name: my-pool
+  name: vllm-llama2-7b-pool
 spec:
   targetPortNumber: 8000
   selector:
-    app: my-pool
+    app: vllm-llama2-7b-pool
   extensionRef:
     name: inference-gateway-ext-proc
 ---
@@ -75,7 +75,7 @@
         imagePullPolicy: Always
         args:
         - -poolName
-        - "my-pool"
+        - "vllm-llama2-7b-pool"
         - -v
         - "4"
        - -grpcPort
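The two renamed values in this file are coupled: the endpoint-picker (ext-proc) container is pointed at its pool through the -poolName argument, which has to equal the InferencePool's metadata.name, and spec.selector has to match the labels on the model-server pods. As a reading aid, here is the InferencePool reassembled from the fields visible in the hunks above, with that coupling annotated (the comments are editorial, not part of the diff):

apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferencePool
metadata:
  name: vllm-llama2-7b-pool          # referenced by the ext-proc -poolName argument
spec:
  targetPortNumber: 8000             # port the model-server containers listen on
  selector:
    app: vllm-llama2-7b-pool         # must match the model-server pod labels
  extensionRef:
    name: inference-gateway-ext-proc # the endpoint-picker extension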
File 2 of 5:

@@ -6,7 +6,7 @@
   modelName: tweet-summary
   criticality: Critical
   poolRef:
-    name: my-pool
+    name: vllm-llama2-7b-pool
   targetModels:
   - name: tweet-summary-1
     weight: 100
@@ -20,7 +20,7 @@
   modelName: meta-llama/Llama-2-7b-hf
   criticality: Critical
   poolRef:
-    name: my-pool
+    name: vllm-llama2-7b-pool

 ---
 apiVersion: inference.networking.x-k8s.io/v1alpha2
@@ -31,4 +31,4 @@
   modelName: Qwen/Qwen2.5-1.5B-Instruct
   criticality: Critical
   poolRef:
-    name: my-pool
+    name: vllm-llama2-7b-pool
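Every InferenceModel in this file attaches itself to the pool through spec.poolRef.name, which is why the rename recurs three times here. A minimal sketch of one such object for orientation; metadata.name and anything else not visible in the hunks above is illustrative, not taken from this diff:

apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
  name: tweet-summary              # illustrative; not visible in the hunks
spec:
  modelName: tweet-summary
  criticality: Critical
  poolRef:
    name: vllm-llama2-7b-pool      # must equal the InferencePool's metadata.name
  targetModels:
  - name: tweet-summary-1
    weight: 100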
File 3 of 5:

@@ -1,16 +1,16 @@
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: my-pool
+  name: vllm-llama2-7b-pool
 spec:
   replicas: 3
   selector:
     matchLabels:
-      app: my-pool
+      app: vllm-llama2-7b-pool
   template:
     metadata:
       labels:
-        app: my-pool
+        app: vllm-llama2-7b-pool
     spec:
       containers:
       - name: lora
File 4 of 5:

@@ -1,16 +1,16 @@
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: my-pool
+  name: vllm-llama2-7b-pool
 spec:
   replicas: 3
   selector:
     matchLabels:
-      app: my-pool
+      app: vllm-llama2-7b-pool
   template:
     metadata:
       labels:
-        app: my-pool
+        app: vllm-llama2-7b-pool
     spec:
       containers:
      - name: lora
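The two Deployments above supply the pod labels that the InferencePool's spec.selector matches on, so the rename touches the Deployment name, selector.matchLabels, and the pod template labels in each file; only the template labels are what the pool's selector actually matches. A minimal sketch of that label flow, assuming a model-server container listening on the pool's targetPortNumber (the container details are not visible in the hunks and are illustrative):

apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm-llama2-7b-pool
spec:
  replicas: 3
  selector:
    matchLabels:
      app: vllm-llama2-7b-pool     # ties the Deployment to its pods
  template:
    metadata:
      labels:
        app: vllm-llama2-7b-pool   # matched by InferencePool spec.selector
    spec:
      containers:
      - name: lora
        ports:
        - containerPort: 8000      # illustrative; should line up with targetPortNumber: 8000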
File 5 of 5:

@@ -57,7 +57,7 @@ const (
     // TODO [danehans]: Must be "default" until https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/227 is fixed
     nsName = "default"
     // modelServerName is the name of the model server test resources.
-    modelServerName = "my-pool"
+    modelServerName = "vllm-llama2-7b-pool"
     // modelName is the test model name.
     modelName = "tweet-summary"
     // envoyName is the name of the envoy proxy test resources.