File tree 4 files changed +10
-10
lines changed
4 files changed +10
-10
lines changed Original file line number Diff line number Diff line change 1
1
apiVersion : inference.networking.x-k8s.io/v1alpha2
2
2
kind : InferenceModel
3
3
metadata :
4
- name : inferencemodel-sample
4
+ name : tweet-summarizer
5
5
spec :
6
6
modelName : food-review
7
7
criticality : Standard
15
15
apiVersion : inference.networking.x-k8s.io/v1alpha2
16
16
kind : InferenceModel
17
17
metadata :
18
- name : inferencemodel-base-model
18
+ name : base-model
19
19
spec :
20
20
modelName : meta-llama/Llama-3.1-8B-Instruct
21
21
criticality : Critical
26
26
apiVersion : inference.networking.x-k8s.io/v1alpha2
27
27
kind : InferenceModel
28
28
metadata :
29
- name : inferencemodel-base-model-cpu
29
+ name : base-model-cpu
30
30
spec :
31
31
modelName : Qwen/Qwen2.5-1.5B-Instruct
32
32
criticality : Critical
Original file line number Diff line number Diff line change @@ -4,12 +4,12 @@ metadata:
4
4
labels :
5
5
app.kubernetes.io/name : api
6
6
app.kubernetes.io/managed-by : kustomize
7
- name : inferencemodel-sample
7
+ name : sample-sql-assist
8
8
spec :
9
9
criticality : Critical
10
10
modelName : sql-code-assist
11
11
poolRef :
12
- name : inferencepool-sample
12
+ name : vllm-llama-31-8b-sample-pool
13
13
targetModels :
14
14
- name : npc-bot-v1
15
15
weight : 50
Original file line number Diff line number Diff line change @@ -4,7 +4,7 @@ metadata:
4
4
labels :
5
5
app.kubernetes.io/name : api
6
6
app.kubernetes.io/managed-by : kustomize
7
- name : inferencepool-sample
7
+ name : vllm-llama-31-8b-sample-pool
8
8
spec :
9
9
selector :
10
10
app : npc-bot
Original file line number Diff line number Diff line change 13
13
apiVersion : inference.networking.x-k8s.io/v1alpha2
14
14
kind : InferenceModel
15
15
metadata :
16
- name : inferencemodel-sample
16
+ name : sample
17
17
namespace : default
18
18
spec :
19
19
modelName : sql-lora
27
27
apiVersion : inference.networking.x-k8s.io/v1alpha2
28
28
kind : InferenceModel
29
29
metadata :
30
- name : inferencemodel-sheddable
30
+ name : sheddable
31
31
namespace : default
32
32
spec :
33
33
modelName : sql-lora-sheddable
40
40
apiVersion : inference.networking.x-k8s.io/v1alpha2
41
41
kind : InferenceModel
42
42
metadata :
43
- name : inferencemodel-generic
43
+ name : generic
44
44
namespace : default
45
45
spec :
46
46
modelName : my-model
54
54
apiVersion : inference.networking.x-k8s.io/v1alpha2
55
55
kind : InferenceModel
56
56
metadata :
57
- name : inferencemodel-direct-model-name
57
+ name : direct-model-name
58
58
namespace : default
59
59
spec :
60
60
modelName : direct-model
You can’t perform that action at this time.
0 commit comments