File tree 2 files changed +8
-6
lines changed
2 files changed +8
-6
lines changed Original file line number Diff line number Diff line change @@ -4,8 +4,8 @@ metadata:
4
4
labels :
5
5
name : vllm-llama2-7b-pool
6
6
spec :
7
- targetPort : 8000
8
- modelServerSelector :
7
+ targetPortNumber : 8000
8
+ selector :
9
9
"app": "vllm-llama2-7b-pool"
10
10
---
11
11
apiVersion : inference.networking.x-k8s.io/v1alpha1
@@ -16,7 +16,7 @@ metadata:
16
16
app.kubernetes.io/managed-by : kustomize
17
17
name : inferencemodel-sample
18
18
spec :
19
- modelName : sql-lora
19
+ modelName : tweet-summary
20
20
criticality : Critical
21
21
poolRef :
22
22
# this is the default val:
25
25
kind : InferencePool
26
26
name : vllm-llama2-7b-pool
27
27
targetModels :
28
- - name : sql-lora-1fdg2
29
- weight : 100
28
+ - name : tweet-summary-0
29
+ weight : 50
30
+ - name : tweet-summary-1
31
+ weight : 50
30
32
Original file line number Diff line number Diff line change @@ -43,7 +43,7 @@ The current manifests rely on Envoy Gateway [v1.2.1](https://gateway.envoyproxy.
43
43
Wait until the gateway is ready.
44
44
45
45
``` bash
46
- IP=$(kubectl get gateway/instance-gateway -o jsonpath='{.status.addresses[0].value}')
46
+ IP=$(kubectl get gateway/inference-gateway -o jsonpath='{.status.addresses[0].value}')
47
47
PORT=8081
48
48
49
49
curl -i ${IP}:${PORT}/v1/completions -H 'Content-Type: application/json' -d '{
You can’t perform that action at this time.
0 commit comments